#
tokens: 42031/50000 1/216 files (page 31/35)
lines: on (toggle) GitHub
raw markdown copy reset
This is page 31 of 35. To view the full context, open http://codebase.md/pragmar/mcp_server_webcrawl/crawlers/base/adapter.html?lines=true&page={x}, replacing {x} with a page number from 1 to 35.

# Directory Structure

```
├── .gitignore
├── CONTRIBUTING.md
├── docs
│   ├── _images
│   │   ├── interactive.document.webp
│   │   ├── interactive.search.webp
│   │   └── mcpswc.svg
│   ├── _modules
│   │   ├── index.html
│   │   ├── mcp_server_webcrawl
│   │   │   ├── crawlers
│   │   │   │   ├── archivebox
│   │   │   │   │   ├── adapter.html
│   │   │   │   │   ├── crawler.html
│   │   │   │   │   └── tests.html
│   │   │   │   ├── base
│   │   │   │   │   ├── adapter.html
│   │   │   │   │   ├── api.html
│   │   │   │   │   ├── crawler.html
│   │   │   │   │   ├── indexed.html
│   │   │   │   │   └── tests.html
│   │   │   │   ├── httrack
│   │   │   │   │   ├── adapter.html
│   │   │   │   │   ├── crawler.html
│   │   │   │   │   └── tests.html
│   │   │   │   ├── interrobot
│   │   │   │   │   ├── adapter.html
│   │   │   │   │   ├── crawler.html
│   │   │   │   │   └── tests.html
│   │   │   │   ├── katana
│   │   │   │   │   ├── adapter.html
│   │   │   │   │   ├── crawler.html
│   │   │   │   │   └── tests.html
│   │   │   │   ├── siteone
│   │   │   │   │   ├── adapter.html
│   │   │   │   │   ├── crawler.html
│   │   │   │   │   └── tests.html
│   │   │   │   ├── warc
│   │   │   │   │   ├── adapter.html
│   │   │   │   │   ├── crawler.html
│   │   │   │   │   └── tests.html
│   │   │   │   └── wget
│   │   │   │       ├── adapter.html
│   │   │   │       ├── crawler.html
│   │   │   │       └── tests.html
│   │   │   ├── crawlers.html
│   │   │   ├── extras
│   │   │   │   ├── markdown.html
│   │   │   │   ├── regex.html
│   │   │   │   ├── snippets.html
│   │   │   │   ├── thumbnails.html
│   │   │   │   └── xpath.html
│   │   │   ├── interactive
│   │   │   │   ├── highlights.html
│   │   │   │   ├── search.html
│   │   │   │   ├── session.html
│   │   │   │   └── ui.html
│   │   │   ├── main.html
│   │   │   ├── models
│   │   │   │   ├── resources.html
│   │   │   │   └── sites.html
│   │   │   ├── templates
│   │   │   │   └── tests.html
│   │   │   ├── utils
│   │   │   │   ├── blobs.html
│   │   │   │   ├── cli.html
│   │   │   │   ├── logger.html
│   │   │   │   ├── querycache.html
│   │   │   │   ├── server.html
│   │   │   │   └── tools.html
│   │   │   └── utils.html
│   │   └── re.html
│   ├── _sources
│   │   ├── guides
│   │   │   ├── archivebox.rst.txt
│   │   │   ├── httrack.rst.txt
│   │   │   ├── interrobot.rst.txt
│   │   │   ├── katana.rst.txt
│   │   │   ├── siteone.rst.txt
│   │   │   ├── warc.rst.txt
│   │   │   └── wget.rst.txt
│   │   ├── guides.rst.txt
│   │   ├── index.rst.txt
│   │   ├── installation.rst.txt
│   │   ├── interactive.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.archivebox.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.base.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.httrack.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.interrobot.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.katana.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.siteone.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.warc.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.wget.rst.txt
│   │   ├── mcp_server_webcrawl.extras.rst.txt
│   │   ├── mcp_server_webcrawl.interactive.rst.txt
│   │   ├── mcp_server_webcrawl.models.rst.txt
│   │   ├── mcp_server_webcrawl.rst.txt
│   │   ├── mcp_server_webcrawl.templates.rst.txt
│   │   ├── mcp_server_webcrawl.utils.rst.txt
│   │   ├── modules.rst.txt
│   │   ├── prompts.rst.txt
│   │   └── usage.rst.txt
│   ├── _static
│   │   ├── _sphinx_javascript_frameworks_compat.js
│   │   ├── basic.css
│   │   ├── css
│   │   │   ├── badge_only.css
│   │   │   ├── fonts
│   │   │   │   ├── fontawesome-webfont.eot
│   │   │   │   ├── fontawesome-webfont.svg
│   │   │   │   ├── fontawesome-webfont.ttf
│   │   │   │   ├── fontawesome-webfont.woff
│   │   │   │   ├── fontawesome-webfont.woff2
│   │   │   │   ├── lato-bold-italic.woff
│   │   │   │   ├── lato-bold-italic.woff2
│   │   │   │   ├── lato-bold.woff
│   │   │   │   ├── lato-bold.woff2
│   │   │   │   ├── lato-normal-italic.woff
│   │   │   │   ├── lato-normal-italic.woff2
│   │   │   │   ├── lato-normal.woff
│   │   │   │   ├── lato-normal.woff2
│   │   │   │   ├── Roboto-Slab-Bold.woff
│   │   │   │   ├── Roboto-Slab-Bold.woff2
│   │   │   │   ├── Roboto-Slab-Regular.woff
│   │   │   │   └── Roboto-Slab-Regular.woff2
│   │   │   └── theme.css
│   │   ├── doctools.js
│   │   ├── documentation_options.js
│   │   ├── file.png
│   │   ├── fonts
│   │   │   ├── Lato
│   │   │   │   ├── lato-bold.eot
│   │   │   │   ├── lato-bold.ttf
│   │   │   │   ├── lato-bold.woff
│   │   │   │   ├── lato-bold.woff2
│   │   │   │   ├── lato-bolditalic.eot
│   │   │   │   ├── lato-bolditalic.ttf
│   │   │   │   ├── lato-bolditalic.woff
│   │   │   │   ├── lato-bolditalic.woff2
│   │   │   │   ├── lato-italic.eot
│   │   │   │   ├── lato-italic.ttf
│   │   │   │   ├── lato-italic.woff
│   │   │   │   ├── lato-italic.woff2
│   │   │   │   ├── lato-regular.eot
│   │   │   │   ├── lato-regular.ttf
│   │   │   │   ├── lato-regular.woff
│   │   │   │   └── lato-regular.woff2
│   │   │   └── RobotoSlab
│   │   │       ├── roboto-slab-v7-bold.eot
│   │   │       ├── roboto-slab-v7-bold.ttf
│   │   │       ├── roboto-slab-v7-bold.woff
│   │   │       ├── roboto-slab-v7-bold.woff2
│   │   │       ├── roboto-slab-v7-regular.eot
│   │   │       ├── roboto-slab-v7-regular.ttf
│   │   │       ├── roboto-slab-v7-regular.woff
│   │   │       └── roboto-slab-v7-regular.woff2
│   │   ├── images
│   │   │   ├── interactive.document.png
│   │   │   ├── interactive.document.webp
│   │   │   ├── interactive.search.png
│   │   │   ├── interactive.search.webp
│   │   │   └── mcpswc.svg
│   │   ├── jquery.js
│   │   ├── js
│   │   │   ├── badge_only.js
│   │   │   ├── theme.js
│   │   │   └── versions.js
│   │   ├── language_data.js
│   │   ├── minus.png
│   │   ├── plus.png
│   │   ├── pygments.css
│   │   ├── searchtools.js
│   │   └── sphinx_highlight.js
│   ├── .buildinfo
│   ├── .nojekyll
│   ├── genindex.html
│   ├── guides
│   │   ├── archivebox.html
│   │   ├── httrack.html
│   │   ├── interrobot.html
│   │   ├── katana.html
│   │   ├── siteone.html
│   │   ├── warc.html
│   │   └── wget.html
│   ├── guides.html
│   ├── index.html
│   ├── installation.html
│   ├── interactive.html
│   ├── mcp_server_webcrawl.crawlers.archivebox.html
│   ├── mcp_server_webcrawl.crawlers.base.html
│   ├── mcp_server_webcrawl.crawlers.html
│   ├── mcp_server_webcrawl.crawlers.httrack.html
│   ├── mcp_server_webcrawl.crawlers.interrobot.html
│   ├── mcp_server_webcrawl.crawlers.katana.html
│   ├── mcp_server_webcrawl.crawlers.siteone.html
│   ├── mcp_server_webcrawl.crawlers.warc.html
│   ├── mcp_server_webcrawl.crawlers.wget.html
│   ├── mcp_server_webcrawl.extras.html
│   ├── mcp_server_webcrawl.html
│   ├── mcp_server_webcrawl.interactive.html
│   ├── mcp_server_webcrawl.models.html
│   ├── mcp_server_webcrawl.templates.html
│   ├── mcp_server_webcrawl.utils.html
│   ├── modules.html
│   ├── objects.inv
│   ├── prompts.html
│   ├── py-modindex.html
│   ├── search.html
│   ├── searchindex.js
│   └── usage.html
├── LICENSE
├── MANIFEST.in
├── prompts
│   ├── audit404.md
│   ├── auditfiles.md
│   ├── auditperf.md
│   ├── auditseo.md
│   ├── gopher.md
│   ├── README.md
│   └── testsearch.md
├── pyproject.toml
├── README.md
├── setup.py
├── sphinx
│   ├── _static
│   │   └── images
│   │       ├── interactive.document.png
│   │       ├── interactive.document.webp
│   │       ├── interactive.search.png
│   │       ├── interactive.search.webp
│   │       └── mcpswc.svg
│   ├── _templates
│   │   └── layout.html
│   ├── conf.py
│   ├── guides
│   │   ├── archivebox.rst
│   │   ├── httrack.rst
│   │   ├── interrobot.rst
│   │   ├── katana.rst
│   │   ├── siteone.rst
│   │   ├── warc.rst
│   │   └── wget.rst
│   ├── guides.rst
│   ├── index.rst
│   ├── installation.rst
│   ├── interactive.rst
│   ├── make.bat
│   ├── Makefile
│   ├── mcp_server_webcrawl.crawlers.archivebox.rst
│   ├── mcp_server_webcrawl.crawlers.base.rst
│   ├── mcp_server_webcrawl.crawlers.httrack.rst
│   ├── mcp_server_webcrawl.crawlers.interrobot.rst
│   ├── mcp_server_webcrawl.crawlers.katana.rst
│   ├── mcp_server_webcrawl.crawlers.rst
│   ├── mcp_server_webcrawl.crawlers.siteone.rst
│   ├── mcp_server_webcrawl.crawlers.warc.rst
│   ├── mcp_server_webcrawl.crawlers.wget.rst
│   ├── mcp_server_webcrawl.extras.rst
│   ├── mcp_server_webcrawl.interactive.rst
│   ├── mcp_server_webcrawl.models.rst
│   ├── mcp_server_webcrawl.rst
│   ├── mcp_server_webcrawl.templates.rst
│   ├── mcp_server_webcrawl.utils.rst
│   ├── modules.rst
│   ├── prompts.rst
│   ├── readme.txt
│   └── usage.rst
└── src
    └── mcp_server_webcrawl
        ├── __init__.py
        ├── crawlers
        │   ├── __init__.py
        │   ├── archivebox
        │   │   ├── __init__.py
        │   │   ├── adapter.py
        │   │   ├── crawler.py
        │   │   └── tests.py
        │   ├── base
        │   │   ├── __init__.py
        │   │   ├── adapter.py
        │   │   ├── api.py
        │   │   ├── crawler.py
        │   │   ├── indexed.py
        │   │   └── tests.py
        │   ├── httrack
        │   │   ├── __init__.py
        │   │   ├── adapter.py
        │   │   ├── crawler.py
        │   │   └── tests.py
        │   ├── interrobot
        │   │   ├── __init__.py
        │   │   ├── adapter.py
        │   │   ├── crawler.py
        │   │   └── tests.py
        │   ├── katana
        │   │   ├── __init__.py
        │   │   ├── adapter.py
        │   │   ├── crawler.py
        │   │   └── tests.py
        │   ├── siteone
        │   │   ├── __init__.py
        │   │   ├── adapter.py
        │   │   ├── crawler.py
        │   │   └── tests.py
        │   ├── warc
        │   │   ├── __init__.py
        │   │   ├── adapter.py
        │   │   ├── crawler.py
        │   │   └── tests.py
        │   └── wget
        │       ├── __init__.py
        │       ├── adapter.py
        │       ├── crawler.py
        │       └── tests.py
        ├── extras
        │   ├── __init__.py
        │   ├── markdown.py
        │   ├── regex.py
        │   ├── snippets.py
        │   ├── thumbnails.py
        │   └── xpath.py
        ├── interactive
        │   ├── __init__.py
        │   ├── highlights.py
        │   ├── search.py
        │   ├── session.py
        │   ├── ui.py
        │   └── views
        │       ├── base.py
        │       ├── document.py
        │       ├── help.py
        │       ├── requirements.py
        │       ├── results.py
        │       └── searchform.py
        ├── main.py
        ├── models
        │   ├── __init__.py
        │   ├── base.py
        │   ├── resources.py
        │   └── sites.py
        ├── settings.py
        ├── templates
        │   ├── __init__.py
        │   ├── markdown.xslt
        │   ├── tests_core.html
        │   └── tests.py
        └── utils
            ├── __init__.py
            ├── cli.py
            ├── logger.py
            ├── parser.py
            ├── parsetab.py
            ├── search.py
            ├── server.py
            ├── tests.py
            └── tools.py
```

# Files

--------------------------------------------------------------------------------
/docs/mcp_server_webcrawl.crawlers.base.html:
--------------------------------------------------------------------------------

```html
   1 | 
   2 | 
   3 | <!DOCTYPE html>
   4 | <html class="writer-html5" lang="en" data-content_root="./">
   5 | <head>
   6 |   <meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1" />
   7 | 
   8 |   <meta name="viewport" content="width=device-width, initial-scale=1.0" />
   9 |   <title>mcp_server_webcrawl.crawlers.base package &mdash; mcp-server-webcrawl  documentation</title>
  10 |       <link rel="stylesheet" type="text/css" href="_static/pygments.css?v=80d5e7a1" />
  11 |       <link rel="stylesheet" type="text/css" href="_static/css/theme.css?v=e59714d7" />
  12 | 
  13 |   
  14 |       <script src="_static/jquery.js?v=5d32c60e"></script>
  15 |       <script src="_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
  16 |       <script src="_static/documentation_options.js?v=5929fcd5"></script>
  17 |       <script src="_static/doctools.js?v=888ff710"></script>
  18 |       <script src="_static/sphinx_highlight.js?v=dc90522c"></script>
  19 |     <script src="_static/js/theme.js"></script>
  20 |     <link rel="index" title="Index" href="genindex.html" />
  21 |     <link rel="search" title="Search" href="search.html" />
  22 |     <link rel="next" title="mcp_server_webcrawl.crawlers.archivebox package" href="mcp_server_webcrawl.crawlers.archivebox.html" />
  23 |     <link rel="prev" title="mcp_server_webcrawl.crawlers package" href="mcp_server_webcrawl.crawlers.html" /> 
  24 | </head>
  25 | 
  26 | <body class="wy-body-for-nav"> 
  27 |   <div class="wy-grid-for-nav">
  28 |     <nav data-toggle="wy-nav-shift" class="wy-nav-side">
  29 |       <div class="wy-side-scroll">
  30 |         <div class="wy-side-nav-search" >
  31 | 
  32 |           
  33 |           
  34 |           <a href="index.html" class="icon icon-home">
  35 |             mcp-server-webcrawl
  36 |           </a>
  37 | <div role="search">
  38 |   <form id="rtd-search-form" class="wy-form" action="search.html" method="get">
  39 |     <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
  40 |     <input type="hidden" name="check_keywords" value="yes" />
  41 |     <input type="hidden" name="area" value="default" />
  42 |   </form>
  43 | </div>
  44 |         </div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
  45 |               <p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
  46 | <ul class="current">
  47 | <li class="toctree-l1"><a class="reference internal" href="installation.html">Installation</a></li>
  48 | <li class="toctree-l1"><a class="reference internal" href="guides.html">Setup Guides</a></li>
  49 | <li class="toctree-l1"><a class="reference internal" href="usage.html">Usage</a></li>
  50 | <li class="toctree-l1"><a class="reference internal" href="prompts.html">Prompt Routines</a></li>
  51 | <li class="toctree-l1"><a class="reference internal" href="interactive.html">Interactive Mode</a></li>
  52 | <li class="toctree-l1 current"><a class="reference internal" href="modules.html">mcp_server_webcrawl</a><ul class="current">
  53 | <li class="toctree-l2 current"><a class="reference internal" href="mcp_server_webcrawl.html">mcp_server_webcrawl package</a></li>
  54 | </ul>
  55 | </li>
  56 | </ul>
  57 | 
  58 |         </div>
  59 |       </div>
  60 |     </nav>
  61 | 
  62 |     <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
  63 |           <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
  64 |           <a href="index.html">mcp-server-webcrawl</a>
  65 |       </nav>
  66 | 
  67 |       <div class="wy-nav-content">
  68 |         <div class="rst-content">
  69 |           <div role="navigation" aria-label="Page navigation">
  70 |   <ul class="wy-breadcrumbs">
  71 |       <li><a href="index.html" class="icon icon-home" aria-label="Home"></a></li>
  72 |           <li class="breadcrumb-item"><a href="modules.html">mcp_server_webcrawl</a></li>
  73 |           <li class="breadcrumb-item"><a href="mcp_server_webcrawl.html">mcp_server_webcrawl package</a></li>
  74 |           <li class="breadcrumb-item"><a href="mcp_server_webcrawl.crawlers.html">mcp_server_webcrawl.crawlers package</a></li>
  75 |       <li class="breadcrumb-item active">mcp_server_webcrawl.crawlers.base package</li>
  76 |       <li class="wy-breadcrumbs-aside">
  77 |             <a href="_sources/mcp_server_webcrawl.crawlers.base.rst.txt" rel="nofollow"> View page source</a>
  78 |       </li>
  79 |   </ul>
  80 |   <hr/>
  81 | </div>
  82 |           <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
  83 |            <div itemprop="articleBody">
  84 |              
  85 |   <section id="mcp-server-webcrawl-crawlers-base-package">
  86 | <h1>mcp_server_webcrawl.crawlers.base package<a class="headerlink" href="#mcp-server-webcrawl-crawlers-base-package" title="Link to this heading"></a></h1>
  87 | <section id="submodules">
  88 | <h2>Submodules<a class="headerlink" href="#submodules" title="Link to this heading"></a></h2>
  89 | </section>
  90 | <section id="module-mcp_server_webcrawl.crawlers.base.adapter">
  91 | <span id="mcp-server-webcrawl-crawlers-base-adapter-module"></span><h2>mcp_server_webcrawl.crawlers.base.adapter module<a class="headerlink" href="#module-mcp_server_webcrawl.crawlers.base.adapter" title="Link to this heading"></a></h2>
  92 | <dl class="py class">
  93 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.IndexStatus">
  94 | <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">IndexStatus</span></span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/adapter.html#IndexStatus"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexStatus" title="Link to this definition"></a></dt>
  95 | <dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/enum.html#enum.Enum" title="(in Python v3.14)"><code class="xref py py-class docutils literal notranslate"><span class="pre">Enum</span></code></a></p>
  96 | <p>An enumeration.</p>
  97 | <dl class="py attribute">
  98 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.IndexStatus.UNDEFINED">
  99 | <span class="sig-name descname"><span class="pre">UNDEFINED</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">''</span></em><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexStatus.UNDEFINED" title="Link to this definition"></a></dt>
 100 | <dd></dd></dl>
 101 | 
 102 | <dl class="py attribute">
 103 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.IndexStatus.IDLE">
 104 | <span class="sig-name descname"><span class="pre">IDLE</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'idle'</span></em><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexStatus.IDLE" title="Link to this definition"></a></dt>
 105 | <dd></dd></dl>
 106 | 
 107 | <dl class="py attribute">
 108 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.IndexStatus.INDEXING">
 109 | <span class="sig-name descname"><span class="pre">INDEXING</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'indexing'</span></em><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexStatus.INDEXING" title="Link to this definition"></a></dt>
 110 | <dd></dd></dl>
 111 | 
 112 | <dl class="py attribute">
 113 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.IndexStatus.PARTIAL">
 114 | <span class="sig-name descname"><span class="pre">PARTIAL</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'partial'</span></em><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexStatus.PARTIAL" title="Link to this definition"></a></dt>
 115 | <dd></dd></dl>
 116 | 
 117 | <dl class="py attribute">
 118 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.IndexStatus.COMPLETE">
 119 | <span class="sig-name descname"><span class="pre">COMPLETE</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'complete'</span></em><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexStatus.COMPLETE" title="Link to this definition"></a></dt>
 120 | <dd></dd></dl>
 121 | 
 122 | <dl class="py attribute">
 123 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.IndexStatus.REMOTE">
 124 | <span class="sig-name descname"><span class="pre">REMOTE</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'remote'</span></em><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexStatus.REMOTE" title="Link to this definition"></a></dt>
 125 | <dd></dd></dl>
 126 | 
 127 | <dl class="py attribute">
 128 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.IndexStatus.FAILED">
 129 | <span class="sig-name descname"><span class="pre">FAILED</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">'failed'</span></em><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexStatus.FAILED" title="Link to this definition"></a></dt>
 130 | <dd></dd></dl>
 131 | 
 132 | </dd></dl>
 133 | 
 134 | <dl class="py class">
 135 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.IndexState">
 136 | <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">IndexState</span></span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/adapter.html#IndexState"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexState" title="Link to this definition"></a></dt>
 137 | <dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.14)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
 138 | <p>Shared state between crawler and manager for indexing progress</p>
 139 | <dl class="py attribute">
 140 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.IndexState.status">
 141 | <span class="sig-name descname"><span class="pre">status</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference internal" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexStatus" title="mcp_server_webcrawl.crawlers.base.adapter.IndexStatus"><span class="pre">IndexStatus</span></a></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">''</span></em><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexState.status" title="Link to this definition"></a></dt>
 142 | <dd></dd></dl>
 143 | 
 144 | <dl class="py attribute">
 145 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.IndexState.processed">
 146 | <span class="sig-name descname"><span class="pre">processed</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><span class="pre">int</span></a></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">0</span></em><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexState.processed" title="Link to this definition"></a></dt>
 147 | <dd></dd></dl>
 148 | 
 149 | <dl class="py attribute">
 150 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.IndexState.time_start">
 151 | <span class="sig-name descname"><span class="pre">time_start</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/datetime.html#datetime.datetime" title="(in Python v3.14)"><span class="pre">datetime</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.14)"><span class="pre">None</span></a></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexState.time_start" title="Link to this definition"></a></dt>
 152 | <dd></dd></dl>
 153 | 
 154 | <dl class="py attribute">
 155 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.IndexState.time_end">
 156 | <span class="sig-name descname"><span class="pre">time_end</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/datetime.html#datetime.datetime" title="(in Python v3.14)"><span class="pre">datetime</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.14)"><span class="pre">None</span></a></em><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">None</span></em><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexState.time_end" title="Link to this definition"></a></dt>
 157 | <dd></dd></dl>
 158 | 
 159 | <dl class="py method">
 160 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.IndexState.set_status">
 161 | <span class="sig-name descname"><span class="pre">set_status</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">status</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/adapter.html#IndexState.set_status"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexState.set_status" title="Link to this definition"></a></dt>
 162 | <dd><dl class="field-list simple">
 163 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 164 | <dd class="field-odd"><p><strong>status</strong> (<a class="reference internal" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexStatus" title="mcp_server_webcrawl.crawlers.base.adapter.IndexStatus"><em>IndexStatus</em></a>) – </p>
 165 | </dd>
 166 | <dt class="field-even">Return type<span class="colon">:</span></dt>
 167 | <dd class="field-even"><p>None</p>
 168 | </dd>
 169 | </dl>
 170 | </dd></dl>
 171 | 
 172 | <dl class="py method">
 173 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.IndexState.increment_processed">
 174 | <span class="sig-name descname"><span class="pre">increment_processed</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/adapter.html#IndexState.increment_processed"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexState.increment_processed" title="Link to this definition"></a></dt>
 175 | <dd></dd></dl>
 176 | 
 177 | <dl class="py property">
 178 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.IndexState.duration">
 179 | <em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">duration</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><span class="pre">str</span></a></em><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexState.duration" title="Link to this definition"></a></dt>
 180 | <dd></dd></dl>
 181 | 
 182 | <dl class="py method">
 183 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.IndexState.is_timeout">
 184 | <span class="sig-name descname"><span class="pre">is_timeout</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/adapter.html#IndexState.is_timeout"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexState.is_timeout" title="Link to this definition"></a></dt>
 185 | <dd><p>Check if the indexing operation has exceeded the timeout threshold</p>
 186 | <dl class="field-list simple">
 187 | <dt class="field-odd">Return type<span class="colon">:</span></dt>
 188 | <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.14)">bool</a></p>
 189 | </dd>
 190 | </dl>
 191 | </dd></dl>
 192 | 
 193 | <dl class="py method">
 194 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.IndexState.to_dict">
 195 | <span class="sig-name descname"><span class="pre">to_dict</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/adapter.html#IndexState.to_dict"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexState.to_dict" title="Link to this definition"></a></dt>
 196 | <dd><p>Convert the IndexState to a dictionary representation</p>
 197 | <dl class="field-list simple">
 198 | <dt class="field-odd">Return type<span class="colon">:</span></dt>
 199 | <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.14)">dict</a></p>
 200 | </dd>
 201 | </dl>
 202 | </dd></dl>
 203 | 
 204 | <dl class="py method">
 205 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.IndexState.__init__">
 206 | <span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">status</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">IndexStatus.UNDEFINED</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">processed</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">time_start</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">time_end</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexState.__init__" title="Link to this definition"></a></dt>
 207 | <dd><dl class="field-list simple">
 208 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 209 | <dd class="field-odd"><ul class="simple">
 210 | <li><p><strong>status</strong> (<a class="reference internal" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexStatus" title="mcp_server_webcrawl.crawlers.base.adapter.IndexStatus"><em>IndexStatus</em></a>) – </p></li>
 211 | <li><p><strong>processed</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><em>int</em></a>) – </p></li>
 212 | <li><p><strong>time_start</strong> (<a class="reference external" href="https://docs.python.org/3/library/datetime.html#datetime.datetime" title="(in Python v3.14)"><em>datetime</em></a><em> | </em><em>None</em>) – </p></li>
 213 | <li><p><strong>time_end</strong> (<a class="reference external" href="https://docs.python.org/3/library/datetime.html#datetime.datetime" title="(in Python v3.14)"><em>datetime</em></a><em> | </em><em>None</em>) – </p></li>
 214 | </ul>
 215 | </dd>
 216 | <dt class="field-even">Return type<span class="colon">:</span></dt>
 217 | <dd class="field-even"><p>None</p>
 218 | </dd>
 219 | </dl>
 220 | </dd></dl>
 221 | 
 222 | </dd></dl>
 223 | 
 224 | <dl class="py class">
 225 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.SitesGroup">
 226 | <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">SitesGroup</span></span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/adapter.html#SitesGroup"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.SitesGroup" title="Link to this definition"></a></dt>
 227 | <dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.14)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
 228 | <p>Container class that supports searching one or more sites at once.</p>
 229 | <dl class="field-list simple">
 230 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 231 | <dd class="field-odd"><ul class="simple">
 232 | <li><p><strong>datasrc</strong> – site datasrc</p></li>
 233 | <li><p><strong>site_ids</strong> – site ids of the sites</p></li>
 234 | <li><p><strong>site_paths</strong> – paths to site contents (directories)</p></li>
 235 | </ul>
 236 | </dd>
 237 | </dl>
 238 | <dl class="py method">
 239 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.SitesGroup.__init__">
 240 | <span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">datasrc</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">site_ids</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">site_paths</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/adapter.html#SitesGroup.__init__"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.SitesGroup.__init__" title="Link to this definition"></a></dt>
 241 | <dd><p>Container class that supports searching one or more sites at once.</p>
 242 | <dl class="field-list simple">
 243 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 244 | <dd class="field-odd"><ul class="simple">
 245 | <li><p><strong>datasrc</strong> (<a class="reference external" href="https://docs.python.org/3/library/pathlib.html#pathlib.Path" title="(in Python v3.14)"><em>Path</em></a>) – site datasrc</p></li>
 246 | <li><p><strong>site_ids</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)"><em>list</em></a><em>[</em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><em>int</em></a><em>]</em>) – site ids of the sites</p></li>
 247 | <li><p><strong>site_paths</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)"><em>list</em></a><em>[</em><a class="reference external" href="https://docs.python.org/3/library/pathlib.html#pathlib.Path" title="(in Python v3.14)"><em>Path</em></a><em>]</em>) – paths to site contents (directories)</p></li>
 248 | </ul>
 249 | </dd>
 250 | <dt class="field-even">Return type<span class="colon">:</span></dt>
 251 | <dd class="field-even"><p>None</p>
 252 | </dd>
 253 | </dl>
 254 | </dd></dl>
 255 | 
 256 | <dl class="py method">
 257 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.SitesGroup.get_sites">
 258 | <span class="sig-name descname"><span class="pre">get_sites</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/adapter.html#SitesGroup.get_sites"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.SitesGroup.get_sites" title="Link to this definition"></a></dt>
 259 | <dd><dl class="field-list simple">
 260 | <dt class="field-odd">Return type<span class="colon">:</span></dt>
 261 | <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.14)">dict</a>[<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)">int</a>, <a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)">str</a>]</p>
 262 | </dd>
 263 | </dl>
 264 | </dd></dl>
 265 | 
 266 | </dd></dl>
 267 | 
 268 | <dl class="py class">
 269 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.SitesStat">
 270 | <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">SitesStat</span></span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/adapter.html#SitesStat"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.SitesStat" title="Link to this definition"></a></dt>
 271 | <dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.14)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
 272 | <p>Some basic bookkeeping, for troubleshooting</p>
 273 | <dl class="field-list simple">
 274 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 275 | <dd class="field-odd"><ul class="simple">
 276 | <li><p><strong>group</strong> – SitesGroup to track statistics for</p></li>
 277 | <li><p><strong>cached</strong> – whether the group was retrieved from cache</p></li>
 278 | </ul>
 279 | </dd>
 280 | </dl>
 281 | <dl class="py method">
 282 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.SitesStat.__init__">
 283 | <span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">group</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">cached</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/adapter.html#SitesStat.__init__"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.SitesStat.__init__" title="Link to this definition"></a></dt>
 284 | <dd><p>Some basic bookkeeping, for troubleshooting</p>
 285 | <dl class="field-list simple">
 286 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 287 | <dd class="field-odd"><ul class="simple">
 288 | <li><p><strong>group</strong> (<a class="reference internal" href="#mcp_server_webcrawl.crawlers.base.adapter.SitesGroup" title="mcp_server_webcrawl.crawlers.base.adapter.SitesGroup"><em>SitesGroup</em></a>) – SitesGroup to track statistics for</p></li>
 289 | <li><p><strong>cached</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.14)"><em>bool</em></a>) – whether the group was retrieved from cache</p></li>
 290 | </ul>
 291 | </dd>
 292 | <dt class="field-even">Return type<span class="colon">:</span></dt>
 293 | <dd class="field-even"><p>None</p>
 294 | </dd>
 295 | </dl>
 296 | </dd></dl>
 297 | 
 298 | </dd></dl>
 299 | 
 300 | <dl class="py class">
 301 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.BaseManager">
 302 | <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">BaseManager</span></span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/adapter.html#BaseManager"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.BaseManager" title="Link to this definition"></a></dt>
 303 | <dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.14)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
 304 | <p>Base class for managing web crawler data in in-memory SQLite databases.
 305 | Provides connection pooling and caching for efficient access.</p>
 306 | <p>Initialize the manager with statistics.</p>
 307 | <dl class="py method">
 308 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.BaseManager.__init__">
 309 | <span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/adapter.html#BaseManager.__init__"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.BaseManager.__init__" title="Link to this definition"></a></dt>
 310 | <dd><p>Initialize the manager with statistics.</p>
 311 | <dl class="field-list simple">
 312 | <dt class="field-odd">Return type<span class="colon">:</span></dt>
 313 | <dd class="field-odd"><p>None</p>
 314 | </dd>
 315 | </dl>
 316 | </dd></dl>
 317 | 
 318 | <dl class="py method">
 319 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.BaseManager.string_to_id">
 320 | <em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">string_to_id</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">value</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/adapter.html#BaseManager.string_to_id"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.BaseManager.string_to_id" title="Link to this definition"></a></dt>
 321 | <dd><p>Convert a string, such as a directory name, to a numeric ID
 322 | suitable for a database primary key.</p>
 323 | <p>Hash space and collision probability notes:</p>
 324 | <ul class="simple"><li><p>[:8]  = 32 bits (4.29 billion values) - ~1% collision chance with 10,000 items</p></li>
 325 | <li><p>[:12] = 48 bits (280 trillion values) - ~0.00002% collision chance with 10,000 items</p></li>
 326 | <li><p>[:16] = 64 bits (max safe SQLite INTEGER) - near-zero collision, 9.22 quintillion values</p></li>
 327 | <li><p>SQLite INTEGER type is 64-bit signed, with max value of 9,223,372,036,854,775,807.</p></li>
 328 | <li><p>The big problem with larger hashspaces is the length of the ids they generate for presentation.</p></li></ul>
 329 | <dl class="field-list simple">
 330 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 331 | <dd class="field-odd"><p><strong>value</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a>) – Input string to convert to an ID</p>
 332 | </dd>
 333 | <dt class="field-even">Returns<span class="colon">:</span></dt>
 334 | <dd class="field-even"><p>Integer ID derived from the input string</p>
 335 | </dd>
 336 | <dt class="field-odd">Return type<span class="colon">:</span></dt>
 337 | <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)">int</a></p>
 338 | </dd>
 339 | </dl>
 340 | </dd></dl>
 341 | 
 342 | <dl class="py method">
 343 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.BaseManager.get_basic_headers">
 344 | <em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">get_basic_headers</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">file_size</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">resource_type</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">path</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/adapter.html#BaseManager.get_basic_headers"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.BaseManager.get_basic_headers" title="Link to this definition"></a></dt>
 345 | <dd><p>Generate basic HTTP headers for a resource.</p>
 346 | <dl class="field-list simple">
 347 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 348 | <dd class="field-odd"><ul class="simple">
 349 | <li><p><strong>file_size</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><em>int</em></a>) – size of the file in bytes</p></li>
 350 | <li><p><strong>resource_type</strong> (<a class="reference internal" href="mcp_server_webcrawl.models.html#mcp_server_webcrawl.models.resources.ResourceResultType" title="mcp_server_webcrawl.models.resources.ResourceResultType"><em>ResourceResultType</em></a>) – type of resource to generate headers for</p></li>
 351 | <li><p><strong>path</strong> (<a class="reference external" href="https://docs.python.org/3/library/pathlib.html#pathlib.Path" title="(in Python v3.14)"><em>Path</em></a>) – file path used for MIME type detection</p></li>
 352 | </ul>
 353 | </dd>
 354 | <dt class="field-even">Returns<span class="colon">:</span></dt>
 355 | <dd class="field-even"><p>HTTP headers string with content type and length</p>
 356 | </dd>
 357 | <dt class="field-odd">Return type<span class="colon">:</span></dt>
 358 | <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)">str</a></p>
 359 | </dd>
 360 | </dl>
 361 | </dd></dl>
 362 | 
 363 | <dl class="py method">
 364 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.BaseManager.read_files">
 365 | <em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">read_files</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">paths</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/adapter.html#BaseManager.read_files"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.BaseManager.read_files" title="Link to this definition"></a></dt>
 366 | <dd><p>Read content from multiple files concurrently.</p>
 367 | <dl class="field-list simple">
 368 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 369 | <dd class="field-odd"><p><strong>paths</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)"><em>list</em></a><em>[</em><a class="reference external" href="https://docs.python.org/3/library/pathlib.html#pathlib.Path" title="(in Python v3.14)"><em>Path</em></a><em>]</em>) – list of file paths to read</p>
 370 | </dd>
 371 | <dt class="field-even">Returns<span class="colon">:</span></dt>
 372 | <dd class="field-even"><p>dictionary mapping file paths to their content or None for binary/unreadable files</p>
 373 | </dd>
 374 | <dt class="field-odd">Return type<span class="colon">:</span></dt>
 375 | <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.14)">dict</a>[<a class="reference external" href="https://docs.python.org/3/library/pathlib.html#pathlib.Path" title="(in Python v3.14)"><em>Path</em></a>, <a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)">str</a> | None]</p>
 376 | </dd>
 377 | </dl>
 378 | </dd></dl>
 379 | 
 380 | <dl class="py method">
 381 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.BaseManager.read_file_contents">
 382 | <em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">read_file_contents</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">file_path</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">resource_type</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/adapter.html#BaseManager.read_file_contents"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.BaseManager.read_file_contents" title="Link to this definition"></a></dt>
 383 | <dd><p>Read content from text files with better error handling and encoding detection.</p>
 384 | <dl class="field-list simple">
 385 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 386 | <dd class="field-odd"><ul class="simple">
 387 | <li><p><strong>file_path</strong> (<a class="reference external" href="https://docs.python.org/3/library/pathlib.html#pathlib.Path" title="(in Python v3.14)"><em>Path</em></a>) – path to the file to read</p></li>
 388 | <li><p><strong>resource_type</strong> (<a class="reference internal" href="mcp_server_webcrawl.models.html#mcp_server_webcrawl.models.resources.ResourceResultType" title="mcp_server_webcrawl.models.resources.ResourceResultType"><em>ResourceResultType</em></a>) – type of resource to determine if content should be read</p></li>
 389 | </ul>
 390 | </dd>
 391 | <dt class="field-even">Returns<span class="colon">:</span></dt>
 392 | <dd class="field-even"><p>file content as string or None for binary/unreadable files</p>
 393 | </dd>
 394 | <dt class="field-odd">Return type<span class="colon">:</span></dt>
 395 | <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)">str</a> | None</p>
 396 | </dd>
 397 | </dl>
 398 | </dd></dl>
 399 | 
 400 | <dl class="py method">
 401 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.BaseManager.decruft_path">
 402 | <em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">decruft_path</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">path</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/adapter.html#BaseManager.decruft_path"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.BaseManager.decruft_path" title="Link to this definition"></a></dt>
 403 | <dd><p>Very light touch cleanup of file naming, these tmps are creating noise
 404 | and extensions are useful in classifying resources</p>
 405 | <dl class="field-list simple">
 406 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 407 | <dd class="field-odd"><p><strong>path</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a>) – file path string to clean up</p>
 408 | </dd>
 409 | <dt class="field-even">Returns<span class="colon">:</span></dt>
 410 | <dd class="field-even"><p>cleaned path string with temp files and weird extensions normalized</p>
 411 | </dd>
 412 | <dt class="field-odd">Return type<span class="colon">:</span></dt>
 413 | <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)">str</a></p>
 414 | </dd>
 415 | </dl>
 416 | </dd></dl>
 417 | 
 418 | <dl class="py method">
 419 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.BaseManager.get_stats">
 420 | <span class="sig-name descname"><span class="pre">get_stats</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/adapter.html#BaseManager.get_stats"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.BaseManager.get_stats" title="Link to this definition"></a></dt>
 421 | <dd><dl class="field-list simple">
 422 | <dt class="field-odd">Return type<span class="colon">:</span></dt>
 423 | <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)">list</a>[<a class="reference internal" href="#mcp_server_webcrawl.crawlers.base.adapter.SitesStat" title="mcp_server_webcrawl.crawlers.base.adapter.SitesStat"><em>SitesStat</em></a>]</p>
 424 | </dd>
 425 | </dl>
 426 | </dd></dl>
 427 | 
 428 | <dl class="py method">
 429 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.adapter.BaseManager.get_resources_for_sites_group">
 430 | <span class="sig-name descname"><span class="pre">get_resources_for_sites_group</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">sites_group</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">query</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">fields</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">sort</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">limit</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">offset</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">swap_values</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">{}</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/adapter.html#BaseManager.get_resources_for_sites_group"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.adapter.BaseManager.get_resources_for_sites_group" title="Link to this definition"></a></dt>
 431 | <dd><p>Get resources from directories using structured query parsing with SearchQueryParser.</p>
 432 | <p>This method extracts types, fields, and statuses from the querystring instead of
 433 | accepting them as separate arguments, using the new SearchSubquery functionality.</p>
 434 | <dl class="field-list simple">
 435 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 436 | <dd class="field-odd"><ul class="simple">
 437 | <li><p><strong>sites_group</strong> (<a class="reference internal" href="#mcp_server_webcrawl.crawlers.base.adapter.SitesGroup" title="mcp_server_webcrawl.crawlers.base.adapter.SitesGroup"><em>SitesGroup</em></a>) – Group of sites to search in</p></li>
 438 | <li><p><strong>query</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a>) – Search query string that can include field:value syntax for filtering</p></li>
 439 | <li><p><strong>fields</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)"><em>list</em></a><em>[</em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a><em>] </em><em>| </em><em>None</em>) – resource fields to be returned by the API (Content, Headers, etc.)</p></li>
 440 | <li><p><strong>sort</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a><em> | </em><em>None</em>) – Sort order for results</p></li>
 441 | <li><p><strong>limit</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><em>int</em></a>) – Maximum number of results to return</p></li>
 442 | <li><p><strong>offset</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><em>int</em></a>) – Number of results to skip for pagination</p></li>
 443 | <li><p><strong>swap_values</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.14)"><em>dict</em></a>) – per-field parameterized values to check for (and replace)</p></li>
 444 | </ul>
 445 | </dd>
 446 | <dt class="field-even">Returns<span class="colon">:</span></dt>
 447 | <dd class="field-even"><p>Tuple of (list of ResourceResult objects, total count, connection_index_state)</p>
 448 | </dd>
 449 | <dt class="field-odd">Return type<span class="colon">:</span></dt>
 450 | <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#tuple" title="(in Python v3.14)">tuple</a>[<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)">list</a>[<a class="reference internal" href="mcp_server_webcrawl.models.html#mcp_server_webcrawl.models.resources.ResourceResult" title="mcp_server_webcrawl.models.resources.ResourceResult"><em>ResourceResult</em></a>], <a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)">int</a>, <a class="reference internal" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexState" title="mcp_server_webcrawl.crawlers.base.adapter.IndexState"><em>IndexState</em></a>]</p>
 451 | </dd>
 452 | </dl>
 453 | <p class="rubric">Notes</p>
 454 | <p>Returns empty results if sites is empty or not provided.
 455 | If the database is being built, it will log a message and return empty results.</p>
 456 | <p>This method extracts field-specific filters from the query string using SearchQueryParser:</p>
 457 | <ul class="simple"><li><p>type:html (to filter by resource type)</p></li>
 458 | <li><p>status:200 (to filter by HTTP status)</p></li></ul>
 459 | <p>Any fields present in the SearchSubquery will be included in the response.</p>
 460 | </dd></dl>
 461 | 
 462 | </dd></dl>
 463 | 
 464 | </section>
 465 | <section id="module-mcp_server_webcrawl.crawlers.base.api">
 466 | <span id="mcp-server-webcrawl-crawlers-base-api-module"></span><h2>mcp_server_webcrawl.crawlers.base.api module<a class="headerlink" href="#module-mcp_server_webcrawl.crawlers.base.api" title="Link to this heading"></a></h2>
 467 | <dl class="py class">
 468 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.api.BaseJsonApiEncoder">
 469 | <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">BaseJsonApiEncoder</span></span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/api.html#BaseJsonApiEncoder"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.api.BaseJsonApiEncoder" title="Link to this definition"></a></dt>
 470 | <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">JSONEncoder</span></code></p>
 471 | <p>Custom JSON encoder for BaseJsonApi objects and ResourceResultType enums.</p>
 472 | <p>Constructor for JSONEncoder, with sensible defaults.</p>
 473 | <p>If skipkeys is false, then it is a TypeError to attempt
 474 | encoding of keys that are not str, int, float or None.  If
 475 | skipkeys is True, such items are simply skipped.</p>
 476 | <p>If ensure_ascii is true, the output is guaranteed to be str
 477 | objects with all incoming non-ASCII characters escaped.  If
 478 | ensure_ascii is false, the output can contain non-ASCII characters.</p>
 479 | <p>If check_circular is true, then lists, dicts, and custom encoded
 480 | objects will be checked for circular references during encoding to
 481 | prevent an infinite recursion (which would cause an OverflowError).
 482 | Otherwise, no such check takes place.</p>
 483 | <p>If allow_nan is true, then NaN, Infinity, and -Infinity will be
 484 | encoded as such.  This behavior is not JSON specification compliant,
 485 | but is consistent with most JavaScript based encoders and decoders.
 486 | Otherwise, it will be a ValueError to encode such floats.</p>
 487 | <p>If sort_keys is true, then the output of dictionaries will be
 488 | sorted by key; this is useful for regression tests to ensure
 489 | that JSON serializations can be compared on a day-to-day basis.</p>
 490 | <p>If indent is a non-negative integer, then JSON array
 491 | elements and object members will be pretty-printed with that
 492 | indent level.  An indent level of 0 will only insert newlines.
 493 | None is the most compact representation.</p>
 494 | <p>If specified, separators should be an (item_separator, key_separator)
 495 | tuple.  The default is (', ', ': ') if <em>indent</em> is <code class="docutils literal notranslate"><span class="pre">None</span></code> and
 496 | (',', ': ') otherwise.  To get the most compact JSON representation,
 497 | you should specify (',', ':') to eliminate whitespace.</p>
 498 | <p>If specified, default is a function that gets called for objects
 499 | that can’t otherwise be serialized.  It should return a JSON encodable
 500 | version of the object or raise a <code class="docutils literal notranslate"><span class="pre">TypeError</span></code>.</p>
 501 | <dl class="py method">
 502 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.api.BaseJsonApiEncoder.default">
 503 | <span class="sig-name descname"><span class="pre">default</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">obj</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/api.html#BaseJsonApiEncoder.default"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.api.BaseJsonApiEncoder.default" title="Link to this definition"></a></dt>
 504 | <dd><p>Override default encoder to handle custom types.</p>
 505 | <dl class="field-list simple">
 506 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 507 | <dd class="field-odd"><p><strong>obj</strong> – Object to encode</p>
 508 | </dd>
 509 | <dt class="field-even">Returns<span class="colon">:</span></dt>
 510 | <dd class="field-even"><p>JSON serializable representation of the object</p>
 511 | </dd>
 512 | <dt class="field-odd">Return type<span class="colon">:</span></dt>
 513 | <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Any" title="(in Python v3.14)"><em>Any</em></a></p>
 514 | </dd>
 515 | </dl>
 516 | </dd></dl>
 517 | 
 518 | </dd></dl>
 519 | 
 520 | <dl class="py class">
 521 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.api.BaseJsonApi">
 522 | <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">BaseJsonApi</span></span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/api.html#BaseJsonApi"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.api.BaseJsonApi" title="Link to this definition"></a></dt>
 523 | <dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.14)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
 524 | <p>Base class for JSON API responses.</p>
 525 | <p>Provides a standardized structure for API responses including metadata,
 526 | results, and error handling.</p>
 527 | <p>Construct with the arguments of creation (aoc); these are echoed back in the
 528 | JSON response. The object collapses into JSON on json dumps, which works
 529 | because everything it contains implements to_dict.</p>
 530 | <dl class="field-list simple">
 531 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 532 | <dd class="field-odd"><ul class="simple">
 533 | <li><p><strong>method</strong> – API method name</p></li>
 534 | <li><p><strong>args</strong> – Dictionary of API arguments</p></li>
 535 | <li><p><strong>index_state</strong> – indexing, complete, remote, etc.</p></li>
 536 | </ul>
 537 | </dd>
 538 | </dl>
 539 | <dl class="py method">
 540 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.api.BaseJsonApi.__init__">
 541 | <span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">method</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">index_state</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/api.html#BaseJsonApi.__init__"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.api.BaseJsonApi.__init__" title="Link to this definition"></a></dt>
 542 | <dd><p>Construct with the arguments of creation (aoc); these are echoed back in the
 543 | JSON response. The object collapses into JSON on json dumps, which works
 544 | because everything it contains implements to_dict.</p>
 545 | <dl class="field-list simple">
 546 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 547 | <dd class="field-odd"><ul class="simple">
 548 | <li><p><strong>method</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a>) – API method name</p></li>
 549 | <li><p><strong>args</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.14)"><em>dict</em></a><em>[</em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Any" title="(in Python v3.14)"><em>Any</em></a><em>]</em>) – Dictionary of API arguments</p></li>
 550 | <li><p><strong>index_state</strong> (<a class="reference internal" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexState" title="mcp_server_webcrawl.crawlers.base.adapter.IndexState"><em>IndexState</em></a><em> | </em><em>None</em>) – indexing, complete, remote, etc.</p></li>
 551 | </ul>
 552 | </dd>
 553 | </dl>
 554 | </dd></dl>
 555 | 
 556 | <dl class="py property">
 557 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.api.BaseJsonApi.total">
 558 | <em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">total</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><span class="pre">int</span></a></em><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.api.BaseJsonApi.total" title="Link to this definition"></a></dt>
 559 | <dd><p>Returns the total number of results.</p>
 560 | <dl class="field-list simple">
 561 | <dt class="field-odd">Returns<span class="colon">:</span></dt>
 562 | <dd class="field-odd"><p>Integer count of total results</p>
 563 | </dd>
 564 | </dl>
 565 | </dd></dl>
 566 | 
 567 | <dl class="py method">
 568 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.api.BaseJsonApi.get_results">
 569 | <span class="sig-name descname"><span class="pre">get_results</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/api.html#BaseJsonApi.get_results"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.api.BaseJsonApi.get_results" title="Link to this definition"></a></dt>
 570 | <dd><p>Returns list of results.</p>
 571 | <dl class="field-list simple">
 572 | <dt class="field-odd">Returns<span class="colon">:</span></dt>
 573 | <dd class="field-odd"><p>Results of type SiteResult or ResourceResult</p>
 574 | </dd>
 575 | <dt class="field-even">Return type<span class="colon">:</span></dt>
 576 | <dd class="field-even"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)">list</a>[<a class="reference internal" href="mcp_server_webcrawl.models.html#mcp_server_webcrawl.models.sites.SiteResult" title="mcp_server_webcrawl.models.sites.SiteResult"><em>SiteResult</em></a> | <a class="reference internal" href="mcp_server_webcrawl.models.html#mcp_server_webcrawl.models.resources.ResourceResult" title="mcp_server_webcrawl.models.resources.ResourceResult"><em>ResourceResult</em></a>]</p>
 577 | </dd>
 578 | </dl>
 579 | </dd></dl>
 580 | 
 581 | <dl class="py method">
 582 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.api.BaseJsonApi.set_results">
 583 | <span class="sig-name descname"><span class="pre">set_results</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">results</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">total</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">offset</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">limit</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/api.html#BaseJsonApi.set_results"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.api.BaseJsonApi.set_results" title="Link to this definition"></a></dt>
 584 | <dd><p>Set the results of the API response.</p>
 585 | <dl class="field-list simple">
 586 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 587 | <dd class="field-odd"><ul class="simple">
 588 | <li><p><strong>results</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)"><em>list</em></a><em>[</em><a class="reference internal" href="mcp_server_webcrawl.models.html#mcp_server_webcrawl.models.sites.SiteResult" title="mcp_server_webcrawl.models.sites.SiteResult"><em>SiteResult</em></a><em> | </em><a class="reference internal" href="mcp_server_webcrawl.models.html#mcp_server_webcrawl.models.resources.ResourceResult" title="mcp_server_webcrawl.models.resources.ResourceResult"><em>ResourceResult</em></a><em>]</em>) – List of result objects</p></li>
 589 | <li><p><strong>total</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><em>int</em></a>) – Total number of results (including those beyond limit)</p></li>
 590 | <li><p><strong>offset</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><em>int</em></a>) – Starting position in the full result set</p></li>
 591 | <li><p><strong>limit</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><em>int</em></a>) – Maximum number of results to include</p></li>
 592 | </ul>
 593 | </dd>
 594 | <dt class="field-even">Return type<span class="colon">:</span></dt>
 595 | <dd class="field-even"><p>None</p>
 596 | </dd>
 597 | </dl>
 598 | </dd></dl>
 599 | 
 600 | <dl class="py method">
 601 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.api.BaseJsonApi.append_error">
 602 | <span class="sig-name descname"><span class="pre">append_error</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">message</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/api.html#BaseJsonApi.append_error"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.api.BaseJsonApi.append_error" title="Link to this definition"></a></dt>
 603 | <dd><p>Add an error to the JSON response, visible to the endpoint LLM.</p>
 604 | <dl class="field-list simple">
 605 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 606 | <dd class="field-odd"><p><strong>message</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a>) – Error message to add</p>
 607 | </dd>
 608 | <dt class="field-even">Return type<span class="colon">:</span></dt>
 609 | <dd class="field-even"><p>None</p>
 610 | </dd>
 611 | </dl>
 612 | </dd></dl>
 613 | 
 614 | <dl class="py method">
 615 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.api.BaseJsonApi.to_dict">
 616 | <span class="sig-name descname"><span class="pre">to_dict</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/api.html#BaseJsonApi.to_dict"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.api.BaseJsonApi.to_dict" title="Link to this definition"></a></dt>
 617 | <dd><p>Convert the object to a JSON-serializable dictionary.</p>
 618 | <dl class="field-list simple">
 619 | <dt class="field-odd">Returns<span class="colon">:</span></dt>
 620 | <dd class="field-odd"><p>Dictionary representation of the API response</p>
 621 | </dd>
 622 | <dt class="field-even">Return type<span class="colon">:</span></dt>
 623 | <dd class="field-even"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.14)">dict</a>[<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)">str</a>, <a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)">str</a> | <a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)">int</a> | <a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.14)">float</a> | <a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.14)">bool</a> | <a class="reference external" href="https://docs.python.org/3/library/datetime.html#datetime.datetime" title="(in Python v3.14)"><em>datetime</em></a> | <a class="reference external" href="https://docs.python.org/3/library/pathlib.html#pathlib.Path" title="(in Python v3.14)"><em>Path</em></a> | <a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.14)">dict</a> | <a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)">list</a> | None]</p>
 624 | </dd>
 625 | </dl>
 626 | </dd></dl>
 627 | 
 628 | <dl class="py method">
 629 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.api.BaseJsonApi.to_json">
 630 | <span class="sig-name descname"><span class="pre">to_json</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/api.html#BaseJsonApi.to_json"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.api.BaseJsonApi.to_json" title="Link to this definition"></a></dt>
 631 | <dd><p>Return this object serialized as a JSON string.</p>
 632 | <dl class="field-list simple">
 633 | <dt class="field-odd">Returns<span class="colon">:</span></dt>
 634 | <dd class="field-odd"><p>JSON string representation of the API response</p>
 635 | </dd>
 636 | <dt class="field-even">Return type<span class="colon">:</span></dt>
 637 | <dd class="field-even"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)">str</a></p>
 638 | </dd>
 639 | </dl>
 640 | </dd></dl>
 641 | 
 642 | </dd></dl>
 643 | 
 644 | </section>
 645 | <section id="module-mcp_server_webcrawl.crawlers.base.crawler">
 646 | <span id="mcp-server-webcrawl-crawlers-base-crawler-module"></span><h2>mcp_server_webcrawl.crawlers.base.crawler module<a class="headerlink" href="#module-mcp_server_webcrawl.crawlers.base.crawler" title="Link to this heading"></a></h2>
 647 | <dl class="py class">
 648 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler">
 649 | <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">BaseCrawler</span></span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/crawler.html#BaseCrawler"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler" title="Link to this definition"></a></dt>
 650 | <dd><p>Bases: <a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.14)"><code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></a></p>
 651 | <p>Base crawler class that implements MCP server functionality.</p>
 652 | <p>This class provides the foundation for specialized crawlers to interact with
 653 | the MCP server and handle tool operations for web resources.</p>
 654 | <p>Initialize the BaseCrawler with a data source path and required adapter functions.</p>
 655 | <dl class="field-list simple">
 656 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 657 | <dd class="field-odd"><ul class="simple">
 658 | <li><p><strong>datasrc</strong> – path to the data source</p></li>
 659 | <li><p><strong>get_sites_func</strong> – function to retrieve sites from the data source</p></li>
 660 | <li><p><strong>get_resources_func</strong> – function to retrieve resources from the data source</p></li>
 661 | <li><p><strong>resource_field_mapping</strong> – mapping of resource field names to their qualified data source names (e.g. 'content' to 'ResourcesFullText.Content')</p></li>
 662 | </ul>
 663 | </dd>
 664 | </dl>
 665 | <dl class="py method">
 666 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.__init__">
 667 | <span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">datasrc</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">get_sites_func</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">get_resources_func</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">resource_field_mapping</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">{'content':</span> <span class="pre">'ResourcesFullText.Content',</span> <span class="pre">'created':</span> <span class="pre">'Resources.Created',</span> <span class="pre">'fulltext':</span> <span class="pre">'ResourcesFullText',</span> <span class="pre">'headers':</span> <span class="pre">'ResourcesFullText.Headers',</span> <span class="pre">'id':</span> <span class="pre">'ResourcesFullText.Id',</span> <span class="pre">'modified':</span> <span class="pre">'Resources.Modified',</span> <span class="pre">'site':</span> <span class="pre">'ResourcesFullText.Project',</span> <span class="pre">'size':</span> <span class="pre">'Resources.Size',</span> <span class="pre">'status':</span> <span class="pre">'Resources.Status',</span> <span class="pre">'time':</span> <span class="pre">'Resources.Time',</span> <span class="pre">'type':</span> <span class="pre">'ResourcesFullText.Type',</span> <span class="pre">'url':</span> <span class="pre">'ResourcesFullText.Url'}</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/crawler.html#BaseCrawler.__init__"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.__init__" title="Link to this definition"></a></dt>
 668 | <dd><p>Initialize the BaseCrawler with a data source path and required adapter functions.</p>
 669 | <dl class="field-list simple">
 670 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 671 | <dd class="field-odd"><ul class="simple">
 672 | <li><p><strong>datasrc</strong> (<a class="reference external" href="https://docs.python.org/3/library/pathlib.html#pathlib.Path" title="(in Python v3.14)"><em>Path</em></a>) – path to the data source</p></li>
 673 | <li><p><strong>get_sites_func</strong> (<a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Callable" title="(in Python v3.14)"><em>Callable</em></a>) – function to retrieve sites from the data source</p></li>
 674 | <li><p><strong>get_resources_func</strong> (<a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Callable" title="(in Python v3.14)"><em>Callable</em></a>) – function to retrieve resources from the data source</p></li>
 675 | <li><p><strong>resource_field_mapping</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.14)"><em>dict</em></a><em>[</em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a><em>]</em>) – mapping of resource field names to their qualified data source names (e.g. 'content' to 'ResourcesFullText.Content')</p></li>
 676 | </ul>
 677 | </dd>
 678 | <dt class="field-even">Return type<span class="colon">:</span></dt>
 679 | <dd class="field-even"><p>None</p>
 680 | </dd>
 681 | </dl>
 682 | </dd></dl>
 683 | 
 684 | <dl class="py property">
 685 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.datasrc">
 686 | <em class="property"><span class="pre">property</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">datasrc</span></span><em class="property"><span class="p"><span class="pre">:</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/pathlib.html#pathlib.Path" title="(in Python v3.14)"><span class="pre">Path</span></a></em><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.datasrc" title="Link to this definition"></a></dt>
 687 | <dd></dd></dl>
 688 | 
 689 | <dl class="py method">
 690 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.mcp_list_prompts">
 691 | <em class="property"><span class="pre">async</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">mcp_list_prompts</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/crawler.html#BaseCrawler.mcp_list_prompts"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.mcp_list_prompts" title="Link to this definition"></a></dt>
 692 | <dd><p>List available prompts (currently none).</p>
 693 | <dl class="field-list simple">
 694 | <dt class="field-odd">Return type<span class="colon">:</span></dt>
 695 | <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)">list</a></p>
 696 | </dd>
 697 | </dl>
 698 | </dd></dl>
 699 | 
 700 | <dl class="py method">
 701 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.mcp_list_resources">
 702 | <em class="property"><span class="pre">async</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">mcp_list_resources</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/crawler.html#BaseCrawler.mcp_list_resources"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.mcp_list_resources" title="Link to this definition"></a></dt>
 703 | <dd><p>List available resources (currently none).</p>
 704 | <dl class="field-list simple">
 705 | <dt class="field-odd">Return type<span class="colon">:</span></dt>
 706 | <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)">list</a></p>
 707 | </dd>
 708 | </dl>
 709 | </dd></dl>
 710 | 
 711 | <dl class="py method">
 712 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.serve">
 713 | <em class="property"><span class="pre">async</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">serve</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">stdin</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">stdout</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/crawler.html#BaseCrawler.serve"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.serve" title="Link to this definition"></a></dt>
 714 | <dd><p>Launch the awaitable server.</p>
 715 | <dl class="field-list simple">
 716 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 717 | <dd class="field-odd"><ul class="simple">
 718 | <li><p><strong>stdin</strong> (<em>AsyncFile</em><em>[</em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a><em>] </em><em>| </em><em>None</em>) – input stream for the server</p></li>
 719 | <li><p><strong>stdout</strong> (<em>AsyncFile</em><em>[</em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a><em>] </em><em>| </em><em>None</em>) – output stream for the server</p></li>
 720 | </ul>
 721 | </dd>
 722 | <dt class="field-even">Returns<span class="colon">:</span></dt>
 723 | <dd class="field-even"><p>The MCP server over stdio</p>
 724 | </dd>
 725 | <dt class="field-odd">Return type<span class="colon">:</span></dt>
 726 | <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.14)">dict</a>[<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)">str</a>, <a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Any" title="(in Python v3.14)"><em>Any</em></a>]</p>
 727 | </dd>
 728 | </dl>
 729 | </dd></dl>
 730 | 
 731 | <dl class="py method">
 732 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.get_initialization_options">
 733 | <span class="sig-name descname"><span class="pre">get_initialization_options</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/crawler.html#BaseCrawler.get_initialization_options"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.get_initialization_options" title="Link to this definition"></a></dt>
 734 | <dd><p>Get the MCP initialization object.</p>
 735 | <dl class="field-list simple">
 736 | <dt class="field-odd">Returns<span class="colon">:</span></dt>
 737 | <dd class="field-odd"><p>InitializationOptions object containing project information</p>
 738 | </dd>
 739 | <dt class="field-even">Return type<span class="colon">:</span></dt>
 740 | <dd class="field-even"><p><em>InitializationOptions</em></p>
 741 | </dd>
 742 | </dl>
 743 | </dd></dl>
 744 | 
 745 | <dl class="py method">
 746 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.get_sites_api_json">
 747 | <span class="sig-name descname"><span class="pre">get_sites_api_json</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/crawler.html#BaseCrawler.get_sites_api_json"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.get_sites_api_json" title="Link to this definition"></a></dt>
 748 | <dd><p>Get sites API result as JSON.</p>
 749 | <dl class="field-list simple">
 750 | <dt class="field-odd">Returns<span class="colon">:</span></dt>
 751 | <dd class="field-odd"><p>JSON string of sites API results</p>
 752 | </dd>
 753 | <dt class="field-even">Return type<span class="colon">:</span></dt>
 754 | <dd class="field-even"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)">str</a></p>
 755 | </dd>
 756 | </dl>
 757 | </dd></dl>
 758 | 
 759 | <dl class="py method">
 760 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.get_resources_api_json">
 761 | <span class="sig-name descname"><span class="pre">get_resources_api_json</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/crawler.html#BaseCrawler.get_resources_api_json"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.get_resources_api_json" title="Link to this definition"></a></dt>
 762 | <dd><p>Get resources API result as JSON.</p>
 763 | <dl class="field-list simple">
 764 | <dt class="field-odd">Returns<span class="colon">:</span></dt>
 765 | <dd class="field-odd"><p>JSON string of resources API results</p>
 766 | </dd>
 767 | <dt class="field-even">Return type<span class="colon">:</span></dt>
 768 | <dd class="field-even"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)">str</a></p>
 769 | </dd>
 770 | </dl>
 771 | </dd></dl>
 772 | 
 773 | <dl class="py method">
 774 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.get_sites_api">
 775 | <span class="sig-name descname"><span class="pre">get_sites_api</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">fields</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/crawler.html#BaseCrawler.get_sites_api"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.get_sites_api" title="Link to this definition"></a></dt>
 776 | <dd><dl class="field-list simple">
 777 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 778 | <dd class="field-odd"><ul class="simple">
 779 | <li><p><strong>ids</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)"><em>list</em></a><em>[</em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><em>int</em></a><em>] </em><em>| </em><em>None</em>) – </p></li>
 780 | <li><p><strong>fields</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)"><em>list</em></a><em>[</em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a><em>] </em><em>| </em><em>None</em>) – </p></li>
 781 | </ul>
 782 | </dd>
 783 | <dt class="field-even">Return type<span class="colon">:</span></dt>
 784 | <dd class="field-even"><p><a class="reference internal" href="#mcp_server_webcrawl.crawlers.base.api.BaseJsonApi" title="mcp_server_webcrawl.crawlers.base.api.BaseJsonApi"><em>BaseJsonApi</em></a></p>
 785 | </dd>
 786 | </dl>
 787 | </dd></dl>
 788 | 
 789 | <dl class="py method">
 790 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.get_resources_api">
 791 | <span class="sig-name descname"><span class="pre">get_resources_api</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">sites</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">query</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">''</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">fields</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">sort</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">limit</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">20</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">offset</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">0</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">extras</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">extrasRegex</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">extrasXpath</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" 
href="_modules/mcp_server_webcrawl/crawlers/base/crawler.html#BaseCrawler.get_resources_api"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.get_resources_api" title="Link to this definition"></a></dt>
 792 | <dd><dl class="field-list simple">
 793 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 794 | <dd class="field-odd"><ul class="simple">
 795 | <li><p><strong>sites</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)"><em>list</em></a><em>[</em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><em>int</em></a><em>] </em><em>| </em><em>None</em>) – </p></li>
 796 | <li><p><strong>query</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a>) – </p></li>
 797 | <li><p><strong>fields</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)"><em>list</em></a><em>[</em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a><em>] </em><em>| </em><em>None</em>) – </p></li>
 798 | <li><p><strong>sort</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a><em> | </em><em>None</em>) – </p></li>
 799 | <li><p><strong>limit</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><em>int</em></a>) – </p></li>
 800 | <li><p><strong>offset</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><em>int</em></a>) – </p></li>
 801 | <li><p><strong>extras</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)"><em>list</em></a><em>[</em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a><em>] </em><em>| </em><em>None</em>) – </p></li>
 802 | <li><p><strong>extrasRegex</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)"><em>list</em></a><em>[</em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a><em>] </em><em>| </em><em>None</em>) – </p></li>
 803 | <li><p><strong>extrasXpath</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)"><em>list</em></a><em>[</em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a><em>] </em><em>| </em><em>None</em>) – </p></li>
 804 | </ul>
 805 | </dd>
 806 | <dt class="field-even">Return type<span class="colon">:</span></dt>
 807 | <dd class="field-even"><p><a class="reference internal" href="#mcp_server_webcrawl.crawlers.base.api.BaseJsonApi" title="mcp_server_webcrawl.crawlers.base.api.BaseJsonApi"><em>BaseJsonApi</em></a></p>
 808 | </dd>
 809 | </dl>
 810 | </dd></dl>
 811 | 
 812 | <dl class="py method">
 813 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.mcp_list_tools">
 814 | <em class="property"><span class="pre">async</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">mcp_list_tools</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/crawler.html#BaseCrawler.mcp_list_tools"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.mcp_list_tools" title="Link to this definition"></a></dt>
 815 | <dd><p>List available tools.</p>
 816 | <dl class="field-list simple">
 817 | <dt class="field-odd">Returns<span class="colon">:</span></dt>
 818 | <dd class="field-odd"><p>List of available tools</p>
 819 | </dd>
 820 | <dt class="field-even">Raises<span class="colon">:</span></dt>
 821 | <dd class="field-even"><p><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#NotImplementedError" title="(in Python v3.14)"><strong>NotImplementedError</strong></a> – This method must be implemented by subclasses</p>
 822 | </dd>
 823 | <dt class="field-odd">Return type<span class="colon">:</span></dt>
 824 | <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)">list</a>[<em>Tool</em>]</p>
 825 | </dd>
 826 | </dl>
 827 | </dd></dl>
 828 | 
 829 | <dl class="py method">
 830 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.mcp_call_tool">
 831 | <em class="property"><span class="pre">async</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">mcp_call_tool</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">name</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">arguments</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/crawler.html#BaseCrawler.mcp_call_tool"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.mcp_call_tool" title="Link to this definition"></a></dt>
 832 | <dd><p>Handle tool execution requests. Subclasses can override this method outright, or call it
 833 | via super() and then tweak as needed. The base implementation is essentially a passthrough.</p>
 834 | <dl class="field-list simple">
 835 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 836 | <dd class="field-odd"><ul class="simple">
 837 | <li><p><strong>name</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a>) – name of the tool to call</p></li>
 838 | <li><p><strong>arguments</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.14)"><em>dict</em></a><em>[</em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Any" title="(in Python v3.14)"><em>Any</em></a><em>] </em><em>| </em><em>None</em>) – arguments to pass to the tool</p></li>
 839 | </ul>
 840 | </dd>
 841 | <dt class="field-even">Returns<span class="colon">:</span></dt>
 842 | <dd class="field-even"><p>List of content objects resulting from the tool execution</p>
 843 | </dd>
 844 | <dt class="field-odd">Raises<span class="colon">:</span></dt>
 845 | <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#ValueError" title="(in Python v3.14)"><strong>ValueError</strong></a> – If the specified tool does not exist</p>
 846 | </dd>
 847 | <dt class="field-even">Return type<span class="colon">:</span></dt>
 848 | <dd class="field-even"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)">list</a>[<em>TextContent</em> | <em>ImageContent</em> | <em>EmbeddedResource</em>]</p>
 849 | </dd>
 850 | </dl>
 851 | </dd></dl>
 852 | 
 853 | <dl class="py method">
 854 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.get_thumbnails">
 855 | <span class="sig-name descname"><span class="pre">get_thumbnails</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">results</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/crawler.html#BaseCrawler.get_thumbnails"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler.get_thumbnails" title="Link to this definition"></a></dt>
 856 | <dd><dl class="field-list simple">
 857 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 858 | <dd class="field-odd"><p><strong>results</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)"><em>list</em></a><em>[</em><a class="reference internal" href="mcp_server_webcrawl.models.html#mcp_server_webcrawl.models.resources.ResourceResult" title="mcp_server_webcrawl.models.resources.ResourceResult"><em>ResourceResult</em></a><em>]</em>) – </p>
 859 | </dd>
 860 | <dt class="field-even">Return type<span class="colon">:</span></dt>
 861 | <dd class="field-even"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)">list</a>[<em>ImageContent</em>]</p>
 862 | </dd>
 863 | </dl>
 864 | </dd></dl>
 865 | 
 866 | </dd></dl>
 867 | 
 868 | </section>
 869 | <section id="module-mcp_server_webcrawl.crawlers.base.indexed">
 870 | <span id="mcp-server-webcrawl-crawlers-base-indexed-module"></span><h2>mcp_server_webcrawl.crawlers.base.indexed module<a class="headerlink" href="#module-mcp_server_webcrawl.crawlers.base.indexed" title="Link to this heading"></a></h2>
 871 | <dl class="py class">
 872 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.indexed.IndexedManager">
 873 | <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">IndexedManager</span></span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/indexed.html#IndexedManager"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.indexed.IndexedManager" title="Link to this definition"></a></dt>
 874 | <dd><p>Bases: <a class="reference internal" href="#mcp_server_webcrawl.crawlers.base.adapter.BaseManager" title="mcp_server_webcrawl.crawlers.base.adapter.BaseManager"><code class="xref py py-class docutils literal notranslate"><span class="pre">BaseManager</span></code></a></p>
 875 | <p>Initialize the manager with statistics.</p>
 876 | <dl class="py method">
 877 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.indexed.IndexedManager.__init__">
 878 | <span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/indexed.html#IndexedManager.__init__"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.indexed.IndexedManager.__init__" title="Link to this definition"></a></dt>
 879 | <dd><p>Initialize the manager with statistics.</p>
 880 | </dd></dl>
 881 | 
 882 | <dl class="py method">
 883 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.indexed.IndexedManager.get_connection">
 884 | <span class="sig-name descname"><span class="pre">get_connection</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">group</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/indexed.html#IndexedManager.get_connection"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.indexed.IndexedManager.get_connection" title="Link to this definition"></a></dt>
 885 | <dd><p>Get database connection for sites in the group, creating if needed.</p>
 886 | <dl class="field-list simple">
 887 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 888 | <dd class="field-odd"><p><strong>group</strong> (<a class="reference internal" href="#mcp_server_webcrawl.crawlers.base.adapter.SitesGroup" title="mcp_server_webcrawl.crawlers.base.adapter.SitesGroup"><em>SitesGroup</em></a>) – group of sites to connect to</p>
 889 | </dd>
 890 | <dt class="field-even">Returns<span class="colon">:</span></dt>
 891 | <dd class="field-even"><p>Tuple of:</p>
 892 | <ul class="simple">
 893 | <li><p>SQLite connection to in-memory database with data loaded, or None if building</p></li>
 894 | <li><p>IndexState associated with this database</p></li>
 895 | </ul>
 896 | </dd>
 897 | <dt class="field-odd">Return type<span class="colon">:</span></dt>
 898 | <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#tuple" title="(in Python v3.14)">tuple</a>[<a class="reference external" href="https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection" title="(in Python v3.14)"><em>Connection</em></a> | None, <a class="reference internal" href="#mcp_server_webcrawl.crawlers.base.adapter.IndexState" title="mcp_server_webcrawl.crawlers.base.adapter.IndexState"><em>IndexState</em></a>]</p>
 899 | </dd>
 900 | </dl>
 901 | </dd></dl>
 902 | 
 903 | <dl class="py method">
 904 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.indexed.IndexedManager.get_sites_for_directories">
 905 | <span class="sig-name descname"><span class="pre">get_sites_for_directories</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">datasrc</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">ids</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">fields</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/indexed.html#IndexedManager.get_sites_for_directories"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.indexed.IndexedManager.get_sites_for_directories" title="Link to this definition"></a></dt>
 906 | <dd><p>List site directories in the datasrc directory as sites.</p>
 907 | <dl class="field-list simple">
 908 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 909 | <dd class="field-odd"><ul class="simple">
 910 | <li><p><strong>datasrc</strong> (<a class="reference external" href="https://docs.python.org/3/library/pathlib.html#pathlib.Path" title="(in Python v3.14)"><em>Path</em></a>) – path to the directory containing site subdirectories</p></li>
 911 | <li><p><strong>ids</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)"><em>list</em></a><em>[</em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><em>int</em></a><em>] </em><em>| </em><em>None</em>) – optional list of site IDs to filter by</p></li>
 912 | <li><p><strong>fields</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)"><em>list</em></a><em>[</em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a><em>] </em><em>| </em><em>None</em>) – optional list of fields to include in the response</p></li>
 913 | </ul>
 914 | </dd>
 915 | <dt class="field-even">Returns<span class="colon">:</span></dt>
 916 | <dd class="field-even"><p>List of SiteResult objects, one for each site directory</p>
 917 | </dd>
 918 | <dt class="field-odd">Return type<span class="colon">:</span></dt>
 919 | <dd class="field-odd"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)">list</a>[<a class="reference internal" href="mcp_server_webcrawl.models.html#mcp_server_webcrawl.models.sites.SiteResult" title="mcp_server_webcrawl.models.sites.SiteResult"><em>SiteResult</em></a>]</p>
 920 | </dd>
 921 | </dl>
 922 | <p class="rubric">Notes</p>
 923 | <p>Returns an empty list if the datasrc directory doesn’t exist.</p>
 924 | </dd></dl>
 925 | 
 926 | </dd></dl>
 927 | 
 928 | <dl class="py class">
 929 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.indexed.IndexedCrawler">
 930 | <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">IndexedCrawler</span></span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/indexed.html#IndexedCrawler"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.indexed.IndexedCrawler" title="Link to this definition"></a></dt>
 931 | <dd><p>Bases: <a class="reference internal" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler" title="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler"><code class="xref py py-class docutils literal notranslate"><span class="pre">BaseCrawler</span></code></a></p>
 932 | <p>A crawler implementation for data sources that load into an in-memory sqlite.
 933 | Shares commonality between specialized crawlers.</p>
 934 | <p>Initialize the IndexedCrawler with a data source path and required adapter functions.</p>
 935 | <dl class="field-list simple">
 936 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 937 | <dd class="field-odd"><ul class="simple">
 938 | <li><p><strong>datasrc</strong> – path to the data source</p></li>
 939 | <li><p><strong>get_sites_func</strong> – function to retrieve sites from the data source</p></li>
 940 | <li><p><strong>get_resources_func</strong> – function to retrieve resources from the data source</p></li>
 941 | <li><p><strong>resource_field_mapping</strong> – mapping of resource field names to database table/column identifiers</p></li>
 942 | </ul>
 943 | </dd>
 944 | </dl>
 945 | <dl class="py method">
 946 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.indexed.IndexedCrawler.__init__">
 947 | <span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">datasrc</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">get_sites_func</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">get_resources_func</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">resource_field_mapping</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">{'content':</span> <span class="pre">'ResourcesFullText.Content',</span> <span class="pre">'created':</span> <span class="pre">'Resources.Created',</span> <span class="pre">'fulltext':</span> <span class="pre">'ResourcesFullText',</span> <span class="pre">'headers':</span> <span class="pre">'ResourcesFullText.Headers',</span> <span class="pre">'id':</span> <span class="pre">'ResourcesFullText.Id',</span> <span class="pre">'modified':</span> <span class="pre">'Resources.Modified',</span> <span class="pre">'site':</span> <span class="pre">'ResourcesFullText.Project',</span> <span class="pre">'size':</span> <span class="pre">'Resources.Size',</span> <span class="pre">'status':</span> <span class="pre">'Resources.Status',</span> <span class="pre">'time':</span> <span class="pre">'Resources.Time',</span> <span class="pre">'type':</span> <span class="pre">'ResourcesFullText.Type',</span> <span class="pre">'url':</span> <span class="pre">'ResourcesFullText.Url'}</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/indexed.html#IndexedCrawler.__init__"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.indexed.IndexedCrawler.__init__" title="Link to this definition"></a></dt>
 948 | <dd><p>Initialize the IndexedCrawler with a data source path and required adapter functions.</p>
 949 | <dl class="field-list simple">
 950 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
 951 | <dd class="field-odd"><ul class="simple">
 952 | <li><p><strong>datasrc</strong> (<a class="reference external" href="https://docs.python.org/3/library/pathlib.html#pathlib.Path" title="(in Python v3.14)"><em>Path</em></a>) – path to the data source</p></li>
 953 | <li><p><strong>get_sites_func</strong> (<a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Callable" title="(in Python v3.14)"><em>Callable</em></a>) – function to retrieve sites from the data source</p></li>
 954 | <li><p><strong>get_resources_func</strong> (<a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Callable" title="(in Python v3.14)"><em>Callable</em></a>) – function to retrieve resources from the data source</p></li>
 955 | <li><p><strong>resource_field_mapping</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.14)"><em>dict</em></a><em>[</em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a><em>]</em>) – mapping of resource field names to database table/column identifiers</p></li>
 956 | </ul>
 957 | </dd>
 958 | <dt class="field-even">Return type<span class="colon">:</span></dt>
 959 | <dd class="field-even"><p>None</p>
 960 | </dd>
 961 | </dl>
 962 | </dd></dl>
 963 | 
 964 | <dl class="py method">
 965 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.indexed.IndexedCrawler.mcp_list_tools">
 966 | <em class="property"><span class="pre">async</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">mcp_list_tools</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/indexed.html#IndexedCrawler.mcp_list_tools"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.indexed.IndexedCrawler.mcp_list_tools" title="Link to this definition"></a></dt>
 967 | <dd><p>List available tools for this crawler.</p>
 968 | <dl class="field-list simple">
 969 | <dt class="field-odd">Returns<span class="colon">:</span></dt>
 970 | <dd class="field-odd"><p>List of Tool objects</p>
 971 | </dd>
 972 | <dt class="field-even">Return type<span class="colon">:</span></dt>
 973 | <dd class="field-even"><p><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.14)">list</a>[<em>Tool</em>]</p>
 974 | </dd>
 975 | </dl>
 976 | </dd></dl>
 977 | 
 978 | </dd></dl>
 979 | 
 980 | </section>
 981 | <section id="module-mcp_server_webcrawl.crawlers.base.tests">
 982 | <span id="mcp-server-webcrawl-crawlers-base-tests-module"></span><h2>mcp_server_webcrawl.crawlers.base.tests module<a class="headerlink" href="#module-mcp_server_webcrawl.crawlers.base.tests" title="Link to this heading"></a></h2>
 983 | <dl class="py class">
 984 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests">
 985 | <em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">BaseCrawlerTests</span></span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/tests.html#BaseCrawlerTests"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests" title="Link to this definition"></a></dt>
 986 | <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">TestCase</span></code></p>
 987 | <p>Create an instance of the class that will use the named test
 988 | method when executed. Raises a ValueError if the instance does
 989 | not have a method with the specified name.</p>
 990 | <dl class="py method">
 991 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.setUp">
 992 | <span class="sig-name descname"><span class="pre">setUp</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/tests.html#BaseCrawlerTests.setUp"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.setUp" title="Link to this definition"></a></dt>
 993 | <dd><p>Hook method for setting up the test fixture before exercising it.</p>
 994 | </dd></dl>
 995 | 
 996 | <dl class="py method">
 997 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_search_tests">
 998 | <span class="sig-name descname"><span class="pre">run_pragmar_search_tests</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">crawler</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">site_id</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/tests.html#BaseCrawlerTests.run_pragmar_search_tests"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_search_tests" title="Link to this definition"></a></dt>
 999 | <dd><p>Run a battery of database checks and Boolean validation against the crawler.</p>
1000 | <dl class="field-list simple">
1001 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
1002 | <dd class="field-odd"><ul class="simple">
1003 | <li><p><strong>crawler</strong> (<a class="reference internal" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler" title="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler"><em>BaseCrawler</em></a>) – </p></li>
1004 | <li><p><strong>site_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><em>int</em></a>) – </p></li>
1005 | </ul>
1006 | </dd>
1007 | </dl>
1008 | </dd></dl>
1009 | 
1010 | <dl class="py method">
1011 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_image_tests">
1012 | <span class="sig-name descname"><span class="pre">run_pragmar_image_tests</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">crawler</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">pragmar_site_id</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/tests.html#BaseCrawlerTests.run_pragmar_image_tests"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_image_tests" title="Link to this definition"></a></dt>
1013 | <dd><p>Test InterroBot-specific image handling and thumbnails.</p>
1014 | <dl class="field-list simple">
1015 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
1016 | <dd class="field-odd"><ul class="simple">
1017 | <li><p><strong>crawler</strong> (<a class="reference internal" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler" title="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler"><em>BaseCrawler</em></a>) – </p></li>
1018 | <li><p><strong>pragmar_site_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><em>int</em></a>) – </p></li>
1019 | </ul>
1020 | </dd>
1021 | </dl>
1022 | </dd></dl>
1023 | 
1024 | <dl class="py method">
1025 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_sites_resources_tests">
1026 | <span class="sig-name descname"><span class="pre">run_sites_resources_tests</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">crawler</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">pragmar_site_id</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">example_site_id</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/tests.html#BaseCrawlerTests.run_sites_resources_tests"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_sites_resources_tests" title="Link to this definition"></a></dt>
1027 | <dd><dl class="field-list simple">
1028 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
1029 | <dd class="field-odd"><ul class="simple">
1030 | <li><p><strong>crawler</strong> (<a class="reference internal" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler" title="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler"><em>BaseCrawler</em></a>) – </p></li>
1031 | <li><p><strong>pragmar_site_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><em>int</em></a>) – </p></li>
1032 | <li><p><strong>example_site_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><em>int</em></a>) – </p></li>
1033 | </ul>
1034 | </dd>
1035 | </dl>
1036 | </dd></dl>
1037 | 
1038 | <dl class="py method">
1039 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_tokenizer_tests">
1040 | <span class="sig-name descname"><span class="pre">run_pragmar_tokenizer_tests</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">crawler</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">site_id</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/tests.html#BaseCrawlerTests.run_pragmar_tokenizer_tests"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_tokenizer_tests" title="Link to this definition"></a></dt>
1041 | <dd><p>FTS hyphens and underscores are particularly challenging and thus
1042 | have a dedicated test. These must be configured in multiple places,
1043 | including the CREATE TABLE … tokenizer, and must also be handled by the query
1044 | parser.</p>
1045 | <dl class="field-list simple">
1046 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
1047 | <dd class="field-odd"><ul class="simple">
1048 | <li><p><strong>crawler</strong> (<a class="reference internal" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler" title="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler"><em>BaseCrawler</em></a>) – </p></li>
1049 | <li><p><strong>site_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><em>int</em></a>) – </p></li>
1050 | </ul>
1051 | </dd>
1052 | </dl>
1053 | </dd></dl>
1054 | 
1055 | <dl class="py method">
1056 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_site_tests">
1057 | <span class="sig-name descname"><span class="pre">run_pragmar_site_tests</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">crawler</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">site_id</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/tests.html#BaseCrawlerTests.run_pragmar_site_tests"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_site_tests" title="Link to this definition"></a></dt>
1058 | <dd><dl class="field-list simple">
1059 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
1060 | <dd class="field-odd"><ul class="simple">
1061 | <li><p><strong>crawler</strong> (<a class="reference internal" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler" title="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler"><em>BaseCrawler</em></a>) – </p></li>
1062 | <li><p><strong>site_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><em>int</em></a>) – </p></li>
1063 | </ul>
1064 | </dd>
1065 | </dl>
1066 | </dd></dl>
1067 | 
1068 | <dl class="py method">
1069 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_sort_tests">
1070 | <span class="sig-name descname"><span class="pre">run_pragmar_sort_tests</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">crawler</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">site_id</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/tests.html#BaseCrawlerTests.run_pragmar_sort_tests"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_sort_tests" title="Link to this definition"></a></dt>
1071 | <dd><p>Test sorting functionality with performance optimizations.</p>
1072 | <dl class="field-list simple">
1073 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
1074 | <dd class="field-odd"><ul class="simple">
1075 | <li><p><strong>crawler</strong> (<a class="reference internal" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler" title="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler"><em>BaseCrawler</em></a>) – </p></li>
1076 | <li><p><strong>site_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><em>int</em></a>) – </p></li>
1077 | </ul>
1078 | </dd>
1079 | </dl>
1080 | </dd></dl>
1081 | 
1082 | <dl class="py method">
1083 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_content_tests">
1084 | <span class="sig-name descname"><span class="pre">run_pragmar_content_tests</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">crawler</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">site_id</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">html_leniency</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/tests.html#BaseCrawlerTests.run_pragmar_content_tests"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_content_tests" title="Link to this definition"></a></dt>
1085 | <dd><dl class="field-list simple">
1086 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
1087 | <dd class="field-odd"><ul class="simple">
1088 | <li><p><strong>crawler</strong> (<a class="reference internal" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler" title="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler"><em>BaseCrawler</em></a>) – </p></li>
1089 | <li><p><strong>site_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><em>int</em></a>) – </p></li>
1090 | <li><p><strong>html_leniency</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.14)"><em>bool</em></a>) – </p></li>
1091 | </ul>
1092 | </dd>
1093 | </dl>
1094 | </dd></dl>
1095 | 
1096 | <dl class="py method">
1097 | <dt class="sig sig-object py" id="mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_report">
1098 | <span class="sig-name descname"><span class="pre">run_pragmar_report</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">crawler</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">site_id</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">heading</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/mcp_server_webcrawl/crawlers/base/tests.html#BaseCrawlerTests.run_pragmar_report"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_report" title="Link to this definition"></a></dt>
1099 | <dd><p>Generate a comprehensive report of all resources for a site.
1100 | Returns a formatted string with counts and URLs by type.</p>
1101 | <dl class="field-list simple">
1102 | <dt class="field-odd">Parameters<span class="colon">:</span></dt>
1103 | <dd class="field-odd"><ul class="simple">
1104 | <li><p><strong>crawler</strong> (<a class="reference internal" href="#mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler" title="mcp_server_webcrawl.crawlers.base.crawler.BaseCrawler"><em>BaseCrawler</em></a>) – </p></li>
1105 | <li><p><strong>site_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.14)"><em>int</em></a>) – </p></li>
1106 | <li><p><strong>heading</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.14)"><em>str</em></a>) – </p></li>
1107 | </ul>
1108 | </dd>
1109 | </dl>
1110 | </dd></dl>
1111 | 
1112 | </dd></dl>
1113 | 
1114 | </section>
1115 | <section id="module-mcp_server_webcrawl.crawlers.base">
1116 | <span id="module-contents"></span><h2>Module contents<a class="headerlink" href="#module-mcp_server_webcrawl.crawlers.base" title="Link to this heading"></a></h2>
1117 | </section>
1118 | </section>
1119 | 
1120 | 
1121 |            </div>
1122 |           </div>
1123 |           <footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
1124 |         <a href="mcp_server_webcrawl.crawlers.html" class="btn btn-neutral float-left" title="mcp_server_webcrawl.crawlers package" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
1125 |         <a href="mcp_server_webcrawl.crawlers.archivebox.html" class="btn btn-neutral float-right" title="mcp_server_webcrawl.crawlers.archivebox package" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
1126 |     </div>
1127 | 
1128 |   <hr/>
1129 | 
1130 |   <div role="contentinfo">
1131 |     <p>&#169; Copyright 2025, pragmar.</p>
1132 |   </div>
1133 | 
1134 |   Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
1135 |     <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
1136 |     provided by <a href="https://readthedocs.org">Read the Docs</a>.
1137 |    
1138 | 
1139 | </footer>
1140 |         </div>
1141 |       </div>
1142 |     </section>
1143 |   </div>
1144 |   <script>
1145 |       jQuery(function () {
1146 |           SphinxRtdTheme.Navigation.enable(true);
1147 |       });
1148 |   </script> 
1149 | 
1150 | </body>
1151 | </html>
```
Page 31/35 · First · Prev · Next · Last