#
tokens: 59912/50000 1/216 files (page 34/35)
lines: on (toggle) GitHub
raw markdown copy reset
This is page 34 of 35. Use http://codebase.md/pragmar/mcp_server_webcrawl/crawlers/warc/tests.html?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .gitignore
├── CONTRIBUTING.md
├── docs
│   ├── _images
│   │   ├── interactive.document.webp
│   │   ├── interactive.search.webp
│   │   └── mcpswc.svg
│   ├── _modules
│   │   ├── index.html
│   │   ├── mcp_server_webcrawl
│   │   │   ├── crawlers
│   │   │   │   ├── archivebox
│   │   │   │   │   ├── adapter.html
│   │   │   │   │   ├── crawler.html
│   │   │   │   │   └── tests.html
│   │   │   │   ├── base
│   │   │   │   │   ├── adapter.html
│   │   │   │   │   ├── api.html
│   │   │   │   │   ├── crawler.html
│   │   │   │   │   ├── indexed.html
│   │   │   │   │   └── tests.html
│   │   │   │   ├── httrack
│   │   │   │   │   ├── adapter.html
│   │   │   │   │   ├── crawler.html
│   │   │   │   │   └── tests.html
│   │   │   │   ├── interrobot
│   │   │   │   │   ├── adapter.html
│   │   │   │   │   ├── crawler.html
│   │   │   │   │   └── tests.html
│   │   │   │   ├── katana
│   │   │   │   │   ├── adapter.html
│   │   │   │   │   ├── crawler.html
│   │   │   │   │   └── tests.html
│   │   │   │   ├── siteone
│   │   │   │   │   ├── adapter.html
│   │   │   │   │   ├── crawler.html
│   │   │   │   │   └── tests.html
│   │   │   │   ├── warc
│   │   │   │   │   ├── adapter.html
│   │   │   │   │   ├── crawler.html
│   │   │   │   │   └── tests.html
│   │   │   │   └── wget
│   │   │   │       ├── adapter.html
│   │   │   │       ├── crawler.html
│   │   │   │       └── tests.html
│   │   │   ├── crawlers.html
│   │   │   ├── extras
│   │   │   │   ├── markdown.html
│   │   │   │   ├── regex.html
│   │   │   │   ├── snippets.html
│   │   │   │   ├── thumbnails.html
│   │   │   │   └── xpath.html
│   │   │   ├── interactive
│   │   │   │   ├── highlights.html
│   │   │   │   ├── search.html
│   │   │   │   ├── session.html
│   │   │   │   └── ui.html
│   │   │   ├── main.html
│   │   │   ├── models
│   │   │   │   ├── resources.html
│   │   │   │   └── sites.html
│   │   │   ├── templates
│   │   │   │   └── tests.html
│   │   │   ├── utils
│   │   │   │   ├── blobs.html
│   │   │   │   ├── cli.html
│   │   │   │   ├── logger.html
│   │   │   │   ├── querycache.html
│   │   │   │   ├── server.html
│   │   │   │   └── tools.html
│   │   │   └── utils.html
│   │   └── re.html
│   ├── _sources
│   │   ├── guides
│   │   │   ├── archivebox.rst.txt
│   │   │   ├── httrack.rst.txt
│   │   │   ├── interrobot.rst.txt
│   │   │   ├── katana.rst.txt
│   │   │   ├── siteone.rst.txt
│   │   │   ├── warc.rst.txt
│   │   │   └── wget.rst.txt
│   │   ├── guides.rst.txt
│   │   ├── index.rst.txt
│   │   ├── installation.rst.txt
│   │   ├── interactive.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.archivebox.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.base.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.httrack.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.interrobot.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.katana.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.siteone.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.warc.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.wget.rst.txt
│   │   ├── mcp_server_webcrawl.extras.rst.txt
│   │   ├── mcp_server_webcrawl.interactive.rst.txt
│   │   ├── mcp_server_webcrawl.models.rst.txt
│   │   ├── mcp_server_webcrawl.rst.txt
│   │   ├── mcp_server_webcrawl.templates.rst.txt
│   │   ├── mcp_server_webcrawl.utils.rst.txt
│   │   ├── modules.rst.txt
│   │   ├── prompts.rst.txt
│   │   └── usage.rst.txt
│   ├── _static
│   │   ├── _sphinx_javascript_frameworks_compat.js
│   │   ├── basic.css
│   │   ├── css
│   │   │   ├── badge_only.css
│   │   │   ├── fonts
│   │   │   │   ├── fontawesome-webfont.eot
│   │   │   │   ├── fontawesome-webfont.svg
│   │   │   │   ├── fontawesome-webfont.ttf
│   │   │   │   ├── fontawesome-webfont.woff
│   │   │   │   ├── fontawesome-webfont.woff2
│   │   │   │   ├── lato-bold-italic.woff
│   │   │   │   ├── lato-bold-italic.woff2
│   │   │   │   ├── lato-bold.woff
│   │   │   │   ├── lato-bold.woff2
│   │   │   │   ├── lato-normal-italic.woff
│   │   │   │   ├── lato-normal-italic.woff2
│   │   │   │   ├── lato-normal.woff
│   │   │   │   ├── lato-normal.woff2
│   │   │   │   ├── Roboto-Slab-Bold.woff
│   │   │   │   ├── Roboto-Slab-Bold.woff2
│   │   │   │   ├── Roboto-Slab-Regular.woff
│   │   │   │   └── Roboto-Slab-Regular.woff2
│   │   │   └── theme.css
│   │   ├── doctools.js
│   │   ├── documentation_options.js
│   │   ├── file.png
│   │   ├── fonts
│   │   │   ├── Lato
│   │   │   │   ├── lato-bold.eot
│   │   │   │   ├── lato-bold.ttf
│   │   │   │   ├── lato-bold.woff
│   │   │   │   ├── lato-bold.woff2
│   │   │   │   ├── lato-bolditalic.eot
│   │   │   │   ├── lato-bolditalic.ttf
│   │   │   │   ├── lato-bolditalic.woff
│   │   │   │   ├── lato-bolditalic.woff2
│   │   │   │   ├── lato-italic.eot
│   │   │   │   ├── lato-italic.ttf
│   │   │   │   ├── lato-italic.woff
│   │   │   │   ├── lato-italic.woff2
│   │   │   │   ├── lato-regular.eot
│   │   │   │   ├── lato-regular.ttf
│   │   │   │   ├── lato-regular.woff
│   │   │   │   └── lato-regular.woff2
│   │   │   └── RobotoSlab
│   │   │       ├── roboto-slab-v7-bold.eot
│   │   │       ├── roboto-slab-v7-bold.ttf
│   │   │       ├── roboto-slab-v7-bold.woff
│   │   │       ├── roboto-slab-v7-bold.woff2
│   │   │       ├── roboto-slab-v7-regular.eot
│   │   │       ├── roboto-slab-v7-regular.ttf
│   │   │       ├── roboto-slab-v7-regular.woff
│   │   │       └── roboto-slab-v7-regular.woff2
│   │   ├── images
│   │   │   ├── interactive.document.png
│   │   │   ├── interactive.document.webp
│   │   │   ├── interactive.search.png
│   │   │   ├── interactive.search.webp
│   │   │   └── mcpswc.svg
│   │   ├── jquery.js
│   │   ├── js
│   │   │   ├── badge_only.js
│   │   │   ├── theme.js
│   │   │   └── versions.js
│   │   ├── language_data.js
│   │   ├── minus.png
│   │   ├── plus.png
│   │   ├── pygments.css
│   │   ├── searchtools.js
│   │   └── sphinx_highlight.js
│   ├── .buildinfo
│   ├── .nojekyll
│   ├── genindex.html
│   ├── guides
│   │   ├── archivebox.html
│   │   ├── httrack.html
│   │   ├── interrobot.html
│   │   ├── katana.html
│   │   ├── siteone.html
│   │   ├── warc.html
│   │   └── wget.html
│   ├── guides.html
│   ├── index.html
│   ├── installation.html
│   ├── interactive.html
│   ├── mcp_server_webcrawl.crawlers.archivebox.html
│   ├── mcp_server_webcrawl.crawlers.base.html
│   ├── mcp_server_webcrawl.crawlers.html
│   ├── mcp_server_webcrawl.crawlers.httrack.html
│   ├── mcp_server_webcrawl.crawlers.interrobot.html
│   ├── mcp_server_webcrawl.crawlers.katana.html
│   ├── mcp_server_webcrawl.crawlers.siteone.html
│   ├── mcp_server_webcrawl.crawlers.warc.html
│   ├── mcp_server_webcrawl.crawlers.wget.html
│   ├── mcp_server_webcrawl.extras.html
│   ├── mcp_server_webcrawl.html
│   ├── mcp_server_webcrawl.interactive.html
│   ├── mcp_server_webcrawl.models.html
│   ├── mcp_server_webcrawl.templates.html
│   ├── mcp_server_webcrawl.utils.html
│   ├── modules.html
│   ├── objects.inv
│   ├── prompts.html
│   ├── py-modindex.html
│   ├── search.html
│   ├── searchindex.js
│   └── usage.html
├── LICENSE
├── MANIFEST.in
├── prompts
│   ├── audit404.md
│   ├── auditfiles.md
│   ├── auditperf.md
│   ├── auditseo.md
│   ├── gopher.md
│   ├── README.md
│   └── testsearch.md
├── pyproject.toml
├── README.md
├── setup.py
├── sphinx
│   ├── _static
│   │   └── images
│   │       ├── interactive.document.png
│   │       ├── interactive.document.webp
│   │       ├── interactive.search.png
│   │       ├── interactive.search.webp
│   │       └── mcpswc.svg
│   ├── _templates
│   │   └── layout.html
│   ├── conf.py
│   ├── guides
│   │   ├── archivebox.rst
│   │   ├── httrack.rst
│   │   ├── interrobot.rst
│   │   ├── katana.rst
│   │   ├── siteone.rst
│   │   ├── warc.rst
│   │   └── wget.rst
│   ├── guides.rst
│   ├── index.rst
│   ├── installation.rst
│   ├── interactive.rst
│   ├── make.bat
│   ├── Makefile
│   ├── mcp_server_webcrawl.crawlers.archivebox.rst
│   ├── mcp_server_webcrawl.crawlers.base.rst
│   ├── mcp_server_webcrawl.crawlers.httrack.rst
│   ├── mcp_server_webcrawl.crawlers.interrobot.rst
│   ├── mcp_server_webcrawl.crawlers.katana.rst
│   ├── mcp_server_webcrawl.crawlers.rst
│   ├── mcp_server_webcrawl.crawlers.siteone.rst
│   ├── mcp_server_webcrawl.crawlers.warc.rst
│   ├── mcp_server_webcrawl.crawlers.wget.rst
│   ├── mcp_server_webcrawl.extras.rst
│   ├── mcp_server_webcrawl.interactive.rst
│   ├── mcp_server_webcrawl.models.rst
│   ├── mcp_server_webcrawl.rst
│   ├── mcp_server_webcrawl.templates.rst
│   ├── mcp_server_webcrawl.utils.rst
│   ├── modules.rst
│   ├── prompts.rst
│   ├── readme.txt
│   └── usage.rst
└── src
    └── mcp_server_webcrawl
        ├── __init__.py
        ├── crawlers
        │   ├── __init__.py
        │   ├── archivebox
        │   │   ├── __init__.py
        │   │   ├── adapter.py
        │   │   ├── crawler.py
        │   │   └── tests.py
        │   ├── base
        │   │   ├── __init__.py
        │   │   ├── adapter.py
        │   │   ├── api.py
        │   │   ├── crawler.py
        │   │   ├── indexed.py
        │   │   └── tests.py
        │   ├── httrack
        │   │   ├── __init__.py
        │   │   ├── adapter.py
        │   │   ├── crawler.py
        │   │   └── tests.py
        │   ├── interrobot
        │   │   ├── __init__.py
        │   │   ├── adapter.py
        │   │   ├── crawler.py
        │   │   └── tests.py
        │   ├── katana
        │   │   ├── __init__.py
        │   │   ├── adapter.py
        │   │   ├── crawler.py
        │   │   └── tests.py
        │   ├── siteone
        │   │   ├── __init__.py
        │   │   ├── adapter.py
        │   │   ├── crawler.py
        │   │   └── tests.py
        │   ├── warc
        │   │   ├── __init__.py
        │   │   ├── adapter.py
        │   │   ├── crawler.py
        │   │   └── tests.py
        │   └── wget
        │       ├── __init__.py
        │       ├── adapter.py
        │       ├── crawler.py
        │       └── tests.py
        ├── extras
        │   ├── __init__.py
        │   ├── markdown.py
        │   ├── regex.py
        │   ├── snippets.py
        │   ├── thumbnails.py
        │   └── xpath.py
        ├── interactive
        │   ├── __init__.py
        │   ├── highlights.py
        │   ├── search.py
        │   ├── session.py
        │   ├── ui.py
        │   └── views
        │       ├── base.py
        │       ├── document.py
        │       ├── help.py
        │       ├── requirements.py
        │       ├── results.py
        │       └── searchform.py
        ├── main.py
        ├── models
        │   ├── __init__.py
        │   ├── base.py
        │   ├── resources.py
        │   └── sites.py
        ├── settings.py
        ├── templates
        │   ├── __init__.py
        │   ├── markdown.xslt
        │   ├── tests_core.html
        │   └── tests.py
        └── utils
            ├── __init__.py
            ├── cli.py
            ├── logger.py
            ├── parser.py
            ├── parsetab.py
            ├── search.py
            ├── server.py
            ├── tests.py
            └── tools.py
```

# Files

--------------------------------------------------------------------------------
/docs/_modules/mcp_server_webcrawl/crawlers/base/tests.html:
--------------------------------------------------------------------------------

```html
   1 | 
   2 | 
   3 | <!DOCTYPE html>
   4 | <html class="writer-html5" lang="en" data-content_root="../../../../">
   5 | <head>
   6 |   <meta charset="utf-8" />
   7 |   <meta name="viewport" content="width=device-width, initial-scale=1.0" />
   8 |   <title>mcp_server_webcrawl.crawlers.base.tests &mdash; mcp-server-webcrawl  documentation</title>
   9 |       <link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=80d5e7a1" />
  10 |       <link rel="stylesheet" type="text/css" href="../../../../_static/css/theme.css?v=e59714d7" />
  11 | 
  12 |   
  13 |       <script src="../../../../_static/jquery.js?v=5d32c60e"></script>
  14 |       <script src="../../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
  15 |       <script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
  16 |       <script src="../../../../_static/doctools.js?v=888ff710"></script>
  17 |       <script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
  18 |     <script src="../../../../_static/js/theme.js"></script>
  19 |     <link rel="index" title="Index" href="../../../../genindex.html" />
  20 |     <link rel="search" title="Search" href="../../../../search.html" /> 
  21 | </head>
  22 | 
  23 | <body class="wy-body-for-nav"> 
  24 |   <div class="wy-grid-for-nav">
  25 |     <nav data-toggle="wy-nav-shift" class="wy-nav-side">
  26 |       <div class="wy-side-scroll">
  27 |         <div class="wy-side-nav-search" >
  28 | 
  29 |           
  30 |           
  31 |           <a href="../../../../index.html" class="icon icon-home">
  32 |             mcp-server-webcrawl
  33 |           </a>
  34 | <div role="search">
  35 |   <form id="rtd-search-form" class="wy-form" action="../../../../search.html" method="get">
  36 |     <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
  37 |     <input type="hidden" name="check_keywords" value="yes" />
  38 |     <input type="hidden" name="area" value="default" />
  39 |   </form>
  40 | </div>
  41 |         </div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
  42 |               <p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
  43 | <ul>
  44 | <li class="toctree-l1"><a class="reference internal" href="../../../../installation.html">Installation</a></li>
  45 | <li class="toctree-l1"><a class="reference internal" href="../../../../guides.html">Setup Guides</a></li>
  46 | <li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
  47 | <li class="toctree-l1"><a class="reference internal" href="../../../../prompts.html">Prompt Routines</a></li>
  48 | <li class="toctree-l1"><a class="reference internal" href="../../../../interactive.html">Interactive Mode</a></li>
  49 | <li class="toctree-l1"><a class="reference internal" href="../../../../modules.html">mcp_server_webcrawl</a></li>
  50 | </ul>
  51 | 
  52 |         </div>
  53 |       </div>
  54 |     </nav>
  55 | 
  56 |     <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
  57 |           <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
  58 |           <a href="../../../../index.html">mcp-server-webcrawl</a>
  59 |       </nav>
  60 | 
  61 |       <div class="wy-nav-content">
  62 |         <div class="rst-content">
  63 |           <div role="navigation" aria-label="Page navigation">
  64 |   <ul class="wy-breadcrumbs">
  65 |       <li><a href="../../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
  66 |           <li class="breadcrumb-item"><a href="../../../index.html">Module code</a></li>
  67 |           <li class="breadcrumb-item"><a href="../../crawlers.html">mcp_server_webcrawl.crawlers</a></li>
  68 |       <li class="breadcrumb-item active">mcp_server_webcrawl.crawlers.base.tests</li>
  69 |       <li class="wy-breadcrumbs-aside">
  70 |       </li>
  71 |   </ul>
  72 |   <hr/>
  73 | </div>
  74 |           <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
  75 |            <div itemprop="articleBody">
  76 |              
  77 |   <h1>Source code for mcp_server_webcrawl.crawlers.base.tests</h1><div class="highlight"><pre>
  78 | <span></span><span class="kn">import</span> <span class="nn">sys</span>
  79 | <span class="kn">import</span> <span class="nn">unittest</span>
  80 | <span class="kn">import</span> <span class="nn">asyncio</span>
  81 | 
  82 | <span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Final</span>
  83 | <span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span>
  84 | <span class="kn">from</span> <span class="nn">logging</span> <span class="kn">import</span> <span class="n">Logger</span>
  85 | 
  86 | <span class="kn">from</span> <span class="nn">mcp_server_webcrawl.crawlers.base.crawler</span> <span class="kn">import</span> <span class="n">BaseCrawler</span>
  87 | <span class="kn">from</span> <span class="nn">mcp_server_webcrawl.crawlers.wget.crawler</span> <span class="kn">import</span> <span class="n">WgetCrawler</span>
  88 | <span class="kn">from</span> <span class="nn">mcp_server_webcrawl.models.resources</span> <span class="kn">import</span> <span class="n">ResourceResultType</span>
  89 | <span class="kn">from</span> <span class="nn">mcp_server_webcrawl.crawlers.base.api</span> <span class="kn">import</span> <span class="n">BaseJsonApi</span>
  90 | <span class="kn">from</span> <span class="nn">mcp_server_webcrawl.utils.logger</span> <span class="kn">import</span> <span class="n">get_logger</span>
  91 | 
  92 | <span class="n">logger</span><span class="p">:</span> <span class="n">Logger</span> <span class="o">=</span> <span class="n">get_logger</span><span class="p">()</span>
  93 | 
  94 | 
  95 | <div class="viewcode-block" id="BaseCrawlerTests">
  96 | <a class="viewcode-back" href="../../../../mcp_server_webcrawl.crawlers.base.html#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests">[docs]</a>
  97 | <span class="k">class</span> <span class="nc">BaseCrawlerTests</span><span class="p">(</span><span class="n">unittest</span><span class="o">.</span><span class="n">TestCase</span><span class="p">):</span>
  98 | 
  99 |     <span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="p">:</span> <span class="n">Final</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="s2">&quot;crawler&quot;</span>
 100 |     <span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="p">:</span> <span class="n">Final</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="s2">&quot;privacy&quot;</span>
 101 |     <span class="n">__PRAGMAR_HYPHENATED_KEYWORD</span><span class="p">:</span> <span class="n">Final</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="s2">&quot;one-click&quot;</span>
 102 | 
 103 | <div class="viewcode-block" id="BaseCrawlerTests.setUp">
 104 | <a class="viewcode-back" href="../../../../mcp_server_webcrawl.crawlers.base.html#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.setUp">[docs]</a>
 105 |     <span class="k">def</span> <span class="nf">setUp</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
 106 |         <span class="c1"># quiet asyncio error on tests, occurring after successful completion</span>
 107 |         <span class="k">if</span> <span class="n">sys</span><span class="o">.</span><span class="n">platform</span> <span class="o">==</span> <span class="s2">&quot;win32&quot;</span><span class="p">:</span>
 108 |             <span class="n">asyncio</span><span class="o">.</span><span class="n">set_event_loop_policy</span><span class="p">(</span><span class="n">asyncio</span><span class="o">.</span><span class="n">WindowsSelectorEventLoopPolicy</span><span class="p">())</span></div>
 109 | 
 110 | 
 111 | 
 112 | <div class="viewcode-block" id="BaseCrawlerTests.run_pragmar_search_tests">
 113 | <a class="viewcode-back" href="../../../../mcp_server_webcrawl.crawlers.base.html#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_search_tests">[docs]</a>
 114 |     <span class="k">def</span> <span class="nf">run_pragmar_search_tests</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
 115 | <span class="w">        </span><span class="sd">&quot;&quot;&quot;</span>
 116 | <span class="sd">        Run a battery of database checks on the crawler and Boolean validation</span>
 117 | <span class="sd">        &quot;&quot;&quot;</span>
 118 | 
 119 |         <span class="n">resources_json</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">()</span>
 120 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">resources_json</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should have some resources in database&quot;</span><span class="p">)</span>
 121 | 
 122 |         <span class="n">site_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">])</span>
 123 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">site_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Pragmar site should have resources&quot;</span><span class="p">)</span>
 124 | 
 125 |         <span class="n">primary_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 126 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 127 |             <span class="n">query</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="p">,</span>
 128 |             <span class="n">fields</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;content&quot;</span><span class="p">,</span> <span class="s2">&quot;headers&quot;</span><span class="p">],</span>
 129 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 130 |         <span class="p">)</span>
 131 | 
 132 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">primary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Keyword &#39;</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2">&#39; should return results&quot;</span><span class="p">)</span>
 133 | 
 134 |         <span class="n">secondary_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 135 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 136 |             <span class="n">query</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="p">,</span>
 137 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 138 |         <span class="p">)</span>
 139 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">secondary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Keyword &#39;</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="si">}</span><span class="s2">&#39; should return results&quot;</span><span class="p">)</span>
 140 | 
 141 |         <span class="bp">self</span><span class="o">.</span><span class="n">__run_pragmar_search_tests_fulltext</span><span class="p">(</span><span class="n">crawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">,</span> <span class="n">site_resources</span><span class="p">)</span>
 142 |         <span class="bp">self</span><span class="o">.</span><span class="n">__run_pragmar_search_tests_field_status</span><span class="p">(</span><span class="n">crawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">)</span>
 143 |         <span class="bp">self</span><span class="o">.</span><span class="n">__run_pragmar_search_tests_field_headers</span><span class="p">(</span><span class="n">crawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">)</span>
 144 |         <span class="bp">self</span><span class="o">.</span><span class="n">__run_pragmar_search_tests_field_content</span><span class="p">(</span><span class="n">crawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">)</span>
 145 |         <span class="bp">self</span><span class="o">.</span><span class="n">__run_pragmar_search_tests_field_type</span><span class="p">(</span><span class="n">crawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">,</span> <span class="n">site_resources</span><span class="p">)</span>
 146 |         <span class="bp">self</span><span class="o">.</span><span class="n">__run_pragmar_search_tests_extras</span><span class="p">(</span><span class="n">crawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">,</span> <span class="n">site_resources</span><span class="p">,</span> <span class="n">primary_resources</span><span class="p">,</span> <span class="n">secondary_resources</span><span class="p">)</span></div>
 147 | 
 148 | 
 149 | 
 150 | <div class="viewcode-block" id="BaseCrawlerTests.run_pragmar_image_tests">
 151 | <a class="viewcode-back" href="../../../../mcp_server_webcrawl.crawlers.base.html#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_image_tests">[docs]</a>
 152 |     <span class="k">def</span> <span class="nf">run_pragmar_image_tests</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">pragmar_site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
 153 | <span class="w">        </span><span class="sd">&quot;&quot;&quot;</span>
 154 | <span class="sd">        Test InterroBot-specific image handling and thumbnails.</span>
 155 | <span class="sd">        &quot;&quot;&quot;</span>
 156 |         <span class="n">img_results</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">pragmar_site_id</span><span class="p">],</span> <span class="n">query</span><span class="o">=</span><span class="s2">&quot;type: img&quot;</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
 157 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">img_results</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Image type filter should return results&quot;</span><span class="p">)</span>
 158 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span>
 159 |             <span class="nb">all</span><span class="p">(</span><span class="n">r</span><span class="o">.</span><span class="n">type</span><span class="o">.</span><span class="n">value</span> <span class="o">==</span> <span class="s2">&quot;img&quot;</span> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">img_results</span><span class="o">.</span><span class="n">_results</span><span class="p">),</span>
 160 |             <span class="s2">&quot;All filtered resources should have type &#39;img&#39;&quot;</span>
 161 |         <span class="p">)</span></div>
 162 | 
 163 | 
 164 | <div class="viewcode-block" id="BaseCrawlerTests.run_sites_resources_tests">
 165 | <a class="viewcode-back" href="../../../../mcp_server_webcrawl.crawlers.base.html#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_sites_resources_tests">[docs]</a>
 166 |     <span class="k">def</span> <span class="nf">run_sites_resources_tests</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">pragmar_site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">example_site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
 167 | 
 168 |         <span class="n">resources_json</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">()</span>
 169 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">resources_json</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should have some resources in database&quot;</span><span class="p">)</span>
 170 | 
 171 |         <span class="n">site_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">pragmar_site_id</span><span class="p">])</span>
 172 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">site_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Pragmar site should have resources&quot;</span><span class="p">)</span>
 173 | 
 174 |         <span class="c1"># basic resource retrieval</span>
 175 |         <span class="n">resources_json</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">()</span>
 176 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">resources_json</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">)</span>
 177 | 
 178 |         <span class="c1"># fulltext keyword search</span>
 179 |         <span class="n">query_keyword1</span> <span class="o">=</span> <span class="s2">&quot;privacy&quot;</span>
 180 | 
 181 |         <span class="n">timestamp_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 182 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">pragmar_site_id</span><span class="p">],</span>
 183 |             <span class="n">query</span><span class="o">=</span><span class="n">query_keyword1</span><span class="p">,</span>
 184 |             <span class="n">fields</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;created&quot;</span><span class="p">,</span> <span class="s2">&quot;modified&quot;</span><span class="p">,</span> <span class="s2">&quot;time&quot;</span><span class="p">],</span>
 185 |             <span class="n">limit</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span>
 186 |         <span class="p">)</span>
 187 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">timestamp_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Search query should return results&quot;</span><span class="p">)</span>
 188 |         <span class="k">for</span> <span class="n">resource</span> <span class="ow">in</span> <span class="n">timestamp_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">:</span>
 189 |             <span class="n">resource_dict</span> <span class="o">=</span> <span class="n">resource</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()</span>
 190 |             <span class="bp">self</span><span class="o">.</span><span class="n">assertIsNotNone</span><span class="p">(</span><span class="n">resource_dict</span><span class="p">[</span><span class="s2">&quot;created&quot;</span><span class="p">],</span> <span class="s2">&quot;Created timestamp should not be None&quot;</span><span class="p">)</span>
 191 |             <span class="bp">self</span><span class="o">.</span><span class="n">assertIsNotNone</span><span class="p">(</span><span class="n">resource_dict</span><span class="p">[</span><span class="s2">&quot;modified&quot;</span><span class="p">],</span> <span class="s2">&quot;Modified timestamp should not be None&quot;</span><span class="p">)</span>
 192 |             <span class="bp">self</span><span class="o">.</span><span class="n">assertIsNotNone</span><span class="p">(</span><span class="n">resource_dict</span><span class="p">[</span><span class="s2">&quot;time&quot;</span><span class="p">],</span> <span class="s2">&quot;Time should not be None&quot;</span><span class="p">)</span>
 193 | 
 194 |         <span class="c1"># resource ID filtering</span>
 195 |         <span class="k">if</span> <span class="n">resources_json</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
 196 |             <span class="n">first_resource</span> <span class="o">=</span> <span class="n">resources_json</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
 197 |             <span class="n">id_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 198 |                 <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">first_resource</span><span class="o">.</span><span class="n">site</span><span class="p">],</span>
 199 |                 <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;id: </span><span class="si">{</span><span class="n">first_resource</span><span class="o">.</span><span class="n">id</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
 200 |                 <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 201 |             <span class="p">)</span>
 202 |             <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">id_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
 203 |             <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">id_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">id</span><span class="p">,</span> <span class="n">first_resource</span><span class="o">.</span><span class="n">id</span><span class="p">)</span>
 204 | 
 205 |         <span class="c1"># site filtering</span>
 206 |         <span class="n">site_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">pragmar_site_id</span><span class="p">])</span>
 207 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">site_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Site filtering should return results&quot;</span><span class="p">)</span>
 208 |         <span class="k">for</span> <span class="n">resource</span> <span class="ow">in</span> <span class="n">site_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">:</span>
 209 |             <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">resource</span><span class="o">.</span><span class="n">site</span><span class="p">,</span> <span class="n">pragmar_site_id</span><span class="p">)</span>
 210 | 
 211 |         <span class="c1"># type filtering for HTML pages</span>
 212 |         <span class="n">html_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 213 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">pragmar_site_id</span><span class="p">],</span>
 214 |             <span class="n">query</span><span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">PAGE</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
 215 |         <span class="p">)</span>
 216 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">html_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;HTML filtering should return results&quot;</span><span class="p">)</span>
 217 |         <span class="k">for</span> <span class="n">resource</span> <span class="ow">in</span> <span class="n">html_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">:</span>
 218 |             <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">resource</span><span class="o">.</span><span class="n">type</span><span class="p">,</span> <span class="n">ResourceResultType</span><span class="o">.</span><span class="n">PAGE</span><span class="p">)</span>
 219 | 
 220 |         <span class="c1"># type filtering for multiple resource types</span>
 221 |         <span class="n">mixed_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 222 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">pragmar_site_id</span><span class="p">],</span>
 223 |             <span class="n">query</span><span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">PAGE</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2"> OR type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">SCRIPT</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
 224 |         <span class="p">)</span>
 225 |         <span class="k">if</span> <span class="n">mixed_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
 226 |             <span class="n">types_found</span> <span class="o">=</span> <span class="p">{</span><span class="n">r</span><span class="o">.</span><span class="n">type</span> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">mixed_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">}</span>
 227 |             <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span>
 228 |                 <span class="nb">len</span><span class="p">(</span><span class="n">types_found</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span>
 229 |                 <span class="s2">&quot;Should find at least one of the requested resource types&quot;</span>
 230 |             <span class="p">)</span>
 231 |             <span class="k">for</span> <span class="n">resource_type</span> <span class="ow">in</span> <span class="n">types_found</span><span class="p">:</span>
 232 |                 <span class="bp">self</span><span class="o">.</span><span class="n">assertIn</span><span class="p">(</span>
 233 |                     <span class="n">resource_type</span><span class="p">,</span>
 234 |                     <span class="p">[</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">PAGE</span><span class="p">,</span> <span class="n">ResourceResultType</span><span class="o">.</span><span class="n">SCRIPT</span><span class="p">]</span>
 235 |                 <span class="p">)</span>
 236 | 
 237 |         <span class="c1"># custom fields in response</span>
 238 |         <span class="n">custom_fields</span> <span class="o">=</span> <span class="p">[</span><span class="s2">&quot;content&quot;</span><span class="p">,</span> <span class="s2">&quot;headers&quot;</span><span class="p">,</span> <span class="s2">&quot;time&quot;</span><span class="p">]</span>
 239 |         <span class="n">field_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 240 |             <span class="n">query</span><span class="o">=</span><span class="s2">&quot;type: html&quot;</span><span class="p">,</span>
 241 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">pragmar_site_id</span><span class="p">],</span>
 242 |             <span class="n">fields</span><span class="o">=</span><span class="n">custom_fields</span><span class="p">,</span>
 243 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 244 |         <span class="p">)</span>
 245 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">field_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">)</span>
 246 |         <span class="n">resource_dict</span> <span class="o">=</span> <span class="n">field_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()</span>
 247 |         <span class="k">for</span> <span class="n">field</span> <span class="ow">in</span> <span class="n">custom_fields</span><span class="p">:</span>
 248 |             <span class="bp">self</span><span class="o">.</span><span class="n">assertIn</span><span class="p">(</span><span class="n">field</span><span class="p">,</span> <span class="n">resource_dict</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Field &#39;</span><span class="si">{</span><span class="n">field</span><span class="si">}</span><span class="s2">&#39; should be in response&quot;</span><span class="p">)</span>
 249 | 
 250 |         <span class="n">asc_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">pragmar_site_id</span><span class="p">],</span> <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;+url&quot;</span><span class="p">)</span>
 251 |         <span class="k">if</span> <span class="n">asc_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
 252 |             <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">asc_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">url</span> <span class="o">&lt;=</span> <span class="n">asc_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">url</span><span class="p">)</span>
 253 | 
 254 |         <span class="n">desc_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">pragmar_site_id</span><span class="p">],</span> <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;-url&quot;</span><span class="p">)</span>
 255 |         <span class="k">if</span> <span class="n">desc_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
 256 |             <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">desc_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">url</span> <span class="o">&gt;=</span> <span class="n">desc_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">url</span><span class="p">)</span>
 257 | 
 258 |         <span class="n">limit_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">pragmar_site_id</span><span class="p">],</span> <span class="n">limit</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span>
 259 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">limit_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">&lt;=</span> <span class="mi">3</span><span class="p">)</span>
 260 | 
 261 |         <span class="n">offset_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">pragmar_site_id</span><span class="p">],</span> <span class="n">offset</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
 262 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">offset_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">&lt;=</span> <span class="mi">2</span><span class="p">)</span>
 263 |         <span class="k">if</span> <span class="n">resources_json</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">4</span><span class="p">:</span>
 264 |             <span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span>
 265 |                 <span class="n">resources_json</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">id</span><span class="p">,</span>
 266 |                 <span class="n">offset_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">id</span><span class="p">,</span>
 267 |                 <span class="s2">&quot;Offset results should differ from first page&quot;</span>
 268 |             <span class="p">)</span>
 269 | 
 270 |         <span class="c1"># multi-site search, verify we got results from both sites</span>
 271 |         <span class="c1"># limit 100 sees all the pages, otherwise ArchiveBox needs -url</span>
 272 |         <span class="c1"># and everything else +url to float unique sites in a small result set</span>
 273 |         <span class="c1"># limit 100 is slower but more resilient</span>
 274 |         <span class="n">multisite_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 275 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">example_site_id</span><span class="p">,</span> <span class="n">pragmar_site_id</span><span class="p">],</span>
 276 |             <span class="n">query</span><span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">PAGE</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
 277 |             <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;+url&quot;</span><span class="p">,</span>
 278 |             <span class="n">limit</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
 279 |         <span class="p">)</span>
 280 | 
 281 |         <span class="n">found_sites</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
 282 |         <span class="k">for</span> <span class="n">resource</span> <span class="ow">in</span> <span class="n">multisite_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">:</span>
 283 |             <span class="n">found_sites</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">resource</span><span class="o">.</span><span class="n">site</span><span class="p">)</span>
 284 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">found_sites</span><span class="p">),</span> <span class="mi">2</span><span class="p">,</span> <span class="s2">&quot;Should have results from both sites&quot;</span><span class="p">)</span></div>
 285 | 
 286 | 
 287 | <div class="viewcode-block" id="BaseCrawlerTests.run_pragmar_tokenizer_tests">
 288 | <a class="viewcode-back" href="../../../../mcp_server_webcrawl.crawlers.base.html#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_tokenizer_tests">[docs]</a>
 289 |     <span class="k">def</span> <span class="nf">run_pragmar_tokenizer_tests</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">:</span><span class="nb">int</span><span class="p">):</span>
 290 | <span class="w">        </span><span class="sd">&quot;&quot;&quot;</span>
 291 | <span class="sd">        fts hyphens and underscores are particularly challenging, thus</span>
 292 | <span class="sd">        have a dedicated test. these must be configured in multiple places</span>
 293 | <span class="sd">        including CREATE TABLE ... tokenizer, as well as handled by the query</span>
 294 | <span class="sd">        parser.</span>
 295 | <span class="sd">        &quot;&quot;&quot;</span>
 296 | 
 297 |         <span class="n">mcp_resources_keyword</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 298 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 299 |             <span class="n">query</span><span class="o">=</span><span class="s1">&#39;mcp-server-webcrawl&#39;</span><span class="p">,</span>
 300 |             <span class="n">fields</span><span class="o">=</span><span class="p">[],</span>
 301 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 302 |         <span class="p">)</span>
 303 |         <span class="n">mcp_resources_quoted</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 304 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 305 |             <span class="n">query</span><span class="o">=</span><span class="s1">&#39;&quot;mcp-server-webcrawl&quot;&#39;</span><span class="p">,</span>
 306 |             <span class="n">fields</span><span class="o">=</span><span class="p">[],</span>
 307 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 308 |         <span class="p">)</span>
 309 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">mcp_resources_keyword</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should find mcp-server-webcrawl in HTML&quot;</span><span class="p">)</span>
 310 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">mcp_resources_quoted</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should find </span><span class="se">\&quot;</span><span class="s2">mcp-server-webcrawl</span><span class="se">\&quot;</span><span class="s2"> (phrase) in HTML&quot;</span><span class="p">)</span>
 311 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">mcp_resources_quoted</span><span class="o">.</span><span class="n">total</span> <span class="o">==</span> <span class="n">mcp_resources_keyword</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="s2">&quot;Quoted and unquoted equivalence expected&quot;</span><span class="p">)</span>
 312 |         <span class="n">mcp_resources_wildcarded</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 313 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 314 |             <span class="n">query</span><span class="o">=</span><span class="s1">&#39;mcp*&#39;</span><span class="p">,</span>
 315 |             <span class="n">fields</span><span class="o">=</span><span class="p">[],</span>
 316 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 317 |         <span class="p">)</span>
 318 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">mcp_resources_wildcarded</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should find mcp-server-* in HTML&quot;</span><span class="p">)</span>
 319 | 
 320 |         <span class="n">combo_and_resources_keyword</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 321 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 322 |             <span class="n">query</span><span class="o">=</span><span class="s1">&#39;&quot;mcp-server-webcrawl&quot; AND &quot;one-click&quot;&#39;</span><span class="p">,</span>
 323 |             <span class="n">fields</span><span class="o">=</span><span class="p">[],</span>
 324 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 325 |         <span class="p">)</span>
 326 |         <span class="n">combo_and_resources_quoted</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 327 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 328 |             <span class="n">query</span><span class="o">=</span><span class="s1">&#39;mcp-server-webcrawl AND one-click&#39;</span><span class="p">,</span>
 329 |             <span class="n">fields</span><span class="o">=</span><span class="p">[],</span>
 330 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 331 |         <span class="p">)</span>
 332 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">combo_and_resources_keyword</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should find mcp-server-webcrawl in HTML&quot;</span><span class="p">)</span>
 333 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">combo_and_resources_quoted</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should find </span><span class="se">\&quot;</span><span class="s2">mcp-server-webcrawl</span><span class="se">\&quot;</span><span class="s2"> (phrase) in HTML&quot;</span><span class="p">)</span>
 334 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">combo_and_resources_keyword</span><span class="o">.</span><span class="n">total</span> <span class="o">==</span> <span class="n">combo_and_resources_quoted</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="s2">&quot;Quoted and unquoted equivalence expected&quot;</span><span class="p">)</span>
 335 | 
 336 |         <span class="n">combo_or_resources_keyword</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 337 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 338 |             <span class="n">query</span><span class="o">=</span><span class="s1">&#39;&quot;mcp-server-webcrawl&quot; OR &quot;one-click&quot;&#39;</span><span class="p">,</span>
 339 |             <span class="n">fields</span><span class="o">=</span><span class="p">[],</span>
 340 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 341 |         <span class="p">)</span>
 342 |         <span class="n">combo_or_resources_quoted</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 343 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 344 |             <span class="n">query</span><span class="o">=</span><span class="s1">&#39;mcp-server-webcrawl OR one-click&#39;</span><span class="p">,</span>
 345 |             <span class="n">fields</span><span class="o">=</span><span class="p">[],</span>
 346 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 347 |         <span class="p">)</span>
 348 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">combo_or_resources_keyword</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should find mcp-server-webcrawl in HTML&quot;</span><span class="p">)</span>
 349 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">combo_or_resources_quoted</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should find </span><span class="se">\&quot;</span><span class="s2">mcp-server-webcrawl</span><span class="se">\&quot;</span><span class="s2"> (phrase) in HTML&quot;</span><span class="p">)</span>
 350 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">combo_or_resources_keyword</span><span class="o">.</span><span class="n">total</span> <span class="o">==</span> <span class="n">combo_or_resources_quoted</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="s2">&quot;Quoted and unquoted equivalence expected&quot;</span><span class="p">)</span>
 351 | 
 352 |         <span class="n">combo_not_resources_keyword</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 353 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 354 |             <span class="n">query</span><span class="o">=</span><span class="s1">&#39;&quot;mcp-server-webcrawl&quot; NOT &quot;one-click&quot;&#39;</span><span class="p">,</span>
 355 |             <span class="n">fields</span><span class="o">=</span><span class="p">[],</span>
 356 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 357 |         <span class="p">)</span>
 358 |         <span class="n">combo_not_resources_quoted</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 359 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 360 |             <span class="n">query</span><span class="o">=</span><span class="s1">&#39;mcp-server-webcrawl NOT one-click&#39;</span><span class="p">,</span>
 361 |             <span class="n">fields</span><span class="o">=</span><span class="p">[],</span>
 362 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 363 |         <span class="p">)</span>
 364 |         <span class="n">combo_and_not_resources_quoted</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 365 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 366 |             <span class="n">query</span><span class="o">=</span><span class="s1">&#39;mcp-server-webcrawl AND NOT one-click&#39;</span><span class="p">,</span>
 367 |             <span class="n">fields</span><span class="o">=</span><span class="p">[],</span>
 368 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 369 |         <span class="p">)</span>
 370 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">combo_not_resources_keyword</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should find mcp-server-webcrawl in HTML&quot;</span><span class="p">)</span>
 371 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">combo_not_resources_quoted</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should find </span><span class="se">\&quot;</span><span class="s2">mcp-server-webcrawl</span><span class="se">\&quot;</span><span class="s2"> (phrase) in HTML&quot;</span><span class="p">)</span>
 372 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">combo_not_resources_keyword</span><span class="o">.</span><span class="n">total</span> <span class="o">==</span> <span class="n">combo_not_resources_quoted</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="s2">&quot;Quoted and unquoted equivalence expected&quot;</span><span class="p">)</span>
 373 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">combo_not_resources_keyword</span><span class="o">.</span><span class="n">total</span> <span class="o">==</span> <span class="n">combo_and_not_resources_quoted</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;NOT (</span><span class="si">{</span><span class="n">combo_not_resources_keyword</span><span class="o">.</span><span class="n">total</span><span class="si">}</span><span class="s2">) and AND NOT (</span><span class="si">{</span><span class="n">combo_and_not_resources_quoted</span><span class="o">.</span><span class="n">total</span><span class="si">}</span><span class="s2">) equivalence expected&quot;</span><span class="p">)</span>
 374 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">mcp_resources_keyword</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;=</span> <span class="n">combo_and_resources_keyword</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="s2">&quot;Total records should be greater or equal to ANDs.&quot;</span><span class="p">)</span>
 375 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">mcp_resources_keyword</span><span class="o">.</span><span class="n">total</span> <span class="o">&lt;=</span> <span class="n">combo_or_resources_keyword</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="s2">&quot;Total records should be less than or equal to ORs.&quot;</span><span class="p">)</span>
 376 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">mcp_resources_keyword</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="n">combo_not_resources_keyword</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="s2">&quot;Total records should be greater than NOTs.&quot;</span><span class="p">)</span></div>
 377 | 
 378 | 
 379 | 
 380 | 
 381 | <div class="viewcode-block" id="BaseCrawlerTests.run_pragmar_site_tests">
 382 | <a class="viewcode-back" href="../../../../mcp_server_webcrawl.crawlers.base.html#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_site_tests">[docs]</a>
 383 |     <span class="k">def</span> <span class="nf">run_pragmar_site_tests</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">:</span><span class="nb">int</span><span class="p">):</span>
 384 | 
 385 |         <span class="c1"># all sites</span>
 386 |         <span class="n">sites_json</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_sites_api</span><span class="p">()</span>
 387 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">sites_json</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;=</span> <span class="mi">2</span><span class="p">)</span>
 388 | 
 389 |         <span class="c1"># single site</span>
 390 |         <span class="n">site_json</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_sites_api</span><span class="p">(</span><span class="n">ids</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">])</span>
 391 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">site_json</span><span class="o">.</span><span class="n">total</span> <span class="o">==</span> <span class="mi">1</span><span class="p">)</span>
 392 | 
 393 |         <span class="c1"># site with fields</span>
 394 |         <span class="n">site_field_json</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_sites_api</span><span class="p">(</span><span class="n">ids</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span> <span class="n">fields</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;created&quot;</span><span class="p">,</span> <span class="s2">&quot;modified&quot;</span><span class="p">])</span>
 395 |         <span class="n">site_field_result</span> <span class="o">=</span> <span class="n">site_field_json</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()</span>
 396 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="s2">&quot;created&quot;</span> <span class="ow">in</span> <span class="n">site_field_result</span><span class="p">)</span>
 397 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="s2">&quot;modified&quot;</span> <span class="ow">in</span> <span class="n">site_field_result</span><span class="p">)</span></div>
 398 | 
 399 | 
 400 | <div class="viewcode-block" id="BaseCrawlerTests.run_pragmar_sort_tests">
 401 | <a class="viewcode-back" href="../../../../mcp_server_webcrawl.crawlers.base.html#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_sort_tests">[docs]</a>
 402 |     <span class="k">def</span> <span class="nf">run_pragmar_sort_tests</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
 403 | <span class="w">        </span><span class="sd">&quot;&quot;&quot;</span>
 404 | <span class="sd">        Test sorting functionality with performance optimizations.</span>
 405 | <span class="sd">        &quot;&quot;&quot;</span>
 406 |         <span class="n">sorted_default</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span> <span class="n">limit</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">fields</span><span class="o">=</span><span class="p">[])</span>
 407 |         <span class="n">sorted_url_ascending</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span> <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;+url&quot;</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">fields</span><span class="o">=</span><span class="p">[])</span>
 408 |         <span class="n">sorted_url_descending</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span> <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;-url&quot;</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">fields</span><span class="o">=</span><span class="p">[])</span>
 409 | 
 410 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">sorted_url_ascending</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Database should contain resources&quot;</span><span class="p">)</span>
 411 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">sorted_url_descending</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Database should contain resources&quot;</span><span class="p">)</span>
 412 |         <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">sorted_default</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">sorted_url_ascending</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
 413 |             <span class="n">default_urls</span> <span class="o">=</span> <span class="p">[</span><span class="n">r</span><span class="o">.</span><span class="n">url</span> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">sorted_default</span><span class="o">.</span><span class="n">_results</span><span class="p">]</span>
 414 |             <span class="n">ascending_urls</span> <span class="o">=</span> <span class="p">[</span><span class="n">r</span><span class="o">.</span><span class="n">url</span> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">sorted_url_ascending</span><span class="o">.</span><span class="n">_results</span><span class="p">]</span>
 415 |             <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">default_urls</span><span class="p">,</span> <span class="n">ascending_urls</span><span class="p">,</span> <span class="s2">&quot;Default sort should match +url sort&quot;</span><span class="p">)</span>
 416 | 
 417 |         <span class="n">sorted_size_ascending</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span> <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;+size&quot;</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">fields</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;size&quot;</span><span class="p">])</span>
 418 |         <span class="n">sorted_size_descending</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span> <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;-size&quot;</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">fields</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;size&quot;</span><span class="p">])</span>
 419 |         <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">sorted_url_ascending</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
 420 |             <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">sorted_url_ascending</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">):</span>
 421 |                 <span class="bp">self</span><span class="o">.</span><span class="n">assertLessEqual</span><span class="p">(</span><span class="n">sorted_url_ascending</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">url</span><span class="p">,</span>
 422 |                         <span class="n">sorted_url_ascending</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">url</span><span class="p">,</span> <span class="s2">&quot;URLs should be ascending&quot;</span><span class="p">)</span>
 423 |         <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">sorted_url_descending</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
 424 |             <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">sorted_url_descending</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">):</span>
 425 |                 <span class="bp">self</span><span class="o">.</span><span class="n">assertGreaterEqual</span><span class="p">(</span><span class="n">sorted_url_descending</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">url</span><span class="p">,</span>
 426 |                         <span class="n">sorted_url_descending</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">url</span><span class="p">,</span> <span class="s2">&quot;URLs should be descending&quot;</span><span class="p">)</span>
 427 |         <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">sorted_size_ascending</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
 428 |             <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">sorted_size_ascending</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">):</span>
 429 |                 <span class="bp">self</span><span class="o">.</span><span class="n">assertLessEqual</span><span class="p">(</span><span class="n">sorted_size_ascending</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()[</span><span class="s2">&quot;size&quot;</span><span class="p">],</span>
 430 |                         <span class="n">sorted_size_ascending</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()[</span><span class="s2">&quot;size&quot;</span><span class="p">],</span> <span class="s2">&quot;Sizes should be ascending&quot;</span><span class="p">)</span>
 431 |         <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">sorted_size_descending</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
 432 |             <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">sorted_size_descending</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">):</span>
 433 |                 <span class="bp">self</span><span class="o">.</span><span class="n">assertGreaterEqual</span><span class="p">(</span><span class="n">sorted_size_descending</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()[</span><span class="s2">&quot;size&quot;</span><span class="p">],</span>
 434 |                         <span class="n">sorted_size_descending</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()[</span><span class="s2">&quot;size&quot;</span><span class="p">],</span> <span class="s2">&quot;Sizes should be descending&quot;</span><span class="p">)</span>
 435 | 
 436 |         <span class="n">random_1</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span> <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;?&quot;</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">fields</span><span class="o">=</span><span class="p">[])</span>
 437 |         <span class="n">random_2</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span> <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;?&quot;</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">fields</span><span class="o">=</span><span class="p">[])</span>
 438 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">random_1</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Random sort should return results&quot;</span><span class="p">)</span>
 439 |         <span class="k">if</span> <span class="n">random_1</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;=</span> <span class="mi">10</span><span class="p">:</span>
 440 |             <span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">([</span><span class="n">r</span><span class="o">.</span><span class="n">id</span> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">random_1</span><span class="o">.</span><span class="n">_results</span><span class="p">],</span> <span class="p">[</span><span class="n">r</span><span class="o">.</span><span class="n">id</span> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">random_2</span><span class="o">.</span><span class="n">_results</span><span class="p">],</span>
 441 |                             <span class="s2">&quot;Random sort should produce different orders&quot;</span><span class="p">)</span>
 442 |         <span class="k">else</span><span class="p">:</span>
 443 |             <span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Skip randomness verification: Not enough resources (</span><span class="si">{</span><span class="n">random_1</span><span class="o">.</span><span class="n">total</span><span class="si">}</span><span class="s2">)&quot;</span><span class="p">)</span></div>
 444 | 
 445 | 
 446 | <div class="viewcode-block" id="BaseCrawlerTests.run_pragmar_content_tests">
 447 | <a class="viewcode-back" href="../../../../mcp_server_webcrawl.crawlers.base.html#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_content_tests">[docs]</a>
 448 |     <span class="k">def</span> <span class="nf">run_pragmar_content_tests</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">:</span><span class="nb">int</span><span class="p">,</span> <span class="n">html_leniency</span><span class="p">:</span> <span class="nb">bool</span><span class="p">):</span>
 449 | 
 450 |         <span class="n">html_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 451 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 452 |             <span class="n">query</span><span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">PAGE</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
 453 |             <span class="n">fields</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;content&quot;</span><span class="p">,</span> <span class="s2">&quot;headers&quot;</span><span class="p">]</span>
 454 |         <span class="p">)</span>
 455 | 
 456 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">html_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should find HTML resources&quot;</span><span class="p">)</span>
 457 |         <span class="k">for</span> <span class="n">resource</span> <span class="ow">in</span> <span class="n">html_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">:</span>
 458 |             <span class="n">resource_dict</span> <span class="o">=</span> <span class="n">resource</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()</span>
 459 |             <span class="k">if</span> <span class="s2">&quot;content&quot;</span> <span class="ow">in</span> <span class="n">resource_dict</span><span class="p">:</span>
 460 |                 <span class="n">content</span> <span class="o">=</span>  <span class="n">resource_dict</span><span class="p">[</span><span class="s2">&quot;content&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
 461 |                 <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span>
 462 |                     <span class="s2">&quot;&lt;!DOCTYPE html&gt;&quot;</span> <span class="ow">in</span> <span class="n">content</span> <span class="ow">or</span>
 463 |                     <span class="s2">&quot;&lt;html&quot;</span> <span class="ow">in</span> <span class="n">content</span> <span class="ow">or</span>
 464 |                     <span class="s2">&quot;&lt;meta&quot;</span> <span class="ow">in</span> <span class="n">content</span> <span class="ow">or</span>
 465 |                     <span class="n">html_leniency</span><span class="p">,</span>
 466 |                     <span class="sa">f</span><span class="s2">&quot;HTML content should contain HTML markup: </span><span class="si">{</span><span class="n">resource</span><span class="o">.</span><span class="n">url</span><span class="si">}</span><span class="se">\n\n</span><span class="si">{</span><span class="n">resource</span><span class="o">.</span><span class="n">content</span><span class="si">}</span><span class="s2">&quot;</span>
 467 |                 <span class="p">)</span>
 468 | 
 469 |             <span class="k">if</span> <span class="s2">&quot;headers&quot;</span> <span class="ow">in</span> <span class="n">resource_dict</span> <span class="ow">and</span> <span class="n">resource_dict</span><span class="p">[</span><span class="s2">&quot;headers&quot;</span><span class="p">]:</span>
 470 |                 <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span>
 471 |                     <span class="s2">&quot;Content-Type:&quot;</span> <span class="ow">in</span> <span class="n">resource_dict</span><span class="p">[</span><span class="s2">&quot;headers&quot;</span><span class="p">],</span>
 472 |                     <span class="sa">f</span><span class="s2">&quot;Headers should contain Content-Type: </span><span class="si">{</span><span class="n">resource</span><span class="o">.</span><span class="n">url</span><span class="si">}</span><span class="s2">&quot;</span>
 473 |                 <span class="p">)</span>
 474 | 
 475 |         <span class="c1"># script content detection</span>
 476 |         <span class="n">script_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 477 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 478 |             <span class="n">query</span><span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">SCRIPT</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
 479 |             <span class="n">fields</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;content&quot;</span><span class="p">,</span> <span class="s2">&quot;headers&quot;</span><span class="p">],</span>
 480 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 481 |         <span class="p">)</span>
 482 |         <span class="k">if</span> <span class="n">script_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
 483 |             <span class="k">for</span> <span class="n">resource</span> <span class="ow">in</span> <span class="n">script_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">:</span>
 484 |                 <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">resource</span><span class="o">.</span><span class="n">type</span><span class="p">,</span> <span class="n">ResourceResultType</span><span class="o">.</span><span class="n">SCRIPT</span><span class="p">)</span>
 485 | 
 486 |         <span class="c1"># css content detection</span>
 487 |         <span class="n">css_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 488 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 489 |             <span class="n">query</span><span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">CSS</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
 490 |             <span class="n">fields</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;content&quot;</span><span class="p">,</span> <span class="s2">&quot;headers&quot;</span><span class="p">],</span>
 491 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 492 |         <span class="p">)</span>
 493 |         <span class="k">if</span> <span class="n">css_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
 494 |             <span class="k">for</span> <span class="n">resource</span> <span class="ow">in</span> <span class="n">css_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">:</span>
 495 |                 <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">resource</span><span class="o">.</span><span class="n">type</span><span class="p">,</span> <span class="n">ResourceResultType</span><span class="o">.</span><span class="n">CSS</span><span class="p">)</span></div>
 496 | 
 497 | 
 498 | <div class="viewcode-block" id="BaseCrawlerTests.run_pragmar_report">
 499 | <a class="viewcode-back" href="../../../../mcp_server_webcrawl.crawlers.base.html#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_report">[docs]</a>
 500 |     <span class="k">def</span> <span class="nf">run_pragmar_report</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">heading</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
 501 | <span class="w">        </span><span class="sd">&quot;&quot;&quot;</span>
 502 | <span class="sd">        Generate a comprehensive report of all resources for a site.</span>
 503 | <span class="sd">        Returns a formatted string with counts and URLs by type.</span>
 504 | <span class="sd">        &quot;&quot;&quot;</span>
 505 | 
 506 |         <span class="n">site_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 507 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 508 |             <span class="n">query</span><span class="o">=</span><span class="s2">&quot;&quot;</span><span class="p">,</span>
 509 |             <span class="n">limit</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
 510 |         <span class="p">)</span>
 511 | 
 512 |         <span class="n">html_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 513 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 514 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">PAGE</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
 515 |             <span class="n">limit</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
 516 |         <span class="p">)</span>
 517 | 
 518 |         <span class="n">css_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 519 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 520 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">CSS</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
 521 |             <span class="n">limit</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
 522 |         <span class="p">)</span>
 523 | 
 524 |         <span class="n">js_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 525 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 526 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">SCRIPT</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
 527 |             <span class="n">limit</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
 528 |         <span class="p">)</span>
 529 | 
 530 |         <span class="n">image_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 531 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 532 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">IMAGE</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
 533 |             <span class="n">limit</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
 534 |         <span class="p">)</span>
 535 | 
 536 |         <span class="n">mcp_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 537 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 538 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: html AND (mcp)&quot;</span><span class="p">,</span>
 539 |             <span class="n">limit</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
 540 |         <span class="p">)</span>
 541 | 
 542 |         <span class="n">report_lines</span> <span class="o">=</span> <span class="p">[]</span>
 543 |         <span class="n">sections</span> <span class="o">=</span> <span class="p">[</span>
 544 |             <span class="p">(</span><span class="s2">&quot;Total pages&quot;</span><span class="p">,</span> <span class="n">site_resources</span><span class="p">),</span>
 545 |             <span class="p">(</span><span class="s2">&quot;Total HTML&quot;</span><span class="p">,</span> <span class="n">html_resources</span><span class="p">),</span>
 546 |             <span class="p">(</span><span class="s2">&quot;Total MCP search hits&quot;</span><span class="p">,</span> <span class="n">mcp_resources</span><span class="p">),</span>
 547 |             <span class="p">(</span><span class="s2">&quot;Total CSS&quot;</span><span class="p">,</span> <span class="n">css_resources</span><span class="p">),</span>
 548 |             <span class="p">(</span><span class="s2">&quot;Total JS&quot;</span><span class="p">,</span> <span class="n">js_resources</span><span class="p">),</span>
 549 |             <span class="p">(</span><span class="s2">&quot;Total Images&quot;</span><span class="p">,</span> <span class="n">image_resources</span><span class="p">)</span>
 550 |         <span class="p">]</span>
 551 | 
 552 |         <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">section_name</span><span class="p">,</span> <span class="n">resource_obj</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">sections</span><span class="p">):</span>
 553 |             <span class="n">report_lines</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">section_name</span><span class="si">}</span><span class="s2">: </span><span class="si">{</span><span class="n">resource_obj</span><span class="o">.</span><span class="n">total</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
 554 |             <span class="k">for</span> <span class="n">resource</span> <span class="ow">in</span> <span class="n">resource_obj</span><span class="o">.</span><span class="n">_results</span><span class="p">:</span>
 555 |                 <span class="n">report_lines</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">resource</span><span class="o">.</span><span class="n">url</span><span class="p">)</span>
 556 |             <span class="k">if</span> <span class="n">i</span> <span class="o">&lt;</span> <span class="nb">len</span><span class="p">(</span><span class="n">sections</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">:</span>
 557 |                 <span class="n">report_lines</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s2">&quot;&quot;</span><span class="p">)</span>
 558 | 
 559 |         <span class="n">now</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
 560 |         <span class="n">lines_together</span> <span class="o">=</span> <span class="s2">&quot;</span><span class="se">\n</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">report_lines</span><span class="p">)</span>
 561 | 
 562 |         <span class="k">return</span> <span class="sa">f</span><span class="s2">&quot;&quot;&quot;</span>
 563 | <span class="s2">**********************************************************************************</span>
 564 | <span class="s2">* </span><span class="si">{</span><span class="n">heading</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span class="n">now</span><span class="o">.</span><span class="n">isoformat</span><span class="p">()</span><span class="si">}</span><span class="s2">                                                    *</span>
 565 | <span class="s2">**********************************************************************************</span>
 566 | <span class="si">{</span><span class="n">lines_together</span><span class="si">}</span>
 567 | <span class="s2">&quot;&quot;&quot;</span></div>
 568 | 
 569 |     <span class="k">def</span> <span class="nf">__run_pragmar_search_tests_field_status</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
 570 | 
 571 |         <span class="c1"># status code filtering</span>
 572 |         <span class="n">status_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 573 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 574 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;status: 200&quot;</span><span class="p">,</span>
 575 |             <span class="n">limit</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span>
 576 |         <span class="p">)</span>
 577 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">status_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Status filtering should return results&quot;</span><span class="p">)</span>
 578 |         <span class="k">for</span> <span class="n">resource</span> <span class="ow">in</span> <span class="n">status_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">:</span>
 579 |             <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">resource</span><span class="o">.</span><span class="n">status</span><span class="p">,</span> <span class="mi">200</span><span class="p">)</span>
 580 | 
 581 |         <span class="c1"># status code filtering</span>
 582 |         <span class="n">appstat_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 583 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 584 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;status: 200 AND url: https://pragmar.com/appstat*&quot;</span><span class="p">,</span>
 585 |             <span class="n">limit</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span>
 586 |         <span class="p">)</span>
 587 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">appstat_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Status filtering should return results&quot;</span><span class="p">)</span>
 588 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertGreaterEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">appstat_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">),</span> <span class="mi">3</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Should have at least 3 results in appstat resources&quot;</span><span class="p">)</span>
 589 | 
 590 |         <span class="c1"># multiple status codes</span>
 591 |         <span class="n">multi_status_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 592 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;status: 200 OR status: 404&quot;</span><span class="p">,</span>
 593 |         <span class="p">)</span>
 594 |         <span class="k">if</span> <span class="n">multi_status_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
 595 |             <span class="n">found_statuses</span> <span class="o">=</span> <span class="p">{</span><span class="n">r</span><span class="o">.</span><span class="n">status</span> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">multi_status_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">}</span>
 596 |             <span class="k">for</span> <span class="n">status</span> <span class="ow">in</span> <span class="n">found_statuses</span><span class="p">:</span>
 597 |                 <span class="bp">self</span><span class="o">.</span><span class="n">assertIn</span><span class="p">(</span><span class="n">status</span><span class="p">,</span> <span class="p">[</span><span class="mi">200</span><span class="p">,</span> <span class="mi">404</span><span class="p">])</span>
 598 | 
 599 |     <span class="k">def</span> <span class="nf">__run_pragmar_search_tests_field_headers</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
 600 | 
 601 |         <span class="c1"># supported crawls only (genuine headers data)</span>
 602 |         <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span> <span class="ow">in</span> <span class="p">(</span><span class="s2">&quot;InterroBotTests&quot;</span><span class="p">,</span><span class="s2">&quot;KatanaTests&quot;</span><span class="p">,</span> <span class="s2">&quot;WarcTests&quot;</span><span class="p">):</span>
 603 |             <span class="k">return</span>
 604 | 
 605 |         <span class="n">appstat_any</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 606 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 607 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;appstat&quot;</span><span class="p">,</span>
 608 |             <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
 609 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 610 |         <span class="p">)</span>
 611 | 
 612 |         <span class="n">appstat_headers_js</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 613 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 614 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;appstat AND headers: javascript&quot;</span><span class="p">,</span>
 615 |             <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
 616 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 617 |         <span class="p">)</span>
 618 | 
 619 |         <span class="c1"># https://pragmar.com/media/static/scripts/js/appstat.min.js</span>
 620 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">appstat_headers_js</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;Should have exactly one resource in database (appstat.min.js)&quot;</span><span class="p">)</span>
 621 | 
 622 |         <span class="n">appstat_headers_nojs</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 623 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 624 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;appstat NOT headers: javascript&quot;</span><span class="p">,</span>
 625 |             <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
 626 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 627 |         <span class="p">)</span>
 628 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertGreater</span><span class="p">(</span><span class="n">appstat_headers_nojs</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;Should have many appstat non-js resources in database&quot;</span><span class="p">)</span>
 629 | 
 630 |         <span class="n">appstat_sum</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">appstat_headers_js</span><span class="o">.</span><span class="n">total</span> <span class="o">+</span> <span class="n">appstat_headers_nojs</span><span class="o">.</span><span class="n">total</span>
 631 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">appstat_sum</span><span class="p">,</span> <span class="n">appstat_any</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="s2">&quot;appstat non-js + js resources should sum to all appstat&quot;</span><span class="p">)</span>
 632 | 
 633 |     <span class="k">def</span> <span class="nf">__run_pragmar_search_tests_field_content</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
 634 | 
 635 |         <span class="n">mcp_any</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 636 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 637 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;mcp&quot;</span><span class="p">,</span>
 638 |             <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
 639 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 640 |         <span class="p">)</span>
 641 | 
 642 |         <span class="n">mcp_content_configuration</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 643 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 644 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;mcp AND content: configuration&quot;</span><span class="p">,</span>
 645 |             <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
 646 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 647 |         <span class="p">)</span>
 648 | 
 649 |         <span class="c1"># https://pragmar.com/mcp-server-webcrawl/</span>
 650 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertGreaterEqual</span><span class="p">(</span><span class="n">mcp_content_configuration</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;Should have one, possibly more resources (mcp-server-webcrawl)&quot;</span><span class="p">)</span>
 651 | 
 652 |         <span class="n">mcp_content_no_configuration</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 653 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 654 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;mcp NOT content: configuration&quot;</span><span class="p">,</span>
 655 |             <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
 656 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 657 |         <span class="p">)</span>
 658 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertGreater</span><span class="p">(</span><span class="n">mcp_content_no_configuration</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;Should have many mcp non-configuration resources&quot;</span><span class="p">)</span>
 659 | 
 660 |         <span class="n">mcp_sum</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">mcp_content_configuration</span><span class="o">.</span><span class="n">total</span> <span class="o">+</span> <span class="n">mcp_content_no_configuration</span><span class="o">.</span><span class="n">total</span>
 661 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">mcp_sum</span><span class="p">,</span> <span class="n">mcp_any</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="s2">&quot;mcp non-config + config resources should sum to all mcp&quot;</span><span class="p">)</span>
 662 | 
 663 |         <span class="n">mcp_html_content_config</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 664 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 665 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: html AND mcp AND content: configuration&quot;</span><span class="p">,</span>
 666 |             <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
 667 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 668 |         <span class="p">)</span>
 669 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span>
 670 |             <span class="n">mcp_html_content_config</span><span class="o">.</span><span class="n">total</span> <span class="o">&lt;=</span> <span class="n">mcp_content_configuration</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
 671 |             <span class="s2">&quot;Adding type constraint should not increase results&quot;</span>
 672 |         <span class="p">)</span>
 673 | 
 674 |         <span class="n">wildcard_content_search</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 675 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 676 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s1">&#39;content: config*&#39;</span><span class="p">,</span>
 677 |             <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
 678 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 679 |         <span class="p">)</span>
 680 |         <span class="n">exact_config_search</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 681 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 682 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s1">&#39;content: configuration&#39;</span><span class="p">,</span>
 683 |             <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
 684 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 685 |         <span class="p">)</span>
 686 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span>
 687 |             <span class="n">wildcard_content_search</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;=</span> <span class="n">exact_config_search</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
 688 |             <span class="s2">&quot;Wildcard content search should return at least as many results as exact match&quot;</span>
 689 |         <span class="p">)</span>
 690 | 
 691 |     <span class="k">def</span> <span class="nf">__run_pragmar_search_tests_field_type</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">site_resources</span><span class="p">:</span><span class="n">BaseJsonApi</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
 692 | 
 693 |         <span class="n">html_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 694 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 695 |             <span class="n">query</span><span class="o">=</span><span class="s2">&quot;type: html&quot;</span><span class="p">,</span>
 696 |             <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
 697 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 698 |         <span class="p">)</span>
 699 | 
 700 |         <span class="c1"># page count varies by crawler, 10 is conservative low end</span>
 701 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertGreater</span><span class="p">(</span><span class="n">html_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="s2">&quot;Should have greater than 10 HTML resources&quot;</span><span class="p">)</span>
 702 | 
 703 |         <span class="n">not_html_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 704 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 705 |             <span class="n">query</span><span class="o">=</span><span class="s2">&quot;NOT type: html&quot;</span><span class="p">,</span>
 706 |             <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
 707 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 708 |         <span class="p">)</span>
 709 |         <span class="c1"># wget is HTML-only fixture</span>
 710 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertGreater</span><span class="p">(</span><span class="n">not_html_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="s2">&quot;Should have greater than 10 non-HTML resources&quot;</span><span class="p">)</span>
 711 | 
 712 |         <span class="n">html_sum</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">html_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">+</span> <span class="n">not_html_resources</span><span class="o">.</span><span class="n">total</span>
 713 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">html_sum</span><span class="p">,</span> <span class="n">site_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="s2">&quot;HTML + non-HTML should sum to all resources&quot;</span><span class="p">)</span>
 714 | 
 715 |         <span class="c1"># keyword + type combination</span>
 716 |         <span class="n">appstat_any</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 717 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 718 |             <span class="n">query</span><span class="o">=</span><span class="s2">&quot;appstat&quot;</span><span class="p">,</span>
 719 |             <span class="n">limit</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span>
 720 |         <span class="p">)</span>
 721 | 
 722 |         <span class="n">appstat_script</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 723 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 724 |             <span class="n">query</span><span class="o">=</span><span class="s2">&quot;appstat AND type: script&quot;</span><span class="p">,</span>
 725 |             <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
 726 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 727 |         <span class="p">)</span>
 728 | 
 729 |         <span class="c1"># https://pragmar.com/media/static/scripts/js/appstat.min.js</span>
 730 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">appstat_script</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;Should have exactly one appstat script (appstat.min.js)&quot;</span><span class="p">)</span>
 731 | 
 732 |         <span class="n">appstat_not_script</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 733 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 734 |             <span class="n">query</span><span class="o">=</span><span class="s2">&quot;appstat NOT type: script&quot;</span><span class="p">,</span>
 735 |             <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
 736 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 737 |         <span class="p">)</span>
 738 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertGreater</span><span class="p">(</span><span class="n">appstat_not_script</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;Should have many appstat non-script resources&quot;</span><span class="p">)</span>
 739 | 
 740 |         <span class="n">appstat_sum</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">appstat_script</span><span class="o">.</span><span class="n">total</span> <span class="o">+</span> <span class="n">appstat_not_script</span><span class="o">.</span><span class="n">total</span>
 741 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">appstat_sum</span><span class="p">,</span> <span class="n">appstat_any</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="s2">&quot;appstat script + non-script should sum to all appstat&quot;</span><span class="p">)</span>
 742 | 
 743 |         <span class="c1"># type OR combinations</span>
 744 |         <span class="n">html_or_img</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 745 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 746 |             <span class="n">query</span><span class="o">=</span><span class="s2">&quot;type: html OR type: img&quot;</span><span class="p">,</span>
 747 |             <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
 748 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 749 |         <span class="p">)</span>
 750 | 
 751 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertGreater</span><span class="p">(</span><span class="n">html_or_img</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">20</span><span class="p">,</span> <span class="s2">&quot;HTML + IMG should be greater than 20 resources&quot;</span><span class="p">)</span>
 752 | 
 753 |         <span class="n">img_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 754 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 755 |             <span class="n">query</span><span class="o">=</span><span class="s2">&quot;type: img&quot;</span><span class="p">,</span>
 756 |             <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
 757 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 758 |         <span class="p">)</span>
 759 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span>
 760 |             <span class="n">html_or_img</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;=</span> <span class="n">html_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
 761 |             <span class="s2">&quot;OR should include all HTML resources&quot;</span>
 762 |         <span class="p">)</span>
 763 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span>
 764 |             <span class="n">html_or_img</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;=</span> <span class="n">img_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
 765 |             <span class="s2">&quot;OR should include all IMG resources&quot;</span>
 766 |         <span class="p">)</span>
 767 | 
 768 |         <span class="c1"># combined filtering</span>
 769 |         <span class="n">combined_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 770 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 771 |             <span class="n">query</span><span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;style AND type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">PAGE</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
 772 |             <span class="n">fields</span><span class="o">=</span><span class="p">[],</span>
 773 |             <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;+url&quot;</span><span class="p">,</span>
 774 |             <span class="n">limit</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span>
 775 |         <span class="p">)</span>
 776 | 
 777 |         <span class="k">if</span> <span class="n">combined_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
 778 |             <span class="k">for</span> <span class="n">resource</span> <span class="ow">in</span> <span class="n">combined_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">:</span>
 779 |                 <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">resource</span><span class="o">.</span><span class="n">site</span><span class="p">,</span> <span class="n">site_id</span><span class="p">)</span>
 780 |                 <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">resource</span><span class="o">.</span><span class="n">type</span><span class="p">,</span> <span class="n">ResourceResultType</span><span class="o">.</span><span class="n">PAGE</span><span class="p">)</span>
 781 | 
 782 |     <span class="k">def</span> <span class="nf">__run_pragmar_search_tests_fulltext</span><span class="p">(</span>
 783 |             <span class="bp">self</span><span class="p">,</span>
 784 |             <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span>
 785 |             <span class="n">site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span>
 786 |             <span class="n">site_resources</span><span class="p">:</span><span class="n">BaseJsonApi</span>
 787 |         <span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
 788 | 
 789 |         <span class="c1"># Boolean workout</span>
 790 |         <span class="c1"># result counts are fragile, intersections should not be</span>
 791 |         <span class="c1"># counts are worth the fragility, for now</span>
 792 | 
 793 |         <span class="n">boolean_primary_resources</span>  <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 794 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 795 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: html AND (</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2">)&quot;</span><span class="p">,</span>
 796 |             <span class="n">limit</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span>
 797 |         <span class="p">)</span>
 798 | 
 799 |         <span class="c1"># varies by crawler, katana doesn&#39;t crawl /help/ depth by default</span>
 800 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">boolean_primary_resources</span> <span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Primary search should return results&quot;</span><span class="p">)</span>
 801 | 
 802 |         <span class="n">boolean_secondary_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 803 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 804 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: html AND (</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="si">}</span><span class="s2">)&quot;</span><span class="p">,</span>
 805 |             <span class="n">limit</span><span class="o">=</span><span class="mi">12</span><span class="p">,</span>
 806 |         <span class="p">)</span>
 807 | 
 808 |         <span class="c1"># re: all these &gt; 0 checks, result counts vary by crawler, all have default crawl behaviors/depths/externals</span>
 809 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">boolean_secondary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Secondary search should return results&quot;</span><span class="p">)</span>
 810 | 
 811 |         <span class="c1"># AND</span>
 812 |         <span class="n">primary_and_secondary_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 813 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 814 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: html AND (</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2"> AND </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="si">}</span><span class="s2">)&quot;</span><span class="p">,</span>
 815 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 816 |         <span class="p">)</span>
 817 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">primary_and_secondary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;=</span> <span class="mi">0</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Primary AND Secondary should return results&quot;</span><span class="p">)</span>
 818 | 
 819 |         <span class="c1"># OR</span>
 820 |         <span class="n">primary_or_secondary_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 821 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 822 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: html AND (</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2"> OR </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="si">}</span><span class="s2">)&quot;</span><span class="p">,</span>
 823 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 824 |         <span class="p">)</span>
 825 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">primary_or_secondary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Primary OR Secondary should return results (union)&quot;</span><span class="p">)</span>
 826 | 
 827 |         <span class="c1"># NOT</span>
 828 |         <span class="n">primary_not_secondary_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 829 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 830 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: html AND (</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2"> NOT </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="si">}</span><span class="s2">)&quot;</span><span class="p">,</span>
 831 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 832 |         <span class="p">)</span>
 833 | 
 834 |         <span class="n">secondary_not_primary_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 835 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 836 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: html AND (</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="si">}</span><span class="s2"> NOT </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2">)&quot;</span><span class="p">,</span>
 837 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 838 |         <span class="p">)</span>
 839 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">secondary_not_primary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;=</span> <span class="mi">0</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Secondary NOT Primary should return results&quot;</span><span class="p">)</span>
 840 | 
 841 |         <span class="c1"># logical relationships</span>
 842 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span>
 843 |             <span class="n">primary_and_secondary_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
 844 |             <span class="n">boolean_primary_resources</span> <span class="o">.</span><span class="n">total</span> <span class="o">+</span> <span class="n">boolean_secondary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">-</span> <span class="n">primary_or_secondary_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
 845 |             <span class="s2">&quot;Intersection should equal A + B - Union (inclusion-exclusion principle)&quot;</span>
 846 |         <span class="p">)</span>
 847 | 
 848 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span>
 849 |             <span class="n">primary_not_secondary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">+</span> <span class="n">primary_and_secondary_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
 850 |             <span class="n">boolean_primary_resources</span> <span class="o">.</span><span class="n">total</span><span class="p">,</span>
 851 |             <span class="s2">&quot;Primary NOT Secondary + Primary AND Secondary should equal total Primary results&quot;</span>
 852 |         <span class="p">)</span>
 853 | 
 854 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span>
 855 |             <span class="n">secondary_not_primary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">+</span> <span class="n">primary_and_secondary_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
 856 |             <span class="n">boolean_secondary_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
 857 |             <span class="s2">&quot;Secondary NOT Primary + Primary AND Secondary should equal total Secondary results&quot;</span>
 858 |         <span class="p">)</span>
 859 | 
 860 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span>
 861 |             <span class="n">primary_not_secondary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">+</span> <span class="n">secondary_not_primary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">+</span> <span class="n">primary_and_secondary_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
 862 |             <span class="n">primary_or_secondary_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
 863 |             <span class="s2">&quot;Sum of exclusive sets plus intersection should equal union&quot;</span>
 864 |         <span class="p">)</span>
 865 | 
 866 |         <span class="c1"># complex boolean with field constraints</span>
 867 |         <span class="n">primary_and_html_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 868 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 869 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: html AND (</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2">)&quot;</span><span class="p">,</span>
 870 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 871 |         <span class="p">)</span>
 872 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">primary_and_html_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Primary AND type:html should return results&quot;</span><span class="p">)</span>
 873 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span>
 874 |             <span class="n">primary_and_html_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&lt;=</span> <span class="n">boolean_primary_resources</span> <span class="o">.</span><span class="n">total</span><span class="p">,</span>
 875 |             <span class="s2">&quot;Adding AND constraints should not increase result count&quot;</span>
 876 |         <span class="p">)</span>
 877 | 
 878 |         <span class="c1"># Parentheses grouping</span>
 879 |         <span class="n">grouped_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 880 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 881 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: html AND (</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2"> OR </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="si">}</span><span class="s2">)&quot;</span><span class="p">,</span>
 882 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 883 |         <span class="p">)</span>
 884 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">grouped_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Grouped OR with HTML filter should return results&quot;</span><span class="p">)</span>
 885 | 
 886 | 
 887 |         <span class="n">hyphenated_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 888 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 889 |             <span class="n">query</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_HYPHENATED_KEYWORD</span><span class="p">,</span>
 890 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 891 |         <span class="p">)</span>
 892 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">hyphenated_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Keyword &#39;</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_HYPHENATED_KEYWORD</span><span class="si">}</span><span class="s2">&#39; should return results&quot;</span><span class="p">)</span>
 893 | 
 894 |         <span class="n">double_or_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 895 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 896 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;(</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2"> OR </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="si">}</span><span class="s2"> OR moffitor)&quot;</span>
 897 |         <span class="p">)</span>
 898 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertGreater</span><span class="p">(</span>
 899 |             <span class="n">double_or_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span>
 900 |             <span class="sa">f</span><span class="s2">&quot;OR query should return some results&quot;</span>
 901 |         <span class="p">)</span>
 902 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertLessEqual</span><span class="p">(</span>
 903 |             <span class="n">double_or_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="n">site_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
 904 |             <span class="sa">f</span><span class="s2">&quot;OR query should be less than, or equal to all results&quot;</span>
 905 |         <span class="p">)</span>
 906 |         <span class="n">parens_or_and_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 907 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 908 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;(</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2"> OR </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="si">}</span><span class="s2">) AND collaborations &quot;</span>
 909 |         <span class="p">)</span>
 910 |         <span class="c1"># respect the AND, there should be only one result</span>
 911 |         <span class="c1"># (A OR B) AND C vs. A OR B AND C</span>
 912 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span>
 913 |             <span class="n">parens_or_and_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span>
 914 |             <span class="sa">f</span><span class="s2">&quot;(A OR B) AND C should be 1 result (AND collaborations, unless fixture changed)&quot;</span>
 915 |         <span class="p">)</span>
 916 | 
 917 |         <span class="n">parens_or_and_resources_reverse</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 918 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 919 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;collaborations AND (</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2"> OR </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="si">}</span><span class="s2">) &quot;</span>
 920 |         <span class="p">)</span>
 921 |         <span class="c1"># respect the AND, there should be only one result</span>
 922 |         <span class="c1"># (A OR B) AND C vs. A OR B AND C</span>
 923 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span>
 924 |             <span class="n">parens_or_and_resources_reverse</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span>
 925 |             <span class="sa">f</span><span class="s2">&quot;A AND (B OR C) should be 1 result (collaborations AND, unless fixture changed)&quot;</span>
 926 |         <span class="p">)</span>
 927 | 
 928 |         <span class="n">wide_type_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 929 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 930 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: script OR type: style OR type: iframe OR type: font OR type: text OR type: rss OR type: other&quot;</span>
 931 |         <span class="p">)</span>
 932 | 
 933 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertLess</span><span class="p">(</span>
 934 |             <span class="n">wide_type_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="n">site_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
 935 |             <span class="sa">f</span><span class="s2">&quot;A long chained OR should not return all results&quot;</span>
 936 |         <span class="p">)</span>
 937 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertGreater</span><span class="p">(</span>
 938 |             <span class="n">wide_type_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span>
 939 |             <span class="sa">f</span><span class="s2">&quot;A long chained OR should return some results&quot;</span>
 940 |         <span class="p">)</span>
 941 | 
 942 |         <span class="n">complex_and</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 943 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 944 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2"> AND type:html AND status:200&quot;</span>
 945 |         <span class="p">)</span>
 946 | 
 947 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">complex_and</span><span class="o">.</span><span class="n">total</span> <span class="o">&lt;=</span> <span class="n">boolean_primary_resources</span> <span class="o">.</span><span class="n">total</span><span class="p">,</span>
 948 |                 <span class="s2">&quot;Adding AND conditions should not increase results&quot;</span><span class="p">)</span>
 949 | 
 950 |         <span class="n">grouped_or</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 951 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 952 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;(</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2"> OR </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="si">}</span><span class="s2">) AND type:html AND status:200&quot;</span>
 953 |         <span class="p">)</span>
 954 | 
 955 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">grouped_or</span><span class="o">.</span><span class="n">total</span> <span class="o">&lt;=</span> <span class="n">primary_or_secondary_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
 956 |                 <span class="s2">&quot;Adding AND conditions to OR should not increase results&quot;</span><span class="p">)</span>
 957 | 
 958 |         <span class="c1"># URL OR parsing, url is a special case, an fts5 field searched with SQL LIKE</span>
 959 |         <span class="n">url_or_simple</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 960 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span> <span class="n">query</span><span class="o">=</span><span class="s2">&quot;url: pragmar.com OR url: example.com&quot;</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
 961 |         <span class="n">url_or_with_type</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 962 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span> <span class="n">query</span><span class="o">=</span><span class="s2">&quot;type: html AND (url: pragmar.com OR url: example.com)&quot;</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
 963 |         <span class="n">html_total</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 964 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span> <span class="n">query</span><span class="o">=</span><span class="s2">&quot;type: html&quot;</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
 965 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">url_or_with_type</span><span class="o">.</span><span class="n">total</span> <span class="o">&lt;=</span> <span class="n">url_or_simple</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
 966 |             <span class="sa">f</span><span class="s2">&quot;AND constraint should not increase results&quot;</span><span class="p">)</span>
 967 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">url_or_with_type</span><span class="o">.</span><span class="n">total</span> <span class="o">&lt;=</span> <span class="n">html_total</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
 968 |             <span class="sa">f</span><span class="s2">&quot;URL filter should not exceed HTML total&quot;</span><span class="p">)</span>
 969 | 
 970 |     <span class="k">def</span> <span class="nf">__run_pragmar_search_tests_extras</span><span class="p">(</span>
 971 |             <span class="bp">self</span><span class="p">,</span>
 972 |             <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span>
 973 |             <span class="n">site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span>
 974 |             <span class="n">site_resources</span><span class="p">:</span><span class="n">BaseJsonApi</span><span class="p">,</span>
 975 |             <span class="n">primary_resources</span><span class="p">:</span><span class="n">BaseJsonApi</span><span class="p">,</span>
 976 |             <span class="n">secondary_resources</span><span class="p">:</span><span class="n">BaseJsonApi</span><span class="p">,</span>
 977 |         <span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
 978 | 
 979 |         <span class="n">snippet_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 980 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 981 |             <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2"> AND type: html&quot;</span><span class="p">,</span>
 982 |             <span class="n">extras</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;snippets&quot;</span><span class="p">],</span>
 983 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 984 |         <span class="p">)</span>
 985 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertIn</span><span class="p">(</span><span class="s2">&quot;snippets&quot;</span><span class="p">,</span> <span class="n">snippet_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()[</span><span class="s2">&quot;extras&quot;</span><span class="p">],</span>
 986 |                 <span class="s2">&quot;First result should have snippets in extras&quot;</span><span class="p">)</span>
 987 | 
 988 |         <span class="n">xpath_count_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 989 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 990 |             <span class="n">query</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="p">,</span>
 991 |             <span class="n">extras</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;markdown&quot;</span><span class="p">],</span>
 992 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
 993 |         <span class="p">)</span>
 994 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertIn</span><span class="p">(</span><span class="s2">&quot;markdown&quot;</span><span class="p">,</span> <span class="n">xpath_count_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()[</span><span class="s2">&quot;extras&quot;</span><span class="p">],</span>
 995 |                 <span class="s2">&quot;First result should have markdown in extras&quot;</span><span class="p">)</span>
 996 | 
 997 |         <span class="n">xpath_count_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
 998 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
 999 |             <span class="n">query</span><span class="o">=</span><span class="s2">&quot;url: pragmar.com AND status: 200&quot;</span><span class="p">,</span>
1000 |             <span class="n">extras</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;xpath&quot;</span><span class="p">],</span>
1001 |             <span class="n">extrasXpath</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;count(//h1)&quot;</span><span class="p">],</span>
1002 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
1003 |             <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;-url&quot;</span>
1004 |         <span class="p">)</span>
1005 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertIn</span><span class="p">(</span><span class="s2">&quot;xpath&quot;</span><span class="p">,</span> <span class="n">xpath_count_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()[</span><span class="s2">&quot;extras&quot;</span><span class="p">],</span>
1006 |                 <span class="s2">&quot;First result should have xpath in extras&quot;</span><span class="p">)</span>
1007 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">xpath_count_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()[</span><span class="s2">&quot;extras&quot;</span><span class="p">][</span><span class="s2">&quot;xpath&quot;</span><span class="p">]),</span>
1008 |                 <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;Should be exactly one H1 hit in xpath extras&quot;</span><span class="p">)</span>
1009 | 
1010 |         <span class="c1"># this test inadvertently also covers t_URL_FIELD parser testing</span>
1011 |         <span class="n">xpath_h1_text_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
1012 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
1013 |             <span class="n">query</span><span class="o">=</span><span class="s2">&quot;url: https://pragmar.com AND status: 200&quot;</span><span class="p">,</span>
1014 |             <span class="n">extras</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;xpath&quot;</span><span class="p">],</span>
1015 |             <span class="n">extrasXpath</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;//h1/text()&quot;</span><span class="p">],</span>
1016 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
1017 |             <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;+url&quot;</span>
1018 |         <span class="p">)</span>
1019 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertIn</span><span class="p">(</span><span class="s2">&quot;xpath&quot;</span><span class="p">,</span> <span class="n">xpath_h1_text_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()[</span><span class="s2">&quot;extras&quot;</span><span class="p">],</span>
1020 |                 <span class="s2">&quot;First result should have xpath in extras&quot;</span><span class="p">)</span>
1021 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span> <span class="n">xpath_h1_text_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()[</span><span class="s2">&quot;extras&quot;</span><span class="p">]</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">,</span>
1022 |                 <span class="s2">&quot;Should have pragmar in fixture h1&quot;</span><span class="p">)</span>
1023 | 
1024 |         <span class="c1"># should be pragmar homepage, assert &quot;pragmar&quot; in h1</span>
1025 |         <span class="n">first_xpath_result</span> <span class="o">=</span> <span class="n">xpath_h1_text_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()[</span><span class="s2">&quot;extras&quot;</span><span class="p">][</span><span class="s2">&quot;xpath&quot;</span><span class="p">][</span><span class="mi">0</span><span class="p">][</span><span class="s2">&quot;value&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
1026 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="s2">&quot;pragmar&quot;</span> <span class="ow">in</span> <span class="n">first_xpath_result</span><span class="p">,</span>
1027 |                 <span class="sa">f</span><span class="s2">&quot;Should have pragmar in fixture homepage h1 (</span><span class="si">{</span><span class="n">first_xpath_result</span><span class="si">}</span><span class="s2">)&quot;</span><span class="p">)</span>
1028 | 
1029 |         <span class="n">combined_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
1030 |             <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
1031 |             <span class="n">query</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="p">,</span>
1032 |             <span class="n">extras</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;snippets&quot;</span><span class="p">,</span> <span class="s2">&quot;markdown&quot;</span><span class="p">],</span>
1033 |             <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
1034 |         <span class="p">)</span>
1035 |         <span class="n">first_result</span> <span class="o">=</span> <span class="n">combined_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()</span>
1036 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertIn</span><span class="p">(</span><span class="s2">&quot;extras&quot;</span><span class="p">,</span> <span class="n">first_result</span><span class="p">,</span> <span class="s2">&quot;First result should have extras field&quot;</span><span class="p">)</span>
1037 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertIn</span><span class="p">(</span><span class="s2">&quot;snippets&quot;</span><span class="p">,</span> <span class="n">first_result</span><span class="p">[</span><span class="s2">&quot;extras&quot;</span><span class="p">],</span> <span class="s2">&quot;First result should have snippets in extras&quot;</span><span class="p">)</span>
1038 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertIn</span><span class="p">(</span><span class="s2">&quot;markdown&quot;</span><span class="p">,</span> <span class="n">first_result</span><span class="p">[</span><span class="s2">&quot;extras&quot;</span><span class="p">],</span> <span class="s2">&quot;First result should have markdown in extras&quot;</span><span class="p">)</span>
1039 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">primary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&lt;=</span> <span class="n">site_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
1040 |                 <span class="s2">&quot;Search should return less than or equivalent results to site total&quot;</span><span class="p">)</span>
1041 |         <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">secondary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&lt;=</span> <span class="n">site_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
1042 |                 <span class="s2">&quot;Search should return less than or equivalent results to site total&quot;</span><span class="p">)</span></div>
1043 | 
1044 | </pre></div>
1045 | 
1046 |            </div>
1047 |           </div>
1048 |           <footer>
1049 | 
1050 |   <hr/>
1051 | 
1052 |   <div role="contentinfo">
1053 |     <p>&#169; Copyright 2025, pragmar.</p>
1054 |   </div>
1055 | 
1056 |   Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
1057 |     <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
1058 |     provided by <a href="https://readthedocs.org">Read the Docs</a>.
1059 |    
1060 | 
1061 | </footer>
1062 |         </div>
1063 |       </div>
1064 |     </section>
1065 |   </div>
1066 |   <script>
1067 |       jQuery(function () {
1068 |           SphinxRtdTheme.Navigation.enable(true);
1069 |       });
1070 |   </script> 
1071 | 
1072 | </body>
1073 | </html>
```
Page 34/35FirstPrevNextLast