Add optional token for regex and httpheader. Add htmlparser source (#182)

add an httptoken option; add htmlparser with xpath source

Co-authored-by: Maud LAURENT <maud.laurent@eurecia.com>
Co-authored-by: lilydjwg <lilydjwg@gmail.com>
This commit is contained in:
Maud LAURENT 2021-06-11 10:18:39 +02:00 committed by GitHub
parent 4033c0b9ba
commit 9deb55806d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 118 additions and 4 deletions

View File

@ -41,7 +41,7 @@ jobs:
if: ${{ contains(matrix.deps, 'pycurl') }} if: ${{ contains(matrix.deps, 'pycurl') }}
run: sudo apt install -y libcurl4-openssl-dev run: sudo apt install -y libcurl4-openssl-dev
- name: Install Python deps - name: Install Python deps
run: pip install -U ${{ matrix.deps }} pytest pytest-asyncio pytest-httpbin flaky structlog toml appdirs run: pip install -U ${{ matrix.deps }} pytest pytest-asyncio pytest-httpbin flaky structlog toml appdirs lxml
- name: Decrypt keys - name: Decrypt keys
env: env:
KEY: ${{ secrets.KEY }} KEY: ${{ secrets.KEY }}

View File

@ -176,6 +176,15 @@ tries
This only works when the source implementation uses the builtin HTTP client. This only works when the source implementation uses the builtin HTTP client.
httptoken
A personal authorization token sent in the ``Authorization`` header when fetching the URL.
The format of the token depends on the authorization scheme the server requires:
- For a Bearer token, set ``Bearer <Your_bearer_token>``
- For a Basic token, set ``Basic <Your_base64_encoded_token>``
If this option is not set in the entry, the token is read from the keyfile under the key ``httptoken_{name}``.
If both ``prefix`` and ``from_pattern``/``to_pattern`` are used, If both ``prefix`` and ``from_pattern``/``to_pattern`` are used,
``from_pattern``/``to_pattern`` are ignored. If you want to strip the prefix ``from_pattern``/``to_pattern`` are ignored. If you want to strip the prefix
and then do something special, just use ``from_pattern```/``to_pattern``. For and then do something special, just use ``from_pattern```/``to_pattern``. For
@ -262,6 +271,22 @@ method
follow_redirects follow_redirects
(*Optional*) Whether to follow 3xx HTTP redirects. Default is ``false``. If you are looking at a ``Location`` header, you shouldn't change this. (*Optional*) Whether to follow 3xx HTTP redirects. Default is ``false``. If you are looking at a ``Location`` header, you shouldn't change this.
Search with an HTML Parser
~~~~~~~~~~~~~~~~~~~~~~~~~~
::
source = "htmlparser"
Send an HTTP request and search the response body using an XPath expression.
url
The URL of the HTTP request.
xpath
An xpath expression used to find the version string.
.. note::
An additional dependency "lxml" is required.
Find with a Command Find with a Command
~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~

View File

@ -20,3 +20,6 @@ ignore_missing_imports = True
[mypy-appdirs] [mypy-appdirs]
ignore_missing_imports = True ignore_missing_imports = True
[mypy-lxml]
ignore_missing_imports = True

View File

@ -7,4 +7,5 @@ from .util import (
AsyncCache, KeyManager, GetVersionError, EntryWaiter, AsyncCache, KeyManager, GetVersionError, EntryWaiter,
) )
from .sortversion import sort_version_keys from .sortversion import sort_version_keys
from .ctxvars import tries, proxy, user_agent, entry_waiter
from .ctxvars import tries, proxy, user_agent, httptoken, entry_waiter

View File

@ -16,4 +16,5 @@ if TYPE_CHECKING:
tries = ContextVar('tries', default=1) tries = ContextVar('tries', default=1)
proxy: ContextVar[Optional[str]] = ContextVar('proxy', default=None) proxy: ContextVar[Optional[str]] = ContextVar('proxy', default=None)
user_agent = ContextVar('user_agent', default=DEFAULT_USER_AGENT) user_agent = ContextVar('user_agent', default=DEFAULT_USER_AGENT)
httptoken = ContextVar('httptoken', default=None)
entry_waiter: ContextVar[EntryWaiter] = ContextVar('entry_waiter') entry_waiter: ContextVar[EntryWaiter] = ContextVar('entry_waiter')

View File

@ -5,7 +5,7 @@ import structlog
from typing import Optional, Dict, Mapping from typing import Optional, Dict, Mapping
import json as _json import json as _json
from ..ctxvars import tries, proxy, user_agent from ..ctxvars import tries, proxy, user_agent, httptoken
logger = structlog.get_logger(logger_name=__name__) logger = structlog.get_logger(logger_name=__name__)
@ -65,9 +65,12 @@ class BaseSession:
t = tries.get() t = tries.get()
p = proxy.get() p = proxy.get()
ua = user_agent.get() ua = user_agent.get()
httpt = httptoken.get()
headers = headers.copy() headers = headers.copy()
headers.setdefault('User-Agent', ua) headers.setdefault('User-Agent', ua)
if httpt is not None:
headers.setdefault('Authorization', httpt)
for i in range(1, t+1): for i in range(1, t+1):
try: try:

View File

@ -21,6 +21,7 @@ from .httpclient import session
from .ctxvars import tries as ctx_tries from .ctxvars import tries as ctx_tries
from .ctxvars import proxy as ctx_proxy from .ctxvars import proxy as ctx_proxy
from .ctxvars import user_agent as ctx_ua from .ctxvars import user_agent as ctx_ua
from .ctxvars import httptoken as ctx_httpt
logger = structlog.get_logger(logger_name=__name__) logger = structlog.get_logger(logger_name=__name__)
@ -246,6 +247,11 @@ class FunctionWorker(BaseWorker):
ua = entry.get('user_agent', None) ua = entry.get('user_agent', None)
if ua is not None: if ua is not None:
ctx_ua.set(ua) ctx_ua.set(ua)
httpt = entry.get('httptoken', None)
if httpt is None:
httpt = self.keymanager.get_key('httptoken_'+name)
if httpt is not None:
ctx_httpt.set(httpt)
try: try:
async with self.task_sem: async with self.task_sem:

View File

@ -0,0 +1,28 @@
# MIT licensed
# Copyright (c) 2020 Ypsilik <tt2laurent.maud@gmail.com>, et al.
# Copyright (c) 2013-2020 lilydjwg <lilydjwg@gmail.com>, et al.
from lxml import html, etree
from nvchecker.api import session, GetVersionError
async def get_version(name, conf, *, cache, **kwargs):
    """Look up the version for an entry through the shared cache.

    The entry configuration is flattened into a sorted tuple of
    ``(key, value)`` pairs so it can serve as the cache key; the cache is
    handed ``get_version_impl`` together with that key.
    """
    cache_key = tuple(sorted(conf.items()))
    result = await cache.get(cache_key, get_version_impl)
    return result
async def get_version_impl(info):
    """Fetch ``url`` and evaluate ``xpath`` against the parsed HTML body.

    ``info`` is an iterable of ``(key, value)`` pairs that is turned back
    into a dict. Keys read here: ``url``, ``xpath``, ``encoding`` and
    ``missing_ok``.

    Returns the result of ``doc.xpath(...)``, or ``None`` when the lookup
    raised ``ValueError`` and ``missing_ok`` is set.

    Raises ``GetVersionError`` when the lookup raised ``ValueError`` and
    ``missing_ok`` is not set, or when the xpath expression cannot be
    evaluated.
    """
    conf = dict(info)

    encoding = conf.get('encoding')
    parser = html.HTMLParser(encoding=encoding)
    res = await session.get(conf['url'])
    doc = html.fromstring(res.body, base_url=conf['url'], parser=parser)

    try:
        version = doc.xpath(conf.get('xpath'))
    except ValueError:
        if not conf.get('missing_ok', False):
            raise GetVersionError('version string not found.')
        # With missing_ok set, report "no version" explicitly. Without this
        # assignment `version` would be unbound and the return below would
        # raise UnboundLocalError.
        version = None
    except etree.XPathEvalError as e:
        raise GetVersionError('bad xpath', exc_info=e)

    return version

21
tests/test_htmlpasrer.py Normal file
View File

@ -0,0 +1,21 @@
# MIT licensed
# Copyright (c) 2021 ypsilik <tt2laurent.maud@gmail.com>, et al.
import pytest
pytestmark = [pytest.mark.asyncio, pytest.mark.needs_net]
async def test_xpath_ok(get_version):
    """An xpath that matches content on the page produces a result."""
    # Identity comparison is the idiomatic None check (``!= None`` can be
    # overridden by a custom ``__ne__``).
    assert await get_version("unifiedremote", {
        "source": "htmlparser",
        "url": "http://httpbin.org/",
        "xpath": '//pre[@class="version"]/text()',
    }) is not None
async def test_xpath_missing_ok(get_version):
    """With ``missing_ok`` set, this non-matching xpath is expected to give None."""
    entry = {
        "source": "htmlparser",
        "url": "http://httpbin.org/",
        "xpath": '//pre[@class="test-is-ok"]/text()',
        "missing_ok": True,
    }
    result = await get_version("unifiedremote", entry)
    assert result is None

View File

@ -2,6 +2,8 @@
# Copyright (c) 2021 lilydjwg <lilydjwg@gmail.com>, et al. # Copyright (c) 2021 lilydjwg <lilydjwg@gmail.com>, et al.
import pytest import pytest
import pytest_httpbin
assert pytest_httpbin # for pyflakes
pytestmark = [pytest.mark.asyncio, pytest.mark.needs_net] pytestmark = [pytest.mark.asyncio, pytest.mark.needs_net]
@ -10,5 +12,13 @@ async def test_redirection(get_version):
"source": "httpheader", "source": "httpheader",
"url": "https://www.unifiedremote.com/download/linux-x64-deb", "url": "https://www.unifiedremote.com/download/linux-x64-deb",
"regex": r'urserver-([\d.]+).deb', "regex": r'urserver-([\d.]+).deb',
}) != None }) is not None
async def test_get_version_withtoken(get_version, httpbin):
    """The httpheader source works against a basic-auth endpoint when ``httptoken`` is given."""
    entry = {
        "source": "httpheader",
        "url": httpbin.url + "/basic-auth/username/superpassword",
        "httptoken": "Basic dXNlcm5hbWU6c3VwZXJwYXNzd29yZA==",
        "header": "server",
        "regex": r'([0-9.]+)*',
    }
    found = await get_version("unifiedremote", entry)
    assert found is not None

View File

@ -42,3 +42,19 @@ async def test_missing_ok(get_version, httpbin):
"regex": "foobar", "regex": "foobar",
"missing_ok": True, "missing_ok": True,
}) is None }) is None
async def test_regex_with_tokenBasic(get_version, httpbin):
    """The regex source extracts the user name from a basic-auth protected response."""
    entry = {
        "source": "regex",
        "url": httpbin.url + "/basic-auth/username/superpassword",
        "httptoken": "Basic dXNlcm5hbWU6c3VwZXJwYXNzd29yZA==",
        "regex": r'"user":"([a-w]+)"',
    }
    found = await get_version("example", entry)
    assert found == "username"
async def test_regex_with_tokenBearer(get_version, httpbin):
    """The regex source extracts the first part of the token echoed by the bearer endpoint."""
    entry = {
        "source": "regex",
        "url": httpbin.url + "/bearer",
        "httptoken": "Bearer username:password",
        "regex": r'"token":"([a-w]+):.*"',
    }
    found = await get_version("example", entry)
    assert found == "username"