From 9deb55806d33976177ec040153550b0e8ebdc3eb Mon Sep 17 00:00:00 2001 From: Maud LAURENT Date: Fri, 11 Jun 2021 10:18:39 +0200 Subject: [PATCH] Add optional token for regex and httpheader. Add htmlparser source (#182) add an httptoken option; add htmlparser with xpath source Co-authored-by: Maud LAURENT Co-authored-by: lilydjwg --- .github/workflows/tests.yaml | 2 +- docs/usage.rst | 25 +++++++++++++++++++++++++ mypy.ini | 3 +++ nvchecker/api.py | 3 ++- nvchecker/ctxvars.py | 1 + nvchecker/httpclient/base.py | 5 ++++- nvchecker/util.py | 6 ++++++ nvchecker_source/htmlparser.py | 28 ++++++++++++++++++++++++++++ tests/test_htmlpasrer.py | 21 +++++++++++++++++++++ tests/test_httpheader.py | 12 +++++++++++- tests/test_regex.py | 16 ++++++++++++++++ 11 files changed, 118 insertions(+), 4 deletions(-) create mode 100644 nvchecker_source/htmlparser.py create mode 100644 tests/test_htmlpasrer.py diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 5ba1d63..6c19858 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -41,7 +41,7 @@ jobs: if: ${{ contains(matrix.deps, 'pycurl') }} run: sudo apt install -y libcurl4-openssl-dev - name: Install Python deps - run: pip install -U ${{ matrix.deps }} pytest pytest-asyncio pytest-httpbin flaky structlog toml appdirs + run: pip install -U ${{ matrix.deps }} pytest pytest-asyncio pytest-httpbin flaky structlog toml appdirs lxml - name: Decrypt keys env: KEY: ${{ secrets.KEY }} diff --git a/docs/usage.rst b/docs/usage.rst index b06ecbf..c6f4529 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -176,6 +176,15 @@ tries This only works when the source implementation uses the builtin HTTP client. +httptoken + A personal authorization token used to fetch the url with the ``Authorization`` header. + The type of token depends on the authorization required. + + - For Bearer token set : ``Bearer <Your_bearer_token>`` + - For Basic token set : ``Basic <Your_base64_encoded_token>`` + + In the keyfile add ``httptoken_{name}`` token. 
+ If both ``prefix`` and ``from_pattern``/``to_pattern`` are used, ``from_pattern``/``to_pattern`` are ignored. If you want to strip the prefix and then do something special, just use ``from_pattern```/``to_pattern``. For @@ -262,6 +271,22 @@ method follow_redirects (*Optional*) Whether to follow 3xx HTTP redirects. Default is ``false``. If you are looking at a ``Location`` header, you shouldn't change this. +Search with an HTML Parser +~~~~~~~~~~~~~~~~~~~~~~~~~~ +:: + + source = "htmlparser" + +Send an HTTP request and search through the body for a specific xpath. + +url + The URL of the HTTP request. + +xpath + An xpath expression used to find the version string. + +.. note:: + An additional dependency "lxml" is required. Find with a Command ~~~~~~~~~~~~~~~~~~~ diff --git a/mypy.ini b/mypy.ini index a02b84b..1aead6f 100644 --- a/mypy.ini +++ b/mypy.ini @@ -20,3 +20,6 @@ ignore_missing_imports = True [mypy-appdirs] ignore_missing_imports = True + +[mypy-lxml] +ignore_missing_imports = True diff --git a/nvchecker/api.py b/nvchecker/api.py index 049de04..cd9b991 100644 --- a/nvchecker/api.py +++ b/nvchecker/api.py @@ -7,4 +7,5 @@ from .util import ( AsyncCache, KeyManager, GetVersionError, EntryWaiter, ) from .sortversion import sort_version_keys -from .ctxvars import tries, proxy, user_agent, entry_waiter + +from .ctxvars import tries, proxy, user_agent, httptoken, entry_waiter diff --git a/nvchecker/ctxvars.py b/nvchecker/ctxvars.py index c68c9db..eb82a29 100644 --- a/nvchecker/ctxvars.py +++ b/nvchecker/ctxvars.py @@ -16,4 +16,5 @@ if TYPE_CHECKING: tries = ContextVar('tries', default=1) proxy: ContextVar[Optional[str]] = ContextVar('proxy', default=None) user_agent = ContextVar('user_agent', default=DEFAULT_USER_AGENT) +httptoken = ContextVar('httptoken', default=None) entry_waiter: ContextVar[EntryWaiter] = ContextVar('entry_waiter') diff --git a/nvchecker/httpclient/base.py b/nvchecker/httpclient/base.py index f4f6061..2c14938 100644 --- 
a/nvchecker/httpclient/base.py +++ b/nvchecker/httpclient/base.py @@ -5,7 +5,7 @@ import structlog from typing import Optional, Dict, Mapping import json as _json -from ..ctxvars import tries, proxy, user_agent +from ..ctxvars import tries, proxy, user_agent, httptoken logger = structlog.get_logger(logger_name=__name__) @@ -65,9 +65,12 @@ class BaseSession: t = tries.get() p = proxy.get() ua = user_agent.get() + httpt = httptoken.get() headers = headers.copy() headers.setdefault('User-Agent', ua) + if httpt is not None: + headers.setdefault('Authorization', httpt) for i in range(1, t+1): try: diff --git a/nvchecker/util.py b/nvchecker/util.py index 9a9ee33..1184e00 100644 --- a/nvchecker/util.py +++ b/nvchecker/util.py @@ -21,6 +21,7 @@ from .httpclient import session from .ctxvars import tries as ctx_tries from .ctxvars import proxy as ctx_proxy from .ctxvars import user_agent as ctx_ua +from .ctxvars import httptoken as ctx_httpt logger = structlog.get_logger(logger_name=__name__) @@ -246,6 +247,11 @@ class FunctionWorker(BaseWorker): ua = entry.get('user_agent', None) if ua is not None: ctx_ua.set(ua) + httpt = entry.get('httptoken', None) + if httpt is None: + httpt = self.keymanager.get_key('httptoken_'+name) + if httpt is not None: + ctx_httpt.set(httpt) try: async with self.task_sem: diff --git a/nvchecker_source/htmlparser.py b/nvchecker_source/htmlparser.py new file mode 100644 index 0000000..a64f213 --- /dev/null +++ b/nvchecker_source/htmlparser.py @@ -0,0 +1,28 @@ +# MIT licensed +# Copyright (c) 2020 Ypsilik , et al. +# Copyright (c) 2013-2020 lilydjwg , et al. 
+
+from lxml import html, etree
+
+from nvchecker.api import session, GetVersionError
+
+async def get_version(name, conf, *, cache, **kwargs):
+  '''Evaluate the configured xpath, going through ``cache``.'''
+  key = tuple(sorted(conf.items()))  # hashable, order-independent key
+  return await cache.get(key, get_version_impl)
+
+async def get_version_impl(info):
+  '''Fetch ``url``, parse the body as HTML and return the ``xpath`` result.'''
+  conf = dict(info)
+  encoding = conf.get('encoding')
+  parser = html.HTMLParser(encoding=encoding)
+  res = await session.get(conf['url'])
+  doc = html.fromstring(res.body, base_url=conf['url'], parser=parser)
+  try:
+    return doc.xpath(conf.get('xpath'))
+  except ValueError:
+    if conf.get('missing_ok', False):
+      return None  # missing_ok: report no version rather than fail
+    raise GetVersionError('version string not found.')
+  except etree.XPathEvalError as e:
+    raise GetVersionError('bad xpath', exc_info=e)
diff --git a/tests/test_htmlpasrer.py b/tests/test_htmlpasrer.py
new file mode 100644
index 0000000..f3be52b
--- /dev/null
+++ b/tests/test_htmlpasrer.py
@@ -0,0 +1,21 @@
+# MIT licensed
+# Copyright (c) 2021 ypsilik , et al.
+
+import pytest
+
+pytestmark = [pytest.mark.asyncio, pytest.mark.needs_net]
+
+async def test_xpath_ok(get_version):
+    assert await get_version("unifiedremote", {
+        "source": "htmlparser",
+        "url": "http://httpbin.org/",
+        "xpath": '//pre[@class="version"]/text()',
+    }) is not None
+
+async def test_xpath_missing_ok(get_version):
+    assert await get_version("unifiedremote", {
+        "source": "htmlparser",
+        "url": "http://httpbin.org/",
+        "xpath": '//pre[@class="test-is-ok"]/text()',
+        "missing_ok": True,
+    }) is None
diff --git a/tests/test_httpheader.py b/tests/test_httpheader.py
index ccff11b..68ddb51 100644
--- a/tests/test_httpheader.py
+++ b/tests/test_httpheader.py
@@ -2,6 +2,8 @@
 # Copyright (c) 2021 lilydjwg , et al.
 
 import pytest
+import pytest_httpbin
+assert pytest_httpbin # for pyflakes
 
 pytestmark = [pytest.mark.asyncio, pytest.mark.needs_net]
 
@@ -10,5 +12,13 @@ async def test_redirection(get_version):
         "source": "httpheader",
         "url": "https://www.unifiedremote.com/download/linux-x64-deb",
         "regex": r'urserver-([\d.]+).deb',
-    }) != None
+    }) is not None
 
+async def test_get_version_withtoken(get_version, httpbin):
+    assert await get_version("unifiedremote", {
+        "source": "httpheader",
+        "url": httpbin.url + "/basic-auth/username/superpassword",
+        "httptoken": "Basic dXNlcm5hbWU6c3VwZXJwYXNzd29yZA==",  # base64 of "username:superpassword", matching the URL
+        "header": "server",
+        "regex": r'([0-9.]+)*',  # NOTE(review): the trailing * lets this match an empty string - confirm that is intended
+    }) is not None
diff --git a/tests/test_regex.py b/tests/test_regex.py
index 23737e9..739700c 100644
--- a/tests/test_regex.py
+++ b/tests/test_regex.py
@@ -42,3 +42,19 @@ async def test_missing_ok(get_version, httpbin):
         "regex": "foobar",
         "missing_ok": True,
     }) is None
+
+async def test_regex_with_tokenBasic(get_version, httpbin):
+    assert await get_version("example", {
+        "source": "regex",
+        "url": httpbin.url + "/basic-auth/username/superpassword",
+        "httptoken": "Basic dXNlcm5hbWU6c3VwZXJwYXNzd29yZA==",  # base64 of "username:superpassword", matching the URL
+        "regex": r'"user":"([a-w]+)"',  # NOTE(review): [a-w] looks like a typo for [a-z]; it still covers every letter of "username"
+    }) == "username"
+
+async def test_regex_with_tokenBearer(get_version, httpbin):
+    assert await get_version("example", {
+        "source": "regex",
+        "url": httpbin.url + "/bearer",
+        "httptoken": "Bearer username:password",  # NOTE(review): presumably echoed back by /bearer as "token" - confirm against httpbin
+        "regex": r'"token":"([a-w]+):.*"',  # captures only the part of the token before the first ':'
+    }) == "username"