From c43d4e900f4e62ae39464169621c465a45101dd5 Mon Sep 17 00:00:00 2001 From: Yichao Yu Date: Sun, 5 Sep 2021 16:26:30 -0400 Subject: [PATCH] Support HTTP POST request in the htmlparser and regex source Example: source = "regex" regex = "spcm_linux_libs_v[0-9a-zA-Z]*" url = "https://spectrum-instrumentation.com/spcm_downloads_downloads_ajax" post_data = "series%5B%5D=273&families%5B%5D=475" --- docs/usage.rst | 12 ++++++++++++ nvchecker_source/htmlparser.py | 8 +++++++- nvchecker_source/regex.py | 8 +++++++- tests/test_regex.py | 33 +++++++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 2 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index 0da4d18..72fe770 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -269,6 +269,12 @@ regex When multiple version strings are found, the maximum of those is chosen. +post_data + (*Optional*) When present, a ``POST`` request (instead of a ``GET``) will be used. The value should be a string containing the full body of the request. The content type of the body can be specified using the ``post_data_type`` option. + +post_data_type + (*Optional*) Specifies the ``Content-Type`` of the request body (``post_data``). By default, this is ``application/x-www-form-urlencoded``. + This source supports :ref:`list options`. Search in an HTTP header @@ -312,6 +318,12 @@ url xpath An xpath expression used to find the version string. +post_data + (*Optional*) When present, a ``POST`` request (instead of a ``GET``) will be used. The value should be a string containing the full body of the request. The content type of the body can be specified using the ``post_data_type`` option. + +post_data_type + (*Optional*) Specifies the ``Content-Type`` of the request body (``post_data``). By default, this is ``application/x-www-form-urlencoded``. + .. note:: An additional dependency "lxml" is required. 
diff --git a/nvchecker_source/htmlparser.py b/nvchecker_source/htmlparser.py index a64f213..077f1e0 100644 --- a/nvchecker_source/htmlparser.py +++ b/nvchecker_source/htmlparser.py @@ -15,7 +15,13 @@ async def get_version_impl(info): encoding = conf.get('encoding') parser = html.HTMLParser(encoding=encoding) - res = await session.get(conf['url']) + data = conf.get('post_data') + if data is None: + res = await session.get(conf['url']) + else: + res = await session.post(conf['url'], body = data, headers = { + 'Content-Type': conf.get('post_data_type', 'application/x-www-form-urlencoded') + }) doc = html.fromstring(res.body, base_url=conf['url'], parser=parser) try: diff --git a/nvchecker_source/regex.py b/nvchecker_source/regex.py index 1660b4f..48406e1 100644 --- a/nvchecker_source/regex.py +++ b/nvchecker_source/regex.py @@ -20,7 +20,13 @@ async def get_version_impl(info): encoding = conf.get('encoding', 'latin1') - res = await session.get(conf['url']) + data = conf.get('post_data') + if data is None: + res = await session.get(conf['url']) + else: + res = await session.post(conf['url'], body = data, headers = { + 'Content-Type': conf.get('post_data_type', 'application/x-www-form-urlencoded') + }) body = res.body.decode(encoding) versions = regex.findall(body) if not versions and not conf.get('missing_ok', False): diff --git a/tests/test_regex.py b/tests/test_regex.py index e54bca1..1b4c255 100644 --- a/tests/test_regex.py +++ b/tests/test_regex.py @@ -87,3 +87,36 @@ async def test_regex_bad_ssl(get_version, httpbin_secure): else: assert False, 'certificate should not be trusted' +async def test_regex_post(get_version, httpbin): + assert await get_version("example", { + "source": "regex", + "url": httpbin.url + "/post", + "regex": r'"ABCDEF":\s*"(\w+)"', + "post_data": "ABCDEF=234&CDEFG=xyz" + }) == "234" + +async def test_regex_post2(get_version, httpbin): + assert await get_version("example", { + "source": "regex", + "url": httpbin.url + "/post", + "regex": 
r'"CDEFG":\s*"(\w+)"', + "post_data": "ABCDEF=234&CDEFG=xyz" + }) == "xyz" + +async def test_regex_post_json(get_version, httpbin): + assert await get_version("example", { + "source": "regex", + "url": httpbin.url + "/post", + "regex": r'"ABCDEF":\s*(\w+)', + "post_data": '{"ABCDEF":234,"CDEFG":"xyz"}', + "post_data_type": "application/json" + }) == "234" + +async def test_regex_post_json2(get_version, httpbin): + assert await get_version("example", { + "source": "regex", + "url": httpbin.url + "/post", + "regex": r'"CDEFG":\s*"(\w+)"', + "post_data": '{"ABCDEF":234,"CDEFG":"xyz"}', + "post_data_type": "application/json" + }) == "xyz"