Support HTTP POST requests in the htmlparser and regex sources

Example:

    source = "regex"
    regex = "spcm_linux_libs_v[0-9a-zA-Z]*"
    url = "https://spectrum-instrumentation.com/spcm_downloads_downloads_ajax"
    post_data = "series%5B%5D=273&families%5B%5D=475"
This commit is contained in:
Yichao Yu 2021-09-05 16:26:30 -04:00
parent c65a5343ed
commit c43d4e900f
No known key found for this signature in database
GPG Key ID: 07F45E2A1937DD32
4 changed files with 59 additions and 2 deletions

View File

@ -269,6 +269,12 @@ regex
When multiple version strings are found, the maximum of those is chosen.
post_data
(*Optional*) When present, a ``POST`` request (instead of a ``GET``) will be used. The value should be a string containing the full body of the request. The content type of the body can be specified using the ``post_data_type`` option.
post_data_type
(*Optional*) Specifies the ``Content-Type`` of the request body (``post_data``). By default, this is ``application/x-www-form-urlencoded``.
This source supports :ref:`list options`.
Search in an HTTP header
@ -312,6 +318,12 @@ url
xpath
An xpath expression used to find the version string.
post_data
(*Optional*) When present, a ``POST`` request (instead of a ``GET``) will be used. The value should be a string containing the full body of the request. The content type of the body can be specified using the ``post_data_type`` option.
post_data_type
(*Optional*) Specifies the ``Content-Type`` of the request body (``post_data``). By default, this is ``application/x-www-form-urlencoded``.
.. note::
An additional dependency "lxml" is required.

View File

@ -15,7 +15,13 @@ async def get_version_impl(info):
encoding = conf.get('encoding')
parser = html.HTMLParser(encoding=encoding)
res = await session.get(conf['url'])
data = conf.get('post_data')
if data is None:
res = await session.get(conf['url'])
else:
res = await session.post(conf['url'], body = data, headers = {
'Content-Type': conf.get('post_data_type', 'application/x-www-form-urlencoded')
})
doc = html.fromstring(res.body, base_url=conf['url'], parser=parser)
try:

View File

@ -20,7 +20,13 @@ async def get_version_impl(info):
encoding = conf.get('encoding', 'latin1')
res = await session.get(conf['url'])
data = conf.get('post_data')
if data is None:
res = await session.get(conf['url'])
else:
res = await session.post(conf['url'], body = data, headers = {
'Content-Type': conf.get('post_data_type', 'application/x-www-form-urlencoded')
})
body = res.body.decode(encoding)
versions = regex.findall(body)
if not versions and not conf.get('missing_ok', False):

View File

@ -87,3 +87,36 @@ async def test_regex_bad_ssl(get_version, httpbin_secure):
else:
assert False, 'certificate should not be trusted'
async def test_regex_post(get_version, httpbin):
    """Regex source with ``post_data`` (no ``post_data_type``) yields ``234``.

    The request goes to the ``/post`` path of the ``httpbin`` fixture and the
    regex captures the value that follows the ``"ABCDEF"`` key in the reply.
    """
    conf = {
        "source": "regex",
        "url": httpbin.url + "/post",
        "regex": r'"ABCDEF":\s*"(\w+)"',
        "post_data": "ABCDEF=234&CDEFG=xyz"
    }
    version = await get_version("example", conf)
    assert version == "234"
async def test_regex_post2(get_version, httpbin):
    """Regex source with ``post_data`` (no ``post_data_type``) yields ``xyz``.

    Same request body as the ``ABCDEF`` case, but the regex captures the value
    that follows the ``"CDEFG"`` key in the reply from ``/post``.
    """
    conf = {
        "source": "regex",
        "url": httpbin.url + "/post",
        "regex": r'"CDEFG":\s*"(\w+)"',
        "post_data": "ABCDEF=234&CDEFG=xyz"
    }
    version = await get_version("example", conf)
    assert version == "xyz"
async def test_regex_post_json(get_version, httpbin):
    """Regex source with a JSON ``post_data`` body yields ``234``.

    ``post_data_type`` is set to ``application/json``; the regex captures the
    unquoted value that follows the ``"ABCDEF"`` key in the reply from
    ``/post``.
    """
    conf = {
        "source": "regex",
        "url": httpbin.url + "/post",
        "regex": r'"ABCDEF":\s*(\w+)',
        "post_data": '{"ABCDEF":234,"CDEFG":"xyz"}',
        "post_data_type": "application/json"
    }
    version = await get_version("example", conf)
    assert version == "234"
async def test_regex_post_json2(get_version, httpbin):
    """Regex source with a JSON ``post_data`` body yields ``xyz``.

    ``post_data_type`` is set to ``application/json``; the regex captures the
    quoted value that follows the ``"CDEFG"`` key in the reply from ``/post``.
    """
    conf = {
        "source": "regex",
        "url": httpbin.url + "/post",
        "regex": r'"CDEFG":\s*"(\w+)"',
        "post_data": '{"ABCDEF":234,"CDEFG":"xyz"}',
        "post_data_type": "application/json"
    }
    version = await get_version("example", conf)
    assert version == "xyz"