htmlparser: handle xpath pointing to element instead of text()

This commit is contained in:
lilydjwg 2022-06-12 12:31:11 +08:00
parent e683476fb2
commit 8b32c26d1e
2 changed files with 16 additions and 1 deletions

View File

@ -25,10 +25,17 @@ async def get_version_impl(info):
doc = html.fromstring(res.body, base_url=conf['url'], parser=parser) doc = html.fromstring(res.body, base_url=conf['url'], parser=parser)
try: try:
version = doc.xpath(conf.get('xpath')) els = doc.xpath(conf.get('xpath'))
except ValueError: except ValueError:
if not conf.get('missing_ok', False): if not conf.get('missing_ok', False):
raise GetVersionError('version string not found.') raise GetVersionError('version string not found.')
except etree.XPathEvalError as e: except etree.XPathEvalError as e:
raise GetVersionError('bad xpath', exc_info=e) raise GetVersionError('bad xpath', exc_info=e)
version = [
str(el)
if isinstance(el, str)
else str(el.text_content())
for el in els
]
return version return version

View File

@ -19,3 +19,11 @@ async def test_xpath_missing_ok(get_version):
"xpath": '//pre[@class="test-is-ok"]/text()', "xpath": '//pre[@class="test-is-ok"]/text()',
"missing_ok": True, "missing_ok": True,
}) is None }) is None
async def test_xpath_element(get_version):
    """Check that an XPath selecting an element node yields a result.

    The ``xpath`` below targets the ``<pre class="version">`` element
    itself rather than its ``text()`` child; the test only requires that
    the lookup returns something other than ``None``.
    """
    result = await get_version("unifiedremote", {
        "source": "htmlparser",
        "url": "http://httpbin.org/",
        "xpath": '//pre[@class="version"]',
    })
    # Identity comparison is the correct check against the None singleton;
    # `!= None` can be overridden by a custom __ne__ on the returned object.
    assert result is not None