updated spiders

This commit is contained in:
Marvin Zhang
2019-06-06 17:53:07 +08:00
parent 40743e2f0a
commit 61c421094f

View File

@@ -509,7 +509,10 @@ class SpiderApi(BaseApi):
}, r.status_code
# get html parse tree
sel = etree.HTML(r.content.decode('utf-8'))
try:
sel = etree.HTML(r.content.decode('utf-8'))
except Exception as err:
sel = etree.HTML(r.content)
# remove unnecessary tags
unnecessary_tags = [