Merge branch 'develop' of github.com:tikazyq/crawlab into develop

This commit is contained in:
Marvin Zhang
2019-06-06 20:16:47 +08:00

View File

@@ -509,7 +509,10 @@ class SpiderApi(BaseApi):
 }, r.status_code
 # get html parse tree
-sel = etree.HTML(r.content)
+try:
+    sel = etree.HTML(r.content.decode('utf-8'))
+except Exception as err:
+    sel = etree.HTML(r.content)
 # remove unnecessary tags
 unnecessary_tags = [