diff --git a/crawlab/routes/spiders.py b/crawlab/routes/spiders.py
index a0a902da..04e1eb96 100644
--- a/crawlab/routes/spiders.py
+++ b/crawlab/routes/spiders.py
@@ -509,7 +509,13 @@ class SpiderApi(BaseApi):
             }, r.status_code
 
         # get html parse tree
-        sel = etree.HTML(r.content.decode('utf-8'))
+        try:
+            sel = etree.HTML(r.content.decode('utf-8'))
+        except ValueError:
+            # The body is not valid UTF-8 (UnicodeDecodeError is a subclass of
+            # ValueError), or lxml refused the decoded str (presumably because
+            # it carries an encoding declaration): parse the raw bytes instead.
+            sel = etree.HTML(r.content)
 
         # remove unnecessary tags
         unnecessary_tags = [