From 61c421094f12e5b0bce6c76483de9cc2d12e1a72 Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Thu, 6 Jun 2019 17:53:07 +0800 Subject: [PATCH] updated spiders --- crawlab/routes/spiders.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crawlab/routes/spiders.py b/crawlab/routes/spiders.py index a0a902da..04e1eb96 100644 --- a/crawlab/routes/spiders.py +++ b/crawlab/routes/spiders.py @@ -509,7 +509,10 @@ class SpiderApi(BaseApi): }, r.status_code # get html parse tree - sel = etree.HTML(r.content.decode('utf-8')) + try: + sel = etree.HTML(r.content.decode('utf-8')) + except Exception as err: + sel = etree.HTML(r.content) # remove unnecessary tags unnecessary_tags = [