From 69e2e2f3c5fa3a4a2db2a694dd0c149a20f4ab8c Mon Sep 17 00:00:00 2001
From: Marvin Zhang
Date: Fri, 10 May 2019 12:39:10 +0800
Subject: [PATCH] added chinaz spider

---
 spiders/chinaz/chinaz/pipelines.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/spiders/chinaz/chinaz/pipelines.py b/spiders/chinaz/chinaz/pipelines.py
index 5758153a..ad3fbb23 100644
--- a/spiders/chinaz/chinaz/pipelines.py
+++ b/spiders/chinaz/chinaz/pipelines.py
@@ -23,5 +23,7 @@ class MongoPipeline(object):
     def process_item(self, item, spider):
         item['task_id'] = os.environ.get('CRAWLAB_TASK_ID')
         item['_id'] = item['domain']
-        self.col.save(item)
+        # Store each domain only once: write when no document with this _id exists yet.
+        if self.col.find_one({'_id': item['_id']}) is None:
+            self.col.save(item)
         return item