From 12e99921aeb4cb112e37ebf9640edd6974f047de Mon Sep 17 00:00:00 2001 From: Marvin Zhang Date: Sun, 7 Jul 2019 09:03:02 +0800 Subject: [PATCH] updated sinastock_spider --- spiders/sinastock/sinastock/spiders/sinastock_spider.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spiders/sinastock/sinastock/spiders/sinastock_spider.py b/spiders/sinastock/sinastock/spiders/sinastock_spider.py index 9d258e6c..95c9df86 100644 --- a/spiders/sinastock/sinastock/spiders/sinastock_spider.py +++ b/spiders/sinastock/sinastock/spiders/sinastock_spider.py @@ -18,12 +18,13 @@ class SinastockSpiderSpider(scrapy.Spider): ) db = mongo[os.environ.get('MONGO_DB') or 'crawlab_test'] col = db.get_collection(os.environ.get('CRAWLAB_COLLECTION') or 'stock_news') + page_num = os.environ.get('PAGE_NUM') or 3 def start_requests(self): col = self.db['stocks'] for s in col.find({}): code, ex = s['ts_code'].split('.') - for i in range(10): + for i in range(self.page_num): url = f'http://vip.stock.finance.sina.com.cn/corp/view/vCB_AllNewsStock.php?symbol={ex.lower()}{code}&Page={i + 1}' yield scrapy.Request( url=url,