support sending logs to ES (Elasticsearch)

This commit is contained in:
hantmac
2020-04-27 15:59:06 +08:00
parent dd6f4fb59b
commit a9ae673d52
131 changed files with 4227 additions and 15 deletions
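The example spider files reproduced below do not themselves touch Elasticsearch; they are the bundled demo spiders added alongside the log-forwarding change. Purely to illustrate the commit's subject, here is a minimal sketch, assuming the official elasticsearch Python client, a hypothetical CRAWLAB_ES_URL environment variable and a hypothetical crawlab_logs index, of what shipping a task's log lines to ES could look like:

import os
from elasticsearch import Elasticsearch

# Hypothetical helper: one document per log line, keyed by the Crawlab task id.
es = Elasticsearch(os.environ.get('CRAWLAB_ES_URL', 'http://localhost:9200'))

def send_log_to_es(task_id, line):
    es.index(index='crawlab_logs', body={'task_id': task_id, 'msg': line})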

.gitignore
View File

@@ -1,4 +1,5 @@
.idea/
.vscode/
# Byte-compiled / optimized / DLL files
__pycache__/

View File

@@ -0,0 +1,51 @@
name: "amazon_config"
display_name: "亚马逊中国(可配置)"
remark: "亚马逊中国搜索手机,列表+分页"
type: "configurable"
col: "results_amazon_config"
engine: scrapy
start_url: https://www.amazon.cn/s?k=%E6%89%8B%E6%9C%BA&__mk_zh_CN=%E4%BA%9A%E9%A9%AC%E9%80%8A%E7%BD%91%E7%AB%99&ref=nb_sb_noss_2
start_stage: list
stages:
- name: list
is_list: true
list_css: .s-result-item
list_xpath: ""
page_css: .a-last > a
page_xpath: ""
page_attr: href
fields:
- name: title
css: span.a-text-normal
xpath: ""
attr: ""
next_stage: ""
remark: ""
- name: url
css: .a-link-normal
xpath: ""
attr: href
next_stage: ""
remark: ""
- name: price
css: ""
xpath: .//*[@class="a-price-whole"]
attr: ""
next_stage: ""
remark: ""
- name: price_fraction
css: ""
xpath: .//*[@class="a-price-fraction"]
attr: ""
next_stage: ""
remark: ""
- name: img
css: .s-image-square-aspect > img
xpath: ""
attr: src
next_stage: ""
remark: ""
settings:
ROBOTSTXT_OBEY: "false"
USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML,
like Gecko) Chrome/78.0.3904.108 Safari/537.36
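For readers skimming the Spiderfile above: each field entry maps one-to-one onto an extraction call in the generated Scrapy spider that appears further down in this commit, roughly as follows (comments only, shown for orientation):

# css: span.a-text-normal, attr: ""    ->  elem.css('span.a-text-normal::text').extract_first()
# css: .a-link-normal, attr: href      ->  elem.css('.a-link-normal::attr("href")').extract_first()
# xpath: .//*[@class="a-price-whole"]  ->  elem.xpath('string(.//*[@class="a-price-whole"])').extract_first()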

View File

@@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy
class Item(scrapy.Item):
_id = scrapy.Field()
task_id = scrapy.Field()
ts = scrapy.Field()
title = scrapy.Field()
url = scrapy.Field()
price = scrapy.Field()
price_fraction = scrapy.Field()
img = scrapy.Field()

View File

@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
from scrapy import signals
class ConfigSpiderSpiderMiddleware(object):
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the spider middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_spider_input(self, response, spider):
# Called for each response that goes through the spider
# middleware and into the spider.
# Should return None or raise an exception.
return None
def process_spider_output(self, response, result, spider):
# Called with the results returned from the Spider, after
# it has processed the response.
# Must return an iterable of Request, dict or Item objects.
for i in result:
yield i
def process_spider_exception(self, response, exception, spider):
# Called when a spider or process_spider_input() method
# (from other spider middleware) raises an exception.
# Should return either None or an iterable of Request, dict
# or Item objects.
pass
def process_start_requests(self, start_requests, spider):
# Called with the start requests of the spider, and works
# similarly to the process_spider_output() method, except
# that it doesn't have a response associated.
# Must return only requests (not items).
for r in start_requests:
yield r
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)
class ConfigSpiderDownloaderMiddleware(object):
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the downloader middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_request(self, request, spider):
# Called for each request that goes through the downloader
# middleware.
# Must either:
# - return None: continue processing this request
# - or return a Response object
# - or return a Request object
# - or raise IgnoreRequest: process_exception() methods of
# installed downloader middleware will be called
return None
def process_response(self, request, response, spider):
# Called with the response returned from the downloader.
# Must either:
# - return a Response object
# - return a Request object
# - or raise IgnoreRequest
return response
def process_exception(self, request, exception, spider):
# Called when a download handler or a process_request()
# (from other downloader middleware) raises an exception.
# Must either:
# - return None: continue processing this exception
# - return a Response object: stops process_exception() chain
# - return a Request object: stops process_exception() chain
pass
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)

View File

@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
import os
from pymongo import MongoClient
mongo = MongoClient(
host=os.environ.get('CRAWLAB_MONGO_HOST') or 'localhost',
port=int(os.environ.get('CRAWLAB_MONGO_PORT') or 27017),
username=os.environ.get('CRAWLAB_MONGO_USERNAME'),
password=os.environ.get('CRAWLAB_MONGO_PASSWORD'),
authSource=os.environ.get('CRAWLAB_MONGO_AUTHSOURCE') or 'admin'
)
db = mongo[os.environ.get('CRAWLAB_MONGO_DB') or 'test']
col = db[os.environ.get('CRAWLAB_COLLECTION') or 'test']
task_id = os.environ.get('CRAWLAB_TASK_ID')
class ConfigSpiderPipeline(object):
def process_item(self, item, spider):
item['task_id'] = task_id
if col is not None:
col.save(item)
return item
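A side note on the pipeline above: Collection.save() was deprecated in PyMongo 3.0 and removed in PyMongo 4.0, so the write would fail on newer client versions. A rough equivalent using only current APIs (a sketch, not part of this commit) would be:

# Insert new items, or replace an existing document when the item carries an _id.
data = dict(item)
if data.get('_id') is None:
    col.insert_one(data)
else:
    col.replace_one({'_id': data['_id']}, data, upsert=True)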

View File

@@ -0,0 +1,111 @@
# -*- coding: utf-8 -*-
import os
import re
import json
# Scrapy settings for config_spider project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# https://docs.scrapy.org/en/latest/topics/settings.html
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
BOT_NAME = 'Crawlab Configurable Spider'
SPIDER_MODULES = ['config_spider.spiders']
NEWSPIDER_MODULE = 'config_spider.spiders'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
USER_AGENT = 'Crawlab Spider'
# Obey robots.txt rules
ROBOTSTXT_OBEY = True
# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32
# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16
# Disable cookies (enabled by default)
#COOKIES_ENABLED = False
# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False
# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Language': 'en',
#}
# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
# 'config_spider.middlewares.ConfigSpiderSpiderMiddleware': 543,
#}
# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
# 'config_spider.middlewares.ConfigSpiderDownloaderMiddleware': 543,
#}
# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
# 'scrapy.extensions.telnet.TelnetConsole': None,
#}
# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
'config_spider.pipelines.ConfigSpiderPipeline': 300,
}
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
for setting_env_name in [x for x in os.environ.keys() if x.startswith('CRAWLAB_SETTING_')]:
setting_name = setting_env_name.replace('CRAWLAB_SETTING_', '')
setting_value = os.environ.get(setting_env_name)
if setting_value.lower() == 'true':
setting_value = True
elif setting_value.lower() == 'false':
setting_value = False
elif re.search(r'^\d+$', setting_value) is not None:
setting_value = int(setting_value)
elif re.search(r'^\{.*\}$', setting_value.strip()) is not None:
setting_value = json.loads(setting_value)
elif re.search(r'^\[.*\]$', setting_value.strip()) is not None:
setting_value = json.loads(setting_value)
else:
pass
locals()[setting_name] = setting_value
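The loop above picks up any CRAWLAB_SETTING_* environment variable (presumably injected by Crawlab from the Spiderfile's settings block) and coerces the string value by shape. A few hypothetical examples of how values would be interpreted:

import os
os.environ['CRAWLAB_SETTING_ROBOTSTXT_OBEY'] = 'false'    # -> ROBOTSTXT_OBEY = False
os.environ['CRAWLAB_SETTING_DOWNLOAD_DELAY'] = '3'        # -> DOWNLOAD_DELAY = 3 (int)
os.environ['CRAWLAB_SETTING_DEFAULT_REQUEST_HEADERS'] = '{"Accept-Language": "en"}'  # -> parsed as JSON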

View File

@@ -0,0 +1,4 @@
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.

View File

@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
import scrapy
import re
from config_spider.items import Item
from urllib.parse import urljoin, urlparse
def get_real_url(response, url):
if re.search(r'^https?', url):
return url
elif re.search(r'^\/\/', url):
u = urlparse(response.url)
return u.scheme + ':' + url  # protocol-relative '//' URL: the scheme needs a trailing colon
return urljoin(response.url, url)
class ConfigSpider(scrapy.Spider):
name = 'config_spider'
def start_requests(self):
yield scrapy.Request(url='https://www.amazon.cn/s?k=%E6%89%8B%E6%9C%BA&__mk_zh_CN=%E4%BA%9A%E9%A9%AC%E9%80%8A%E7%BD%91%E7%AB%99&ref=nb_sb_noss_2', callback=self.parse_list)
def parse_list(self, response):
prev_item = response.meta.get('item')
for elem in response.css('.s-result-item'):
item = Item()
item['title'] = elem.css('span.a-text-normal::text').extract_first()
item['url'] = elem.css('.a-link-normal::attr("href")').extract_first()
item['price'] = elem.xpath('string(.//*[@class="a-price-whole"])').extract_first()
item['price_fraction'] = elem.xpath('string(.//*[@class="a-price-fraction"])').extract_first()
item['img'] = elem.css('.s-image-square-aspect > img::attr("src")').extract_first()
if prev_item is not None:
for key, value in prev_item.items():
item[key] = value
yield item
next_url = response.css('.a-last > a::attr("href")').extract_first()
if next_url:
    yield scrapy.Request(url=get_real_url(response, next_url), callback=self.parse_list, meta={'item': prev_item})

View File

@@ -0,0 +1 @@
4b716dd3c15b993ccb7a9f0be1cc0de9

View File

@@ -0,0 +1,11 @@
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html
[settings]
default = config_spider.settings
[deploy]
#url = http://localhost:6800/
project = config_spider

View File

@@ -0,0 +1,57 @@
name: "autohome_config"
display_name: "汽车之家(可配置)"
remark: "汽车之家文章,列表+详情+分页"
type: "configurable"
col: "results_autohome_config"
engine: scrapy
start_url: https://www.autohome.com.cn/all/
start_stage: list
stages:
- name: list
is_list: true
list_css: ul.article > li
list_xpath: ""
page_css: a.page-item-next
page_xpath: ""
page_attr: href
fields:
- name: title
css: li > a > h3
xpath: ""
attr: ""
next_stage: ""
remark: ""
- name: url
css: li > a
xpath: ""
attr: href
next_stage: ""
remark: ""
- name: abstract
css: li > a > p
xpath: ""
attr: ""
next_stage: ""
remark: ""
- name: time
css: li > a .fn-left
xpath: ""
attr: ""
next_stage: ""
remark: ""
- name: views
css: li > a .fn-right > em:first-child
xpath: ""
attr: ""
next_stage: ""
remark: ""
- name: comments
css: li > a .fn-right > em:last-child
xpath: ""
attr: ""
next_stage: ""
remark: ""
settings:
ROBOTSTXT_OBEY: "false"
USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML,
like Gecko) Chrome/78.0.3904.108 Safari/537.36

View File

@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy
class Item(scrapy.Item):
_id = scrapy.Field()
task_id = scrapy.Field()
ts = scrapy.Field()
title = scrapy.Field()
url = scrapy.Field()
abstract = scrapy.Field()
time = scrapy.Field()
views = scrapy.Field()
comments = scrapy.Field()

View File

@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
from scrapy import signals
class ConfigSpiderSpiderMiddleware(object):
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the spider middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_spider_input(self, response, spider):
# Called for each response that goes through the spider
# middleware and into the spider.
# Should return None or raise an exception.
return None
def process_spider_output(self, response, result, spider):
# Called with the results returned from the Spider, after
# it has processed the response.
# Must return an iterable of Request, dict or Item objects.
for i in result:
yield i
def process_spider_exception(self, response, exception, spider):
# Called when a spider or process_spider_input() method
# (from other spider middleware) raises an exception.
# Should return either None or an iterable of Request, dict
# or Item objects.
pass
def process_start_requests(self, start_requests, spider):
# Called with the start requests of the spider, and works
# similarly to the process_spider_output() method, except
# that it doesn't have a response associated.
# Must return only requests (not items).
for r in start_requests:
yield r
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)
class ConfigSpiderDownloaderMiddleware(object):
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the downloader middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_request(self, request, spider):
# Called for each request that goes through the downloader
# middleware.
# Must either:
# - return None: continue processing this request
# - or return a Response object
# - or return a Request object
# - or raise IgnoreRequest: process_exception() methods of
# installed downloader middleware will be called
return None
def process_response(self, request, response, spider):
# Called with the response returned from the downloader.
# Must either:
# - return a Response object
# - return a Request object
# - or raise IgnoreRequest
return response
def process_exception(self, request, exception, spider):
# Called when a download handler or a process_request()
# (from other downloader middleware) raises an exception.
# Must either:
# - return None: continue processing this exception
# - return a Response object: stops process_exception() chain
# - return a Request object: stops process_exception() chain
pass
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)

View File

@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
import os
from pymongo import MongoClient
mongo = MongoClient(
host=os.environ.get('CRAWLAB_MONGO_HOST') or 'localhost',
port=int(os.environ.get('CRAWLAB_MONGO_PORT') or 27017),
username=os.environ.get('CRAWLAB_MONGO_USERNAME'),
password=os.environ.get('CRAWLAB_MONGO_PASSWORD'),
authSource=os.environ.get('CRAWLAB_MONGO_AUTHSOURCE') or 'admin'
)
db = mongo[os.environ.get('CRAWLAB_MONGO_DB') or 'test']
col = db[os.environ.get('CRAWLAB_COLLECTION') or 'test']
task_id = os.environ.get('CRAWLAB_TASK_ID')
class ConfigSpiderPipeline(object):
def process_item(self, item, spider):
item['task_id'] = task_id
if col is not None:
col.save(item)
return item

View File

@@ -0,0 +1,111 @@
# -*- coding: utf-8 -*-
import os
import re
import json
# Scrapy settings for config_spider project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# https://docs.scrapy.org/en/latest/topics/settings.html
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
BOT_NAME = 'Crawlab Configurable Spider'
SPIDER_MODULES = ['config_spider.spiders']
NEWSPIDER_MODULE = 'config_spider.spiders'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
USER_AGENT = 'Crawlab Spider'
# Obey robots.txt rules
ROBOTSTXT_OBEY = True
# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32
# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16
# Disable cookies (enabled by default)
#COOKIES_ENABLED = False
# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False
# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Language': 'en',
#}
# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
# 'config_spider.middlewares.ConfigSpiderSpiderMiddleware': 543,
#}
# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
# 'config_spider.middlewares.ConfigSpiderDownloaderMiddleware': 543,
#}
# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
# 'scrapy.extensions.telnet.TelnetConsole': None,
#}
# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
'config_spider.pipelines.ConfigSpiderPipeline': 300,
}
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
for setting_env_name in [x for x in os.environ.keys() if x.startswith('CRAWLAB_SETTING_')]:
setting_name = setting_env_name.replace('CRAWLAB_SETTING_', '')
setting_value = os.environ.get(setting_env_name)
if setting_value.lower() == 'true':
setting_value = True
elif setting_value.lower() == 'false':
setting_value = False
elif re.search(r'^\d+$', setting_value) is not None:
setting_value = int(setting_value)
elif re.search(r'^\{.*\}$', setting_value.strip()) is not None:
setting_value = json.loads(setting_value)
elif re.search(r'^\[.*\]$', setting_value.strip()) is not None:
setting_value = json.loads(setting_value)
else:
pass
locals()[setting_name] = setting_value

View File

@@ -0,0 +1,4 @@
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.

View File

@@ -0,0 +1,38 @@
# -*- coding: utf-8 -*-
import scrapy
import re
from config_spider.items import Item
from urllib.parse import urljoin, urlparse
def get_real_url(response, url):
if re.search(r'^https?', url):
return url
elif re.search(r'^\/\/', url):
u = urlparse(response.url)
return u.scheme + ':' + url  # protocol-relative '//' URL: the scheme needs a trailing colon
return urljoin(response.url, url)
class ConfigSpider(scrapy.Spider):
name = 'config_spider'
def start_requests(self):
yield scrapy.Request(url='https://www.autohome.com.cn/all/', callback=self.parse_list)
def parse_list(self, response):
prev_item = response.meta.get('item')
for elem in response.css('ul.article > li'):
item = Item()
item['title'] = elem.css('li > a > h3::text').extract_first()
item['url'] = elem.css('li > a::attr("href")').extract_first()
item['abstract'] = elem.css('li > a > p::text').extract_first()
item['time'] = elem.css('li > a .fn-left::text').extract_first()
item['views'] = elem.css('li > a .fn-right > em:first-child::text').extract_first()
item['comments'] = elem.css('li > a .fn-right > em:last-child::text').extract_first()
if prev_item is not None:
for key, value in prev_item.items():
item[key] = value
yield item
next_url = response.css('a.page-item-next::attr("href")').extract_first()
if next_url:
    yield scrapy.Request(url=get_real_url(response, next_url), callback=self.parse_list, meta={'item': prev_item})

View File

@@ -0,0 +1 @@
d784a11085e298eaf344eadc3a3e9411

View File

@@ -0,0 +1,11 @@
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html
[settings]
default = config_spider.settings
[deploy]
#url = http://localhost:6800/
project = config_spider

View File

@@ -0,0 +1,39 @@
name: "baidu_config"
display_name: "百度搜索(可配置)"
remark: "百度搜索Crawlab列表+分页"
type: "configurable"
col: "results_baidu_config"
engine: scrapy
start_url: http://www.baidu.com/s?wd=crawlab
start_stage: list
stages:
- name: list
is_list: true
list_css: ".result.c-container"
list_xpath: ""
page_css: "a.n"
page_xpath: ""
page_attr: href
fields:
- name: title
css: ""
xpath: .//h3/a
attr: ""
next_stage: ""
remark: ""
- name: url
css: ""
xpath: .//h3/a
attr: href
next_stage: ""
remark: ""
- name: abstract
css: ""
xpath: .//*[@class="c-abstract"]
attr: ""
next_stage: ""
remark: ""
settings:
ROBOTSTXT_OBEY: "false"
USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML,
like Gecko) Chrome/78.0.3904.108 Safari/537.36

View File

@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy
class Item(scrapy.Item):
_id = scrapy.Field()
task_id = scrapy.Field()
ts = scrapy.Field()
title = scrapy.Field()
url = scrapy.Field()
abstract = scrapy.Field()

View File

@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
from scrapy import signals
class ConfigSpiderSpiderMiddleware(object):
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the spider middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_spider_input(self, response, spider):
# Called for each response that goes through the spider
# middleware and into the spider.
# Should return None or raise an exception.
return None
def process_spider_output(self, response, result, spider):
# Called with the results returned from the Spider, after
# it has processed the response.
# Must return an iterable of Request, dict or Item objects.
for i in result:
yield i
def process_spider_exception(self, response, exception, spider):
# Called when a spider or process_spider_input() method
# (from other spider middleware) raises an exception.
# Should return either None or an iterable of Request, dict
# or Item objects.
pass
def process_start_requests(self, start_requests, spider):
# Called with the start requests of the spider, and works
# similarly to the process_spider_output() method, except
# that it doesn't have a response associated.
# Must return only requests (not items).
for r in start_requests:
yield r
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)
class ConfigSpiderDownloaderMiddleware(object):
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the downloader middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_request(self, request, spider):
# Called for each request that goes through the downloader
# middleware.
# Must either:
# - return None: continue processing this request
# - or return a Response object
# - or return a Request object
# - or raise IgnoreRequest: process_exception() methods of
# installed downloader middleware will be called
return None
def process_response(self, request, response, spider):
# Called with the response returned from the downloader.
# Must either:
# - return a Response object
# - return a Request object
# - or raise IgnoreRequest
return response
def process_exception(self, request, exception, spider):
# Called when a download handler or a process_request()
# (from other downloader middleware) raises an exception.
# Must either:
# - return None: continue processing this exception
# - return a Response object: stops process_exception() chain
# - return a Request object: stops process_exception() chain
pass
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)

View File

@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
import os
from pymongo import MongoClient
mongo = MongoClient(
host=os.environ.get('CRAWLAB_MONGO_HOST') or 'localhost',
port=int(os.environ.get('CRAWLAB_MONGO_PORT') or 27017),
username=os.environ.get('CRAWLAB_MONGO_USERNAME'),
password=os.environ.get('CRAWLAB_MONGO_PASSWORD'),
authSource=os.environ.get('CRAWLAB_MONGO_AUTHSOURCE') or 'admin'
)
db = mongo[os.environ.get('CRAWLAB_MONGO_DB') or 'test']
col = db[os.environ.get('CRAWLAB_COLLECTION') or 'test']
task_id = os.environ.get('CRAWLAB_TASK_ID')
class ConfigSpiderPipeline(object):
def process_item(self, item, spider):
item['task_id'] = task_id
if col is not None:
col.save(item)
return item

View File

@@ -0,0 +1,111 @@
# -*- coding: utf-8 -*-
import os
import re
import json
# Scrapy settings for config_spider project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# https://docs.scrapy.org/en/latest/topics/settings.html
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
BOT_NAME = 'Crawlab Configurable Spider'
SPIDER_MODULES = ['config_spider.spiders']
NEWSPIDER_MODULE = 'config_spider.spiders'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
USER_AGENT = 'Crawlab Spider'
# Obey robots.txt rules
ROBOTSTXT_OBEY = True
# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32
# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16
# Disable cookies (enabled by default)
#COOKIES_ENABLED = False
# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False
# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Language': 'en',
#}
# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
# 'config_spider.middlewares.ConfigSpiderSpiderMiddleware': 543,
#}
# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
# 'config_spider.middlewares.ConfigSpiderDownloaderMiddleware': 543,
#}
# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
# 'scrapy.extensions.telnet.TelnetConsole': None,
#}
# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
'config_spider.pipelines.ConfigSpiderPipeline': 300,
}
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
for setting_env_name in [x for x in os.environ.keys() if x.startswith('CRAWLAB_SETTING_')]:
setting_name = setting_env_name.replace('CRAWLAB_SETTING_', '')
setting_value = os.environ.get(setting_env_name)
if setting_value.lower() == 'true':
setting_value = True
elif setting_value.lower() == 'false':
setting_value = False
elif re.search(r'^\d+$', setting_value) is not None:
setting_value = int(setting_value)
elif re.search(r'^\{.*\}$', setting_value.strip()) is not None:
setting_value = json.loads(setting_value)
elif re.search(r'^\[.*\]$', setting_value.strip()) is not None:
setting_value = json.loads(setting_value)
else:
pass
locals()[setting_name] = setting_value

View File

@@ -0,0 +1,4 @@
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.

View File

@@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
import scrapy
import re
from config_spider.items import Item
from urllib.parse import urljoin, urlparse
def get_real_url(response, url):
if re.search(r'^https?', url):
return url
elif re.search(r'^\/\/', url):
u = urlparse(response.url)
return u.scheme + ':' + url  # protocol-relative '//' URL: the scheme needs a trailing colon
return urljoin(response.url, url)
class ConfigSpider(scrapy.Spider):
name = 'config_spider'
def start_requests(self):
yield scrapy.Request(url='http://www.baidu.com/s?wd=crawlab', callback=self.parse_list)
def parse_list(self, response):
prev_item = response.meta.get('item')
for elem in response.css('.result.c-container'):
item = Item()
item['title'] = elem.xpath('string(.//h3/a)').extract_first()
item['url'] = elem.xpath('.//h3/a/@href').extract_first()
item['abstract'] = elem.xpath('string(.//*[@class="c-abstract"])').extract_first()
if prev_item is not None:
for key, value in prev_item.items():
item[key] = value
yield item
next_url = response.css('a.n::attr("href")').extract_first()
if next_url:
    yield scrapy.Request(url=get_real_url(response, next_url), callback=self.parse_list, meta={'item': prev_item})

View File

@@ -0,0 +1 @@
ba25f6f3567b256473d3f0ec6af783fd

View File

@@ -0,0 +1,11 @@
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html
[settings]
default = config_spider.settings
[deploy]
#url = http://localhost:6800/
project = config_spider

View File

@@ -0,0 +1,6 @@
name: "bing_general"
display_name: "必应搜索 (通用)"
remark: "必应搜索 Crawlab列表+分页"
col: "results_bing_general"
type: "customized"
cmd: "python bing_spider.py"

View File

@@ -0,0 +1,41 @@
import requests
from bs4 import BeautifulSoup as bs
from urllib.parse import urljoin, urlparse
import re
from crawlab import save_item
s = requests.Session()
def get_real_url(response, url):
if re.search(r'^https?', url):
return url
elif re.search(r'^\/\/', url):
u = urlparse(response.url)
return u.scheme + ':' + url  # protocol-relative '//' URL: the scheme needs a trailing colon
return urljoin(response.url, url)
def start_requests():
for i in range(0, 9):
fr = 'PERE' if not i else 'MORE'
url = f'https://cn.bing.com/search?q=crawlab&first={10 * i + 1}&FROM={fr}'
request_page(url)
def request_page(url):
print(f'requesting {url}')
r = s.get(url, headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'})
parse_list(r)
def parse_list(response):
soup = bs(response.content.decode('utf-8'), 'html.parser')  # name the parser explicitly to avoid BeautifulSoup's guessing warning
for el in list(soup.select('#b_results > li')):
try:
save_item({
'title': el.select_one('h2').text,
'url': el.select_one('h2 a').attrs.get('href'),
'abstract': el.select_one('.b_caption p').text,
})
except Exception:
pass
if __name__ == '__main__':
start_requests()
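Unlike the configurable spiders above, this one is a plain script launched by the cmd in its Spiderfile (python bing_spider.py); save_item() from the crawlab SDK presumably writes each record to the task's result collection using the same CRAWLAB_* environment variables that the Scrapy pipelines read.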

View File

@@ -0,0 +1 @@
cf295b694a20c99c4857f838aa0402a7

View File

@@ -0,0 +1,5 @@
name: "chinaz"
display_name: "站长之家 (Scrapy)"
col: "results_chinaz"
type: "customized"
cmd: "scrapy crawl chinaz_spider"

View File

View File

@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html
import scrapy
class ChinazItem(scrapy.Item):
# define the fields for your item here like:
_id = scrapy.Field()
task_id = scrapy.Field()
name = scrapy.Field()
domain = scrapy.Field()
description = scrapy.Field()
rank = scrapy.Field()
main_category = scrapy.Field()
category = scrapy.Field()
location = scrapy.Field()

View File

@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
# Define here the models for your spider middleware
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/spider-middleware.html
from scrapy import signals
class ChinazSpiderMiddleware(object):
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the spider middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_spider_input(self, response, spider):
# Called for each response that goes through the spider
# middleware and into the spider.
# Should return None or raise an exception.
return None
def process_spider_output(self, response, result, spider):
# Called with the results returned from the Spider, after
# it has processed the response.
# Must return an iterable of Request, dict or Item objects.
for i in result:
yield i
def process_spider_exception(self, response, exception, spider):
# Called when a spider or process_spider_input() method
# (from other spider middleware) raises an exception.
# Should return either None or an iterable of Response, dict
# or Item objects.
pass
def process_start_requests(self, start_requests, spider):
# Called with the start requests of the spider, and works
# similarly to the process_spider_output() method, except
# that it doesn't have a response associated.
# Must return only requests (not items).
for r in start_requests:
yield r
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)
class ChinazDownloaderMiddleware(object):
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the downloader middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_request(self, request, spider):
# Called for each request that goes through the downloader
# middleware.
# Must either:
# - return None: continue processing this request
# - or return a Response object
# - or return a Request object
# - or raise IgnoreRequest: process_exception() methods of
# installed downloader middleware will be called
return None
def process_response(self, request, response, spider):
# Called with the response returned from the downloader.
# Must either:
# - return a Response object
# - return a Request object
# - or raise IgnoreRequest
return response
def process_exception(self, request, exception, spider):
# Called when a download handler or a process_request()
# (from other downloader middleware) raises an exception.
# Must either:
# - return None: continue processing this exception
# - return a Response object: stops process_exception() chain
# - return a Request object: stops process_exception() chain
pass
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)

View File

@@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html

View File

@@ -0,0 +1,90 @@
# -*- coding: utf-8 -*-
# Scrapy settings for chinaz project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# https://doc.scrapy.org/en/latest/topics/settings.html
# https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
# https://doc.scrapy.org/en/latest/topics/spider-middleware.html
BOT_NAME = 'chinaz'
SPIDER_MODULES = ['chinaz.spiders']
NEWSPIDER_MODULE = 'chinaz.spiders'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'chinaz (+http://www.yourdomain.com)'
# Obey robots.txt rules
ROBOTSTXT_OBEY = True
# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32
# Configure a delay for requests for the same website (default: 0)
# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16
# Disable cookies (enabled by default)
#COOKIES_ENABLED = False
# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False
# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Language': 'en',
#}
# Enable or disable spider middlewares
# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
# 'chinaz.middlewares.ChinazSpiderMiddleware': 543,
#}
# Enable or disable downloader middlewares
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
# 'chinaz.middlewares.ChinazDownloaderMiddleware': 543,
#}
# Enable or disable extensions
# See https://doc.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
# 'scrapy.extensions.telnet.TelnetConsole': None,
#}
# Configure item pipelines
# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
'crawlab.pipelines.CrawlabMongoPipeline': 300,
}
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

View File

@@ -0,0 +1,4 @@
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.

View File

@@ -0,0 +1,63 @@
# -*- coding: utf-8 -*-
import scrapy
from chinaz.items import ChinazItem
class ChinazSpiderSpider(scrapy.Spider):
name = 'chinaz_spider'
allowed_domains = ['chinaz.com']
start_urls = ['http://top.chinaz.com/hangye/']
def parse(self, response):
for item in response.css('.listCentent > li'):
name = item.css('h3.rightTxtHead > a::text').extract_first()
href = item.css('h3.rightTxtHead > a::attr("href")').extract_first()
domain = item.css('h3.rightTxtHead > span::text').extract_first()
description = item.css('p.RtCInfo::text').extract_first()
rank = item.css('.RtCRateCent > strong::text').extract_first()
rank = int(rank)
item = ChinazItem(
_id=domain,
name=name,
domain=domain,
description=description,
rank=rank,
)
yield scrapy.Request(
url='http://top.chinaz.com' + href,
callback=self.parse_item,
meta={
'item': item
}
)
# pagination
a_list = response.css('.ListPageWrap > a::attr("href")').extract()
url = 'http://top.chinaz.com/hangye/' + a_list[-1]
yield scrapy.Request(url=url, callback=self.parse)
def parse_item(self, response):
item = response.meta['item']
# category info extraction
arr = response.css('.TopMainTag-show .SimSun')
res1 = arr[0].css('a::text').extract()
main_category = res1[0]
if len(res1) == 1:
category = '其他'
else:
category = res1[1]
# location info extraction
res2 = arr[1].css('a::text').extract()
if len(res2) > 0:
location = res2[0]
else:
location = '其他'
# assign values to item
item['main_category'] = main_category
item['category'] = category
item['location'] = location
yield item

View File

@@ -0,0 +1 @@
1976593e49bf0238602ce35d051bd137

View File

@@ -0,0 +1,11 @@
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html
[settings]
default = chinaz.settings
[deploy]
#url = http://localhost:6800/
project = chinaz

View File

@@ -0,0 +1,60 @@
name: "csdn_config"
display_name: "CSDN可配置"
remark: "CSDN Crawlab 文章,列表+详情+分页"
type: "configurable"
col: "results_csdn_config"
engine: scrapy
start_url: https://so.csdn.net/so/search/s.do?q=crawlab
start_stage: list
stages:
- name: list
is_list: true
list_css: .search-list-con > .search-list
list_xpath: ""
page_css: a.btn-next
page_xpath: ""
page_attr: href
fields:
- name: url
css: ""
xpath: .//*[@class="limit_width"]/a
attr: href
next_stage: detail
remark: ""
- name: detail
is_list: false
list_css: ""
list_xpath: ""
page_css: ""
page_xpath: ""
page_attr: ""
fields:
- name: content
css: ""
xpath: .//div[@id="content_views"]
attr: ""
next_stage: ""
remark: ""
- name: views
css: .read-count
xpath: ""
attr: ""
next_stage: ""
remark: ""
- name: title
css: .title-article
xpath: ""
attr: ""
next_stage: ""
remark: ""
- name: author
css: .follow-nickName
xpath: ""
attr: ""
next_stage: ""
remark: ""
settings:
AUTOTHROTTLE_ENABLED: "false"
ROBOTSTXT_OBEY: "false"
USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML,
like Gecko) Chrome/79.0.3945.117 Safari/537.36

View File

@@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy
class Item(scrapy.Item):
_id = scrapy.Field()
task_id = scrapy.Field()
ts = scrapy.Field()
url = scrapy.Field()
content = scrapy.Field()
views = scrapy.Field()
title = scrapy.Field()
author = scrapy.Field()

View File

@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
from scrapy import signals
class ConfigSpiderSpiderMiddleware(object):
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the spider middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_spider_input(self, response, spider):
# Called for each response that goes through the spider
# middleware and into the spider.
# Should return None or raise an exception.
return None
def process_spider_output(self, response, result, spider):
# Called with the results returned from the Spider, after
# it has processed the response.
# Must return an iterable of Request, dict or Item objects.
for i in result:
yield i
def process_spider_exception(self, response, exception, spider):
# Called when a spider or process_spider_input() method
# (from other spider middleware) raises an exception.
# Should return either None or an iterable of Request, dict
# or Item objects.
pass
def process_start_requests(self, start_requests, spider):
# Called with the start requests of the spider, and works
# similarly to the process_spider_output() method, except
# that it doesn't have a response associated.
# Must return only requests (not items).
for r in start_requests:
yield r
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)
class ConfigSpiderDownloaderMiddleware(object):
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the downloader middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_request(self, request, spider):
# Called for each request that goes through the downloader
# middleware.
# Must either:
# - return None: continue processing this request
# - or return a Response object
# - or return a Request object
# - or raise IgnoreRequest: process_exception() methods of
# installed downloader middleware will be called
return None
def process_response(self, request, response, spider):
# Called with the response returned from the downloader.
# Must either:
# - return a Response object
# - return a Request object
# - or raise IgnoreRequest
return response
def process_exception(self, request, exception, spider):
# Called when a download handler or a process_request()
# (from other downloader middleware) raises an exception.
# Must either:
# - return None: continue processing this exception
# - return a Response object: stops process_exception() chain
# - return a Request object: stops process_exception() chain
pass
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)

View File

@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
import os
from pymongo import MongoClient
mongo = MongoClient(
host=os.environ.get('CRAWLAB_MONGO_HOST') or 'localhost',
port=int(os.environ.get('CRAWLAB_MONGO_PORT') or 27017),
username=os.environ.get('CRAWLAB_MONGO_USERNAME'),
password=os.environ.get('CRAWLAB_MONGO_PASSWORD'),
authSource=os.environ.get('CRAWLAB_MONGO_AUTHSOURCE') or 'admin'
)
db = mongo[os.environ.get('CRAWLAB_MONGO_DB') or 'test']
col = db[os.environ.get('CRAWLAB_COLLECTION') or 'test']
task_id = os.environ.get('CRAWLAB_TASK_ID')
class ConfigSpiderPipeline(object):
def process_item(self, item, spider):
item['task_id'] = task_id
if col is not None:
col.save(item)
return item

View File

@@ -0,0 +1,111 @@
# -*- coding: utf-8 -*-
import os
import re
import json
# Scrapy settings for config_spider project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# https://docs.scrapy.org/en/latest/topics/settings.html
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
BOT_NAME = 'Crawlab Configurable Spider'
SPIDER_MODULES = ['config_spider.spiders']
NEWSPIDER_MODULE = 'config_spider.spiders'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
USER_AGENT = 'Crawlab Spider'
# Obey robots.txt rules
ROBOTSTXT_OBEY = True
# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32
# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16
# Disable cookies (enabled by default)
#COOKIES_ENABLED = False
# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False
# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Language': 'en',
#}
# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
# 'config_spider.middlewares.ConfigSpiderSpiderMiddleware': 543,
#}
# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
# 'config_spider.middlewares.ConfigSpiderDownloaderMiddleware': 543,
#}
# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
# 'scrapy.extensions.telnet.TelnetConsole': None,
#}
# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
'config_spider.pipelines.ConfigSpiderPipeline': 300,
}
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
for setting_env_name in [x for x in os.environ.keys() if x.startswith('CRAWLAB_SETTING_')]:
setting_name = setting_env_name.replace('CRAWLAB_SETTING_', '')
setting_value = os.environ.get(setting_env_name)
if setting_value.lower() == 'true':
setting_value = True
elif setting_value.lower() == 'false':
setting_value = False
elif re.search(r'^\d+$', setting_value) is not None:
setting_value = int(setting_value)
elif re.search(r'^\{.*\}$', setting_value.strip()) is not None:
setting_value = json.loads(setting_value)
elif re.search(r'^\[.*\]$', setting_value.strip()) is not None:
setting_value = json.loads(setting_value)
else:
pass
locals()[setting_name] = setting_value

View File

@@ -0,0 +1,4 @@
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.

View File

@@ -0,0 +1,41 @@
# -*- coding: utf-8 -*-
import scrapy
import re
from config_spider.items import Item
from urllib.parse import urljoin, urlparse
def get_real_url(response, url):
if re.search(r'^https?', url):
return url
elif re.search(r'^\/\/', url):
u = urlparse(response.url)
return u.scheme + ':' + url  # protocol-relative '//' URL: the scheme needs a trailing colon
return urljoin(response.url, url)
class ConfigSpider(scrapy.Spider):
name = 'config_spider'
def start_requests(self):
yield scrapy.Request(url='https://so.csdn.net/so/search/s.do?q=crawlab', callback=self.parse_list)
def parse_list(self, response):
prev_item = response.meta.get('item')
for elem in response.css('.search-list-con > .search-list'):
item = Item()
item['url'] = elem.xpath('.//*[@class="limit_width"]/a/@href').extract_first()
if prev_item is not None:
for key, value in prev_item.items():
item[key] = value
yield scrapy.Request(url=get_real_url(response, item['url']), callback=self.parse_detail, meta={'item': item})
next_url = response.css('a.btn-next::attr("href")').extract_first()
if next_url:
    yield scrapy.Request(url=get_real_url(response, next_url), callback=self.parse_list, meta={'item': prev_item})
def parse_detail(self, response):
item = Item() if response.meta.get('item') is None else response.meta.get('item')
item['content'] = response.xpath('string(.//div[@id="content_views"])').extract_first()
item['views'] = response.css('.read-count::text').extract_first()
item['title'] = response.css('.title-article::text').extract_first()
item['author'] = response.css('.follow-nickName::text').extract_first()
yield item

View File

@@ -0,0 +1 @@
b6889c74e006a5e619b525d84db62ffd

View File

@@ -0,0 +1,11 @@
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html
[settings]
default = config_spider.settings
[deploy]
#url = http://localhost:6800/
project = config_spider

View File

@@ -0,0 +1,57 @@
name: "douban_config"
display_name: "豆瓣读书(可配置)"
remark: "豆瓣读书新书推荐,列表"
type: "configurable"
col: "results_douban_config"
engine: scrapy
start_url: https://book.douban.com/latest
start_stage: list
stages:
- name: list
is_list: true
list_css: ul.cover-col-4 > li
list_xpath: ""
page_css: ""
page_xpath: ""
page_attr: ""
fields:
- name: title
css: h2 > a
xpath: ""
attr: ""
next_stage: ""
remark: ""
- name: url
css: h2 > a
xpath: ""
attr: href
next_stage: ""
remark: ""
- name: img
css: a.cover img
xpath: ""
attr: src
next_stage: ""
remark: ""
- name: rating
css: p.rating > .color-lightgray
xpath: ""
attr: ""
next_stage: ""
remark: ""
- name: abstract
css: p:last-child
xpath: ""
attr: ""
next_stage: ""
remark: ""
- name: info
css: .color-gray
xpath: ""
attr: ""
next_stage: ""
remark: ""
settings:
ROBOTSTXT_OBEY: "false"
USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML,
like Gecko) Chrome/78.0.3904.108 Safari/537.36

View File

@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy
class Item(scrapy.Item):
_id = scrapy.Field()
task_id = scrapy.Field()
ts = scrapy.Field()
title = scrapy.Field()
url = scrapy.Field()
img = scrapy.Field()
rating = scrapy.Field()
abstract = scrapy.Field()
info = scrapy.Field()

View File

@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
from scrapy import signals
class ConfigSpiderSpiderMiddleware(object):
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the spider middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_spider_input(self, response, spider):
# Called for each response that goes through the spider
# middleware and into the spider.
# Should return None or raise an exception.
return None
def process_spider_output(self, response, result, spider):
# Called with the results returned from the Spider, after
# it has processed the response.
# Must return an iterable of Request, dict or Item objects.
for i in result:
yield i
def process_spider_exception(self, response, exception, spider):
# Called when a spider or process_spider_input() method
# (from other spider middleware) raises an exception.
# Should return either None or an iterable of Request, dict
# or Item objects.
pass
def process_start_requests(self, start_requests, spider):
# Called with the start requests of the spider, and works
# similarly to the process_spider_output() method, except
        # that it doesn't have a response associated.
# Must return only requests (not items).
for r in start_requests:
yield r
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)
class ConfigSpiderDownloaderMiddleware(object):
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the downloader middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_request(self, request, spider):
# Called for each request that goes through the downloader
# middleware.
# Must either:
# - return None: continue processing this request
# - or return a Response object
# - or return a Request object
# - or raise IgnoreRequest: process_exception() methods of
# installed downloader middleware will be called
return None
def process_response(self, request, response, spider):
# Called with the response returned from the downloader.
        # Must either:
# - return a Response object
# - return a Request object
# - or raise IgnoreRequest
return response
def process_exception(self, request, exception, spider):
# Called when a download handler or a process_request()
# (from other downloader middleware) raises an exception.
# Must either:
# - return None: continue processing this exception
# - return a Response object: stops process_exception() chain
# - return a Request object: stops process_exception() chain
pass
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)

View File

@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
import os
from pymongo import MongoClient
mongo = MongoClient(
host=os.environ.get('CRAWLAB_MONGO_HOST') or 'localhost',
port=int(os.environ.get('CRAWLAB_MONGO_PORT') or 27017),
username=os.environ.get('CRAWLAB_MONGO_USERNAME'),
password=os.environ.get('CRAWLAB_MONGO_PASSWORD'),
authSource=os.environ.get('CRAWLAB_MONGO_AUTHSOURCE') or 'admin'
)
db = mongo[os.environ.get('CRAWLAB_MONGO_DB') or 'test']
col = db[os.environ.get('CRAWLAB_COLLECTION') or 'test']
task_id = os.environ.get('CRAWLAB_TASK_ID')
class ConfigSpiderPipeline(object):
def process_item(self, item, spider):
item['task_id'] = task_id
if col is not None:
col.save(item)
return item
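
Note: pymongo's Collection.save() was deprecated in the 3.x series and removed in 4.x. A hedged alternative sketch (not part of the committed file) that keeps the same insert-or-replace behaviour inside process_item:

# Hypothetical replacement for col.save(item) on newer pymongo versions;
# inserts the document when it has no _id yet, otherwise replaces the stored copy.
doc = dict(item)
if doc.get('_id') is None:
    doc.pop('_id', None)
    col.insert_one(doc)
else:
    col.replace_one({'_id': doc['_id']}, doc, upsert=True)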

View File

@@ -0,0 +1,111 @@
# -*- coding: utf-8 -*-
import os
import re
import json
# Scrapy settings for config_spider project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# https://docs.scrapy.org/en/latest/topics/settings.html
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
BOT_NAME = 'Crawlab Configurable Spider'
SPIDER_MODULES = ['config_spider.spiders']
NEWSPIDER_MODULE = 'config_spider.spiders'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
USER_AGENT = 'Crawlab Spider'
# Obey robots.txt rules
ROBOTSTXT_OBEY = True
# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32
# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16
# Disable cookies (enabled by default)
#COOKIES_ENABLED = False
# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False
# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Language': 'en',
#}
# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
# 'config_spider.middlewares.ConfigSpiderSpiderMiddleware': 543,
#}
# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
# 'config_spider.middlewares.ConfigSpiderDownloaderMiddleware': 543,
#}
# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
# 'scrapy.extensions.telnet.TelnetConsole': None,
#}
# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
'config_spider.pipelines.ConfigSpiderPipeline': 300,
}
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
for setting_env_name in [x for x in os.environ.keys() if x.startswith('CRAWLAB_SETTING_')]:
setting_name = setting_env_name.replace('CRAWLAB_SETTING_', '')
setting_value = os.environ.get(setting_env_name)
if setting_value.lower() == 'true':
setting_value = True
elif setting_value.lower() == 'false':
setting_value = False
elif re.search(r'^\d+$', setting_value) is not None:
setting_value = int(setting_value)
elif re.search(r'^\{.*\}$', setting_value.strip()) is not None:
setting_value = json.loads(setting_value)
elif re.search(r'^\[.*\]$', setting_value.strip()) is not None:
setting_value = json.loads(setting_value)
else:
pass
locals()[setting_name] = setting_value
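
As a usage sketch (variable names and values below are hypothetical), the loop above lets Crawlab override any Scrapy setting through CRAWLAB_SETTING_-prefixed environment variables, coercing booleans, integers and JSON literals before they land in this module's namespace:

# Illustration only, mirroring the coercion branches above; not part of the committed file.
import json
import re

def coerce_setting(value):
    if value.lower() == 'true':
        return True
    if value.lower() == 'false':
        return False
    if re.search(r'^\d+$', value) is not None:
        return int(value)
    if re.search(r'^\{.*\}$', value.strip()) or re.search(r'^\[.*\]$', value.strip()):
        return json.loads(value)
    return value

# e.g. CRAWLAB_SETTING_DOWNLOAD_DELAY=3 and CRAWLAB_SETTING_ROBOTSTXT_OBEY=false
assert coerce_setting('3') == 3
assert coerce_setting('false') is False
assert coerce_setting('{"Accept-Language": "en"}') == {'Accept-Language': 'en'}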

View File

@@ -0,0 +1,4 @@
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.

View File

@@ -0,0 +1,36 @@
# -*- coding: utf-8 -*-
import scrapy
import re
from config_spider.items import Item
from urllib.parse import urljoin, urlparse
def get_real_url(response, url):
if re.search(r'^https?', url):
return url
elif re.search(r'^\/\/', url):
u = urlparse(response.url)
        return u.scheme + ':' + url
return urljoin(response.url, url)
class ConfigSpider(scrapy.Spider):
name = 'config_spider'
def start_requests(self):
yield scrapy.Request(url='https://book.douban.com/latest', callback=self.parse_list)
def parse_list(self, response):
prev_item = response.meta.get('item')
for elem in response.css('ul.cover-col-4 > li'):
item = Item()
item['title'] = elem.css('h2 > a::text').extract_first()
item['url'] = elem.css('h2 > a::attr("href")').extract_first()
item['img'] = elem.css('a.cover img::attr("src")').extract_first()
item['rating'] = elem.css('p.rating > .color-lightgray::text').extract_first()
item['abstract'] = elem.css('p:last-child::text').extract_first()
item['info'] = elem.css('.color-gray::text').extract_first()
if prev_item is not None:
for key, value in prev_item.items():
item[key] = value
yield item

View File

@@ -0,0 +1 @@
4d59a6c83b0e125d5321beae86bb93ce

View File

@@ -0,0 +1,11 @@
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html
[settings]
default = config_spider.settings
[deploy]
#url = http://localhost:6800/
project = config_spider

View File

@@ -0,0 +1,5 @@
name: "jd"
display_name: "京东 (Scrapy)"
col: "results_jd"
type: "customized"
cmd: "scrapy crawl jd_spider"

View File

View File

@@ -0,0 +1,15 @@
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html
import scrapy
class JdItem(scrapy.Item):
# define the fields for your item here like:
name = scrapy.Field()
price = scrapy.Field()
url = scrapy.Field()

View File

@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
# Define here the models for your spider middleware
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/spider-middleware.html
from scrapy import signals
class JdSpiderMiddleware(object):
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the spider middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_spider_input(self, response, spider):
# Called for each response that goes through the spider
# middleware and into the spider.
# Should return None or raise an exception.
return None
def process_spider_output(self, response, result, spider):
# Called with the results returned from the Spider, after
# it has processed the response.
# Must return an iterable of Request, dict or Item objects.
for i in result:
yield i
def process_spider_exception(self, response, exception, spider):
# Called when a spider or process_spider_input() method
# (from other spider middleware) raises an exception.
# Should return either None or an iterable of Response, dict
# or Item objects.
pass
def process_start_requests(self, start_requests, spider):
# Called with the start requests of the spider, and works
# similarly to the process_spider_output() method, except
        # that it doesn't have a response associated.
# Must return only requests (not items).
for r in start_requests:
yield r
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)
class JdDownloaderMiddleware(object):
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the downloader middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_request(self, request, spider):
# Called for each request that goes through the downloader
# middleware.
# Must either:
# - return None: continue processing this request
# - or return a Response object
# - or return a Request object
# - or raise IgnoreRequest: process_exception() methods of
# installed downloader middleware will be called
return None
def process_response(self, request, response, spider):
# Called with the response returned from the downloader.
        # Must either:
# - return a Response object
# - return a Request object
# - or raise IgnoreRequest
return response
def process_exception(self, request, exception, spider):
# Called when a download handler or a process_request()
# (from other downloader middleware) raises an exception.
# Must either:
# - return None: continue processing this exception
# - return a Response object: stops process_exception() chain
# - return a Request object: stops process_exception() chain
pass
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)

View File

@@ -0,0 +1,6 @@
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html

View File

@@ -0,0 +1,90 @@
# -*- coding: utf-8 -*-
# Scrapy settings for jd project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# https://doc.scrapy.org/en/latest/topics/settings.html
# https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
# https://doc.scrapy.org/en/latest/topics/spider-middleware.html
BOT_NAME = 'jd'
SPIDER_MODULES = ['jd.spiders']
NEWSPIDER_MODULE = 'jd.spiders'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'jd (+http://www.yourdomain.com)'
# Obey robots.txt rules
ROBOTSTXT_OBEY = False
# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32
# Configure a delay for requests for the same website (default: 0)
# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16
# Disable cookies (enabled by default)
#COOKIES_ENABLED = False
# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False
# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Language': 'en',
#}
# Enable or disable spider middlewares
# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
# 'jd.middlewares.JdSpiderMiddleware': 543,
#}
# Enable or disable downloader middlewares
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
# 'jd.middlewares.JdDownloaderMiddleware': 543,
#}
# Enable or disable extensions
# See https://doc.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
# 'scrapy.extensions.telnet.TelnetConsole': None,
#}
# Configure item pipelines
# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
'crawlab.pipelines.CrawlabMongoPipeline': 300,
}
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

View File

@@ -0,0 +1,4 @@
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.

View File

@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
import scrapy
from jd.items import JdItem
class JdSpiderSpider(scrapy.Spider):
name = 'jd_spider'
allowed_domains = ['jd.com']
def start_requests(self):
for i in range(1, 50):
yield scrapy.Request(url=f'https://search.jd.com/Search?keyword=手机&enc=utf-8&page={i}')
def parse(self, response):
        for el in response.css('.gl-item'):
            price = el.css('.p-price i::text').extract_first()
            yield JdItem(
                url=el.css('.p-name > a::attr("href")').extract_first(),
                name=el.css('.p-name > a::attr("title")').extract_first(),
                price=float(price) if price is not None else None,
            )

View File

@@ -0,0 +1 @@
621486d31459514eb27a082d159d9b8c

View File

@@ -0,0 +1,11 @@
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html
[settings]
default = jd.settings
[deploy]
#url = http://localhost:6800/
project = jd

View File

@@ -0,0 +1,5 @@
name: "sinastock"
display_name: "新浪股票 (Scrapy)"
type: "customized"
col: "results_sinastock"
cmd: "scrapy crawl sinastock_spider"

View File

@@ -0,0 +1 @@
80bc091fa45ef4a85c9f1a66c81a4ed7

View File

@@ -0,0 +1,11 @@
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html
[settings]
default = sinastock.settings
[deploy]
#url = http://localhost:6800/
project = sinastock

View File

@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html
import scrapy
class NewsItem(scrapy.Item):
# define the fields for your item here like:
_id = scrapy.Field()
title = scrapy.Field()
ts_str = scrapy.Field()
ts = scrapy.Field()
url = scrapy.Field()
text = scrapy.Field()
task_id = scrapy.Field()
source = scrapy.Field()
stocks = scrapy.Field()

View File

@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
# Define here the models for your spider middleware
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/spider-middleware.html
from scrapy import signals
class SinastockSpiderMiddleware(object):
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the spider middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_spider_input(self, response, spider):
# Called for each response that goes through the spider
# middleware and into the spider.
# Should return None or raise an exception.
return None
def process_spider_output(self, response, result, spider):
# Called with the results returned from the Spider, after
# it has processed the response.
# Must return an iterable of Request, dict or Item objects.
for i in result:
yield i
def process_spider_exception(self, response, exception, spider):
# Called when a spider or process_spider_input() method
# (from other spider middleware) raises an exception.
# Should return either None or an iterable of Response, dict
# or Item objects.
pass
def process_start_requests(self, start_requests, spider):
# Called with the start requests of the spider, and works
# similarly to the process_spider_output() method, except
        # that it doesn't have a response associated.
# Must return only requests (not items).
for r in start_requests:
yield r
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)
class SinastockDownloaderMiddleware(object):
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the downloader middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_request(self, request, spider):
# Called for each request that goes through the downloader
# middleware.
# Must either:
# - return None: continue processing this request
# - or return a Response object
# - or return a Request object
# - or raise IgnoreRequest: process_exception() methods of
# installed downloader middleware will be called
return None
def process_response(self, request, response, spider):
# Called with the response returned from the downloader.
        # Must either:
# - return a Response object
# - return a Request object
# - or raise IgnoreRequest
return response
def process_exception(self, request, exception, spider):
# Called when a download handler or a process_request()
# (from other downloader middleware) raises an exception.
# Must either:
# - return None: continue processing this exception
# - return a Response object: stops process_exception() chain
# - return a Request object: stops process_exception() chain
pass
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)

View File

@@ -0,0 +1,6 @@
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html

View File

@@ -0,0 +1,89 @@
# -*- coding: utf-8 -*-
# Scrapy settings for sinastock project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# https://doc.scrapy.org/en/latest/topics/settings.html
# https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
# https://doc.scrapy.org/en/latest/topics/spider-middleware.html
BOT_NAME = 'sinastock'
SPIDER_MODULES = ['sinastock.spiders']
NEWSPIDER_MODULE = 'sinastock.spiders'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
# USER_AGENT = 'sinastock (+http://www.yourdomain.com)'
# Obey robots.txt rules
ROBOTSTXT_OBEY = True
# Configure maximum concurrent requests performed by Scrapy (default: 16)
# CONCURRENT_REQUESTS = 32
# Configure a delay for requests for the same website (default: 0)
# See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
# DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
# CONCURRENT_REQUESTS_PER_DOMAIN = 16
# CONCURRENT_REQUESTS_PER_IP = 16
# Disable cookies (enabled by default)
# COOKIES_ENABLED = False
# Disable Telnet Console (enabled by default)
# TELNETCONSOLE_ENABLED = False
# Override the default request headers:
# DEFAULT_REQUEST_HEADERS = {
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Language': 'en',
# }
# Enable or disable spider middlewares
# See https://doc.scrapy.org/en/latest/topics/spider-middleware.html
# SPIDER_MIDDLEWARES = {
# 'sinastock.middlewares.SinastockSpiderMiddleware': 543,
# }
# Enable or disable downloader middlewares
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
# DOWNLOADER_MIDDLEWARES = {
# 'sinastock.middlewares.SinastockDownloaderMiddleware': 543,
# }
# Enable or disable extensions
# See https://doc.scrapy.org/en/latest/topics/extensions.html
# EXTENSIONS = {
# 'scrapy.extensions.telnet.TelnetConsole': None,
# }
# Configure item pipelines
# See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
'crawlab.pipelines.CrawlabMongoPipeline': 300,
}
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/autothrottle.html
# AUTOTHROTTLE_ENABLED = True
# The initial download delay
# AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
# AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
# AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
# HTTPCACHE_ENABLED = True
# HTTPCACHE_EXPIRATION_SECS = 0
# HTTPCACHE_DIR = 'httpcache'
# HTTPCACHE_IGNORE_HTTP_CODES = []
# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

View File

@@ -0,0 +1,4 @@
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.

View File

@@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
import os
import re
from datetime import datetime
import scrapy
from pymongo import MongoClient
from sinastock.items import NewsItem
class SinastockSpiderSpider(scrapy.Spider):
name = 'sinastock_spider'
allowed_domains = ['finance.sina.com.cn']
mongo = MongoClient(
host=os.environ.get('MONGO_HOST') or 'localhost',
port=int(os.environ.get('MONGO_PORT') or 27017)
)
db = mongo[os.environ.get('MONGO_DB') or 'crawlab_test']
col = db.get_collection(os.environ.get('CRAWLAB_COLLECTION') or 'stock_news')
def start_requests(self):
col = self.db['stocks']
for s in col.find({}):
code, ex = s['ts_code'].split('.')
for i in range(10):
url = f'http://vip.stock.finance.sina.com.cn/corp/view/vCB_AllNewsStock.php?symbol={ex.lower()}{code}&Page={i + 1}'
yield scrapy.Request(
url=url,
callback=self.parse,
meta={'ts_code': s['ts_code']}
)
def parse(self, response):
for a in response.css('.datelist > ul > a'):
url = a.css('a::attr("href")').extract_first()
item = NewsItem(
title=a.css('a::text').extract_first(),
url=url,
source='sina',
stocks=[response.meta['ts_code']]
)
yield scrapy.Request(
url=url,
callback=self.parse_detail,
meta={'item': item}
)
def parse_detail(self, response):
item = response.meta['item']
text = response.css('#artibody').extract_first()
pre = re.compile('>(.*?)<')
text = ''.join(pre.findall(text))
item['text'] = text.replace('\u3000', '')
item['ts_str'] = response.css('.date::text').extract_first()
        if item['text'] is not None and item['ts_str'] is not None:
            item['ts'] = datetime.strptime(item['ts_str'], '%Y年%m月%d日 %H:%M')
yield item
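
The timestamp parsing above assumes Sina's article date layout; a hedged sanity check (the sample value is hypothetical):

# Illustration only: the .date element on the article page is expected to look like
# '2020年04月27日 15:30', matching the format string used in parse_detail above.
from datetime import datetime

sample = '2020年04月27日 15:30'
assert datetime.strptime(sample, '%Y年%m月%d日 %H:%M') == datetime(2020, 4, 27, 15, 30)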

View File

@@ -0,0 +1,54 @@
name: "v2ex_config"
display_name: "V2ex可配置"
remark: "V2ex列表+详情"
type: "configurable"
col: "results_v2ex_config"
engine: scrapy
start_url: https://v2ex.com/
start_stage: list
stages:
- name: list
is_list: true
list_css: .cell.item
list_xpath: ""
page_css: ""
page_xpath: ""
page_attr: href
fields:
- name: title
css: a.topic-link
xpath: ""
attr: ""
next_stage: ""
remark: ""
- name: url
css: a.topic-link
xpath: ""
attr: href
next_stage: detail
remark: ""
- name: replies
css: .count_livid
xpath: ""
attr: ""
next_stage: ""
remark: ""
- name: detail
is_list: false
list_css: ""
list_xpath: ""
page_css: ""
page_xpath: ""
page_attr: ""
fields:
- name: content
css: ""
xpath: .//*[@class="markdown_body"]
attr: ""
next_stage: ""
remark: ""
settings:
AUTOTHROTTLE_ENABLED: "true"
ROBOTSTXT_OBEY: "false"
USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML,
like Gecko) Chrome/79.0.3945.117 Safari/537.36
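
The next_stage: detail entry on the url field is what links the list stage to the detail stage; in the generated ConfigSpider shown further down in this diff, it is rendered as a follow-up request along these lines (sketch copied from the generated spider):

yield scrapy.Request(url=get_real_url(response, item['url']), callback=self.parse_detail, meta={'item': item})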

View File

@@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy
class Item(scrapy.Item):
_id = scrapy.Field()
task_id = scrapy.Field()
ts = scrapy.Field()
title = scrapy.Field()
url = scrapy.Field()
replies = scrapy.Field()
content = scrapy.Field()

View File

@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
from scrapy import signals
class ConfigSpiderSpiderMiddleware(object):
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the spider middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_spider_input(self, response, spider):
# Called for each response that goes through the spider
# middleware and into the spider.
# Should return None or raise an exception.
return None
def process_spider_output(self, response, result, spider):
# Called with the results returned from the Spider, after
# it has processed the response.
# Must return an iterable of Request, dict or Item objects.
for i in result:
yield i
def process_spider_exception(self, response, exception, spider):
# Called when a spider or process_spider_input() method
# (from other spider middleware) raises an exception.
# Should return either None or an iterable of Request, dict
# or Item objects.
pass
def process_start_requests(self, start_requests, spider):
# Called with the start requests of the spider, and works
# similarly to the process_spider_output() method, except
        # that it doesn't have a response associated.
# Must return only requests (not items).
for r in start_requests:
yield r
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)
class ConfigSpiderDownloaderMiddleware(object):
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the downloader middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_request(self, request, spider):
# Called for each request that goes through the downloader
# middleware.
# Must either:
# - return None: continue processing this request
# - or return a Response object
# - or return a Request object
# - or raise IgnoreRequest: process_exception() methods of
# installed downloader middleware will be called
return None
def process_response(self, request, response, spider):
# Called with the response returned from the downloader.
        # Must either:
# - return a Response object
# - return a Request object
# - or raise IgnoreRequest
return response
def process_exception(self, request, exception, spider):
# Called when a download handler or a process_request()
# (from other downloader middleware) raises an exception.
# Must either:
# - return None: continue processing this exception
# - return a Response object: stops process_exception() chain
# - return a Request object: stops process_exception() chain
pass
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)

View File

@@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
import os
from pymongo import MongoClient
mongo = MongoClient(
host=os.environ.get('CRAWLAB_MONGO_HOST') or 'localhost',
port=int(os.environ.get('CRAWLAB_MONGO_PORT') or 27017),
username=os.environ.get('CRAWLAB_MONGO_USERNAME'),
password=os.environ.get('CRAWLAB_MONGO_PASSWORD'),
authSource=os.environ.get('CRAWLAB_MONGO_AUTHSOURCE') or 'admin'
)
db = mongo[os.environ.get('CRAWLAB_MONGO_DB') or 'test']
col = db[os.environ.get('CRAWLAB_COLLECTION') or 'test']
task_id = os.environ.get('CRAWLAB_TASK_ID')
class ConfigSpiderPipeline(object):
def process_item(self, item, spider):
item['task_id'] = task_id
if col is not None:
col.save(item)
return item

View File

@@ -0,0 +1,111 @@
# -*- coding: utf-8 -*-
import os
import re
import json
# Scrapy settings for config_spider project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# https://docs.scrapy.org/en/latest/topics/settings.html
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
BOT_NAME = 'Crawlab Configurable Spider'
SPIDER_MODULES = ['config_spider.spiders']
NEWSPIDER_MODULE = 'config_spider.spiders'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
USER_AGENT = 'Crawlab Spider'
# Obey robots.txt rules
ROBOTSTXT_OBEY = True
# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32
# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16
# Disable cookies (enabled by default)
#COOKIES_ENABLED = False
# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False
# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Language': 'en',
#}
# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
# 'config_spider.middlewares.ConfigSpiderSpiderMiddleware': 543,
#}
# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
# 'config_spider.middlewares.ConfigSpiderDownloaderMiddleware': 543,
#}
# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
# 'scrapy.extensions.telnet.TelnetConsole': None,
#}
# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
'config_spider.pipelines.ConfigSpiderPipeline': 300,
}
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
for setting_env_name in [x for x in os.environ.keys() if x.startswith('CRAWLAB_SETTING_')]:
setting_name = setting_env_name.replace('CRAWLAB_SETTING_', '')
setting_value = os.environ.get(setting_env_name)
if setting_value.lower() == 'true':
setting_value = True
elif setting_value.lower() == 'false':
setting_value = False
elif re.search(r'^\d+$', setting_value) is not None:
setting_value = int(setting_value)
elif re.search(r'^\{.*\}$', setting_value.strip()) is not None:
setting_value = json.loads(setting_value)
elif re.search(r'^\[.*\]$', setting_value.strip()) is not None:
setting_value = json.loads(setting_value)
else:
pass
locals()[setting_name] = setting_value

View File

@@ -0,0 +1,4 @@
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.

View File

@@ -0,0 +1,38 @@
# -*- coding: utf-8 -*-
import scrapy
import re
from config_spider.items import Item
from urllib.parse import urljoin, urlparse
def get_real_url(response, url):
if re.search(r'^https?', url):
return url
elif re.search(r'^\/\/', url):
u = urlparse(response.url)
        return u.scheme + ':' + url
return urljoin(response.url, url)
class ConfigSpider(scrapy.Spider):
name = 'config_spider'
def start_requests(self):
yield scrapy.Request(url='https://v2ex.com/', callback=self.parse_list)
def parse_list(self, response):
prev_item = response.meta.get('item')
for elem in response.css('.cell.item'):
item = Item()
item['title'] = elem.css('a.topic-link::text').extract_first()
item['url'] = elem.css('a.topic-link::attr("href")').extract_first()
item['replies'] = elem.css('.count_livid::text').extract_first()
if prev_item is not None:
for key, value in prev_item.items():
item[key] = value
yield scrapy.Request(url=get_real_url(response, item['url']), callback=self.parse_detail, meta={'item': item})
def parse_detail(self, response):
item = Item() if response.meta.get('item') is None else response.meta.get('item')
item['content'] = response.xpath('string(.//*[@class="markdown_body"])').extract_first()
yield item

View File

@@ -0,0 +1 @@
402c0a07873ef74b9b574bc0f6b28423

View File

@@ -0,0 +1,11 @@
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html
[settings]
default = config_spider.settings
[deploy]
#url = http://localhost:6800/
project = config_spider

View File

@@ -0,0 +1,5 @@
name: "xueqiu"
display_name: "雪球网 (Scrapy)"
type: "customized"
col: "results_xueqiu"
cmd: "scrapy crawl xueqiu_spider"

View File

@@ -0,0 +1 @@
df177994199caa691d87fc0c5031326d

View File

@@ -0,0 +1,11 @@
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html
[settings]
default = xueqiu.settings
[deploy]
#url = http://localhost:6800/
project = xueqiu

View File

View File

@@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html
import scrapy
class XueqiuItem(scrapy.Item):
# define the fields for your item here like:
_id = scrapy.Field()
task_id = scrapy.Field()
id = scrapy.Field()
text = scrapy.Field()
url = scrapy.Field()
target = scrapy.Field()
view_count = scrapy.Field()
mark = scrapy.Field()
created_at = scrapy.Field()
ts = scrapy.Field()
source = scrapy.Field()

View File

@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
# Define here the models for your spider middleware
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/spider-middleware.html
from scrapy import signals
class XueqiuSpiderMiddleware(object):
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the spider middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_spider_input(self, response, spider):
# Called for each response that goes through the spider
# middleware and into the spider.
# Should return None or raise an exception.
return None
def process_spider_output(self, response, result, spider):
# Called with the results returned from the Spider, after
# it has processed the response.
# Must return an iterable of Request, dict or Item objects.
for i in result:
yield i
def process_spider_exception(self, response, exception, spider):
# Called when a spider or process_spider_input() method
# (from other spider middleware) raises an exception.
# Should return either None or an iterable of Response, dict
# or Item objects.
pass
def process_start_requests(self, start_requests, spider):
# Called with the start requests of the spider, and works
# similarly to the process_spider_output() method, except
        # that it doesn't have a response associated.
# Must return only requests (not items).
for r in start_requests:
yield r
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)
class XueqiuDownloaderMiddleware(object):
# Not all methods need to be defined. If a method is not defined,
# scrapy acts as if the downloader middleware does not modify the
# passed objects.
@classmethod
def from_crawler(cls, crawler):
# This method is used by Scrapy to create your spiders.
s = cls()
crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
return s
def process_request(self, request, spider):
# Called for each request that goes through the downloader
# middleware.
# Must either:
# - return None: continue processing this request
# - or return a Response object
# - or return a Request object
# - or raise IgnoreRequest: process_exception() methods of
# installed downloader middleware will be called
return None
def process_response(self, request, response, spider):
# Called with the response returned from the downloader.
        # Must either:
# - return a Response object
# - return a Request object
# - or raise IgnoreRequest
return response
def process_exception(self, request, exception, spider):
# Called when a download handler or a process_request()
# (from other downloader middleware) raises an exception.
# Must either:
# - return None: continue processing this exception
# - return a Response object: stops process_exception() chain
# - return a Request object: stops process_exception() chain
pass
def spider_opened(self, spider):
spider.logger.info('Spider opened: %s' % spider.name)

Some files were not shown because too many files have changed in this diff.