Files
crawlab/backend/template/spiderfile/Spiderfile.toscrapy_books
2019-12-13 12:55:53 +08:00

29 lines
593 B
Plaintext

# Spiderfile for the "toscrapy_books" configurable spider.
# Crawling starts at `start_url` in the stage named by `start_stage`.
# NOTE(review): nesting was reconstructed from key semantics (stage keys under
# each `stages` item, field keys under each `fields` item) — confirm against
# the Spiderfile schema of the consuming tool.
version: "0.4.0"
name: "toscrapy_books"
start_url: "http://books.toscrape.com"
start_stage: "list"
engine: "scrapy"

stages:
  # Listing stage: one item per element matched by `list_css`; the element
  # matched by `page_css` supplies the pagination link via its `page_attr`.
  - name: "list"
    is_list: true
    list_css: "section article.product_pod"
    page_css: "ul.pager li.next a"
    page_attr: "href"
    fields:
      - name: "title"
        css: "h3 > a"
      # The `href` of the title link is extracted as `url`; `next_stage`
      # names the stage declared below as "detail".
      - name: "url"
        css: "h3 > a"
        attr: "href"
        next_stage: "detail"
      - name: "price"
        css: ".product_price > .price_color"
  # Detail stage: not a list page; extracts a single `description` field.
  - name: "detail"
    is_list: false
    fields:
      - name: "description"
        css: "#product_description + p"

# Presumably passed through to the Scrapy engine as project settings — verify
# against the consumer.
settings:
  ROBOTSTXT_OBEY: true
  AUTOTHROTTLE_ENABLED: true