Files
crawlab/backend/template/spiderfile/Spiderfile.toscrapy_books
2020-02-03 16:43:29 +08:00

28 lines
576 B
Plaintext

# Spiderfile describing the "toscrapy_books" spider: a "list" stage and a
# "detail" stage over http://books.toscrape.com, run with the scrapy engine.
name: "toscrapy_books"
start_url: "http://books.toscrape.com"
# Must match the name of one of the entries under `stages`.
start_stage: "list"
engine: "scrapy"

stages:
  # Listing stage: one item per element matched by list_css.
  - name: "list"
    is_list: true
    list_css: "section article.product_pod"
    # Pagination link: the selector picks the "next" anchor and page_attr
    # names the attribute read from it.
    # NOTE(review): presumably followed until no match remains; confirm.
    page_css: "ul.pager li.next a"
    page_attr: "href"
    fields:
      - name: "title"
        css: "h3 > a"
      - name: "url"
        css: "h3 > a"
        attr: "href"
        # NOTE(review): presumably the extracted href is followed into the
        # "detail" stage; confirm against the consumer's schema.
        next_stage: "detail"
      - name: "price"
        css: ".product_price > .price_color"

  # Detail stage: a single (non-list) page.
  - name: "detail"
    is_list: false
    fields:
      - name: "description"
        # The <p> immediately following the #product_description element.
        css: "#product_description + p"

# Upper-case keys match Scrapy setting names.
# NOTE(review): presumably passed through to the engine unchanged; confirm.
settings:
  ROBOTSTXT_OBEY: true
  AUTOTHROTTLE_ENABLED: true