Files
crawlab/backend/template/Spiderfile
2019-11-24 19:45:21 +08:00

26 lines
608 B
Plaintext

# Spiderfile template: declarative configuration for a spider that scrapes
# the book catalogue at books.toscrape.com using the "scrapy" engine.
#
# Each key under `stages` is a stage name; `start_stage` and `next_stage`
# refer to stages by these names.
# NOTE(review): the per-key semantics described in the comments below are
# inferred from key names and the inline defaults -- confirm against the
# Crawlab configurable-spider documentation.
version: "0.4.0"
name: "toscrapy_books"
start_url: "http://books.toscrape.com"
start_stage: "list"
engine: "scrapy"
stages:
  # Listing stage: presumably one item per element matched by `list_css`,
  # with pagination followed through the `page_attr` attribute of the
  # element matched by `page_css`.
  list:
    is_list: true  # default: false
    list_css: "section article.product_pod"
    page_css: "ul.pager li.next a"
    page_attr: "href"  # default: href
    fields:
      - name: "title"
        css: "h3 > a"
      # Same selector as "title", but reads the `href` attribute; presumably
      # the extracted link is then crawled in the "detail" stage.
      - name: "url"
        css: "h3 > a"
        attr: "href"
        next_stage: "detail"
      - name: "price"
        css: ".product_price > .price_color"
  # Detail stage: a single (non-list) page reached from the "url" field above.
  detail:
    is_list: false
    fields:
      # Selector must stay quoted: a leading `#` would otherwise start a
      # YAML comment and leave the value empty.
      - name: "description"
        css: "#product_description + p"