# Mirror of https://github.com/crawlab-team/crawlab.git
# Synced 2026-01-23 17:31:11 +01:00
# 26 lines, 608 B, plaintext
# Configurable-spider definition targeting http://books.toscrape.com.
# Top-level keys pick the engine and the entry point; `stages` maps each
# stage name to its CSS-selector extraction rules.
version: "0.4.0"
name: "toscrapy_books"
start_url: "http://books.toscrape.com"
start_stage: "list"  # names the `list` stage defined under `stages`
engine: "scrapy"

stages:
  # Entry stage (see start_stage). The list_css/page_css/page_attr keys are
  # presumably the per-item selector and the next-page link selector and
  # attribute -- confirm against the Crawlab Spiderfile documentation.
  list:
    is_list: true  # default: false
    list_css: "section article.product_pod"
    page_css: "ul.pager li.next a"
    page_attr: "href"  # default: href
    fields:
      - name: "title"
        css: "h3 > a"
      - name: "url"
        css: "h3 > a"
        attr: "href"
        next_stage: "detail"  # names the `detail` stage defined below
      - name: "price"
        css: ".product_price > .price_color"

  # Stage named by the `url` field's next_stage above.
  detail:
    is_list: false
    fields:
      - name: "description"
        # Quoted because the selector starts with `#`, which would
        # otherwise begin a YAML comment.
        css: "#product_description + p"