# Mirror of https://github.com/crawlab-team/crawlab.git (synced 2026-01-30 18:00:56 +01:00)
# 28 lines, 576 B, plaintext
# Configurable-spider definition for http://books.toscrape.com.
# Two stages are declared: "list" (the entry stage) and "detail".
name: "toscrapy_books"
start_url: "http://books.toscrape.com"
# Must match the `name` of one of the entries under `stages`.
start_stage: "list"
engine: "scrapy"

stages:
  - name: list
    is_list: true
    # Selector for each repeated item on the page.
    list_css: "section article.product_pod"
    # Pagination: selector of the "next" link and the attribute to read from it.
    page_css: "ul.pager li.next a"
    page_attr: "href"
    fields:
      - name: "title"
        css: "h3 > a"
      - name: "url"
        css: "h3 > a"
        attr: "href"
        # Refers to the "detail" stage declared below.
        # NOTE(review): presumably the extracted href is followed into that
        # stage -- confirm against the consumer's schema.
        next_stage: "detail"
      - name: "price"
        css: ".product_price > .price_color"

  - name: detail
    is_list: false
    fields:
      - name: "description"
        # Quoted on purpose: a leading `#` would otherwise start a YAML comment.
        css: "#product_description + p"

# NOTE(review): presumably forwarded to the engine selected above as its
# settings -- confirm against the consumer.
settings:
  ROBOTSTXT_OBEY: true
  AUTOTHROTTLE_ENABLED: true