mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-22 17:31:03 +01:00
22 lines
462 B
Plaintext
22 lines
462 B
Plaintext
---
# Spider configuration: entry URL, entry stage, crawl engine, and the
# per-stage extraction rules listed under `stages`.
version: 0.4.0
start_url: "https://baidu.com/s?wd=crawlab"
# NOTE(review): "stage_4" is not defined under `stages` below (only stage_1
# and stage_2 are). Confirm whether this is intentional (e.g. a fixture for
# validation errors) or should name one of the defined stages.
start_stage: "stage_4"
engine: "scrapy"

stages:
  # List-type stage: `is_list` is true and list/page CSS selectors are set.
  stage_1:
    is_list: true  # default: false
    list_css: "#content_left > .result"
    page_css: "#page > a.n:last-child"
    fields:
      - name: "title"
        css: "a"
      - name: "url"
        css: "a"
        attr: "href"
        # NOTE(review): `next_stage` is assumed to belong to this field item
        # rather than to stage_1 itself; verify against the consumer's schema.
        next_stage: "stage_2"

  # Non-list stage with a single field; referenced by `next_stage` above.
  stage_2:
    is_list: false
    fields:
      - name: "stage_2_field_1"
        css: "a"