Files
crawlab/backend/template/Spiderfile
2019-11-24 18:51:32 +08:00

22 lines
462 B
Plaintext

# Spiderfile template: declares a start URL, the extraction engine, and a
# chain of named stages, each with the fields to extract via CSS selectors.
# NOTE(review): the per-key semantics described in these comments are inferred
# from the key names; confirm against the consumer's Spiderfile schema.
version: "0.4.0"
start_url: "https://baidu.com/s?wd=crawlab"
# Must name a key defined under `stages`. Only stage_1 and stage_2 exist in
# this file, and stage_1 hands off to stage_2 through `next_stage`, so the
# crawl starts at stage_1.
start_stage: "stage_1"
engine: "scrapy"
stages:
  # List stage: one item per element matched by `list_css`.
  stage_1:
    is_list: true  # default: false
    list_css: "#content_left > .result"
    # Selector for the pagination link (presumably the "next page" anchor).
    page_css: "#page > a.n:last-child"
    fields:
      - name: "title"
        css: "a"
      - name: "url"
        css: "a"
        # Takes the attribute value rather than the element text (presumably).
        attr: "href"
    next_stage: "stage_2"
  # Non-list stage reached from stage_1; it declares no `next_stage`.
  stage_2:
    is_list: false
    fields:
      - name: "stage_2_field_1"
        css: "a"