# Mirror of https://github.com/crawlab-team/crawlab.git
# Synced 2026-01-23 17:31:11 +01:00
# Source listing: 20 lines, 404 B, Plaintext
---
# Spider configuration: a start URL, the crawling engine to use, and a set of
# named stages. Each stage lists the fields to extract with CSS selectors.
version: "0.4.0"
start_url: "https://baidu.com/s?wd=crawlab"
engine: "scrapy"

stages:
  stage_1:
    is_list: true  # default: false
    # NOTE(review): presumably list_css selects each result item and page_css
    # selects the pagination link to follow — confirm against the consumer.
    list_css: "#content_left > .result"
    page_css: "#page > a.n:last-child"
    fields:
      - name: "title"
        css: "a"
      - name: "url"
        css: "a"
        attr: "href"
        # Names another entry under `stages`.
        next_stage: "stage_2"
  stage_2:
    # NOTE(review): originally spelled `list: false`; renamed to match the
    # `is_list` key used by stage_1. The value equals the default documented
    # there — confirm against the consumer's schema.
    is_list: false
    fields:
      # NOTE(review): the empty name looks like a placeholder to be filled in
      # — confirm.
      - name: ""