# Mirror of https://github.com/crawlab-team/crawlab.git
# Synced 2026-01-24 17:41:03 +01:00
# Size as listed by the mirror: 55 lines, 1.1 KiB, Plaintext
# Spider identity. The display name reads "V2ex (configurable)" and the
# remark reads "V2ex, list + detail".
name: "v2ex_config"
display_name: "V2ex(可配置)"
remark: "V2ex,列表+详情"
type: "configurable"
# NOTE(review): presumably the collection that receives scraped results —
# confirm against the consumer of this file.
col: "results_v2ex_config"
engine: scrapy
# Crawl entry point. `start_stage` names the stage called `list` under `stages`.
start_url: https://v2ex.com/
start_stage: list
# Crawl stages. Each stage carries list/pagination selectors and a list of
# fields; each field carries a CSS or XPath selector, with the unused one left
# as an empty string.
# NOTE(review): a non-empty `next_stage` on a field names another stage in this
# list; presumably the extracted value (here the `href` of the `url` field) is
# followed into that stage — confirm against the configurable-spider docs.
stages:
  - name: list
    is_list: true
    list_css: .cell.item
    list_xpath: ""
    page_css: ""
    page_xpath: ""
    page_attr: href
    fields:
      - name: title
        css: a.topic-link
        xpath: ""
        attr: ""
        next_stage: ""
        remark: ""
      - name: url
        css: a.topic-link
        xpath: ""
        attr: href
        next_stage: detail
        remark: ""
      - name: replies
        css: .count_livid
        xpath: ""
        attr: ""
        next_stage: ""
        remark: ""
  - name: detail
    is_list: false
    list_css: ""
    list_xpath: ""
    page_css: ""
    page_xpath: ""
    page_attr: ""
    fields:
      - name: content
        css: ""
        # Single-quoted so `*`, `[`, `@` and the inner double quotes stay literal.
        xpath: './/*[@class="markdown_body"]'
        attr: ""
        next_stage: ""
        remark: ""
# Engine settings. The boolean-looking values are deliberately quoted so they
# load as the strings "true" / "false", not YAML booleans.
settings:
  AUTOTHROTTLE_ENABLED: "true"
  ROBOTSTXT_OBEY: "false"
  # Folded block scalar with strip chomping: the two lines are joined by a
  # single space and no trailing newline is added, giving one user-agent string.
  USER_AGENT: >-
    Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML,
    like Gecko) Chrome/79.0.3945.117 Safari/537.36