Files
crawlab/backend/template/spiders/v2ex_config/Spiderfile
2020-02-03 11:58:05 +08:00

55 lines
1.1 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
name: "v2ex_config"
display_name: "V2ex可配置"
remark: "V2ex列表+详情"
type: "configurable"
col: "results_v2ex_config"
engine: scrapy
start_url: https://v2ex.com/
start_stage: list
stages:
- name: list
is_list: true
list_css: .cell.item
list_xpath: ""
page_css: ""
page_xpath: ""
page_attr: href
fields:
- name: title
css: a.topic-link
xpath: ""
attr: ""
next_stage: ""
remark: ""
- name: url
css: a.topic-link
xpath: ""
attr: href
next_stage: detail
remark: ""
- name: replies
css: .count_livid
xpath: ""
attr: ""
next_stage: ""
remark: ""
- name: detail
is_list: false
list_css: ""
list_xpath: ""
page_css: ""
page_xpath: ""
page_attr: ""
fields:
- name: content
css: ""
xpath: .//*[@class="markdown_body"]
attr: ""
next_stage: ""
remark: ""
settings:
AUTOTHROTTLE_ENABLED: "true"
ROBOTSTXT_OBEY: "false"
USER_AGENT: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML,
like Gecko) Chrome/79.0.3945.117 Safari/537.36