Files
crawlab/backend/template/spiders/baidu_config/Spiderfile
2020-02-03 11:58:05 +08:00

40 lines
868 B
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Spiderfile for the "baidu_config" configurable spider.
# Declares one list stage that extracts title, url and abstract from each
# result item on the start page, plus the settings passed to the engine.
name: "baidu_config"
display_name: "百度搜索(可配置)"
remark: "百度搜索Crawlab列表+分页"
type: "configurable"
col: "results_baidu_config"
engine: scrapy
start_url: "http://www.baidu.com/s?wd=crawlab"
# Must match the `name` of one of the entries under `stages`.
start_stage: list

stages:
  - name: list
    is_list: true
    # Each stage/field offers a CSS and an XPath selector slot; the unused
    # one is left as an empty string.
    list_css: ".result.c-container"
    list_xpath: ""
    # NOTE(review): presumably the selector for the pagination link, with
    # `page_attr` naming the attribute that holds its URL - confirm against
    # the Crawlab configurable-spider documentation.
    page_css: "a.n"
    page_xpath: ""
    page_attr: href
    fields:
      # XPaths start with `.//`, i.e. they are relative to the node they
      # are evaluated against (assumed to be each matched list item).
      - name: title
        css: ""
        xpath: .//h3/a
        attr: ""
        next_stage: ""
        remark: ""
      - name: url
        css: ""
        xpath: .//h3/a
        attr: href
        next_stage: ""
        remark: ""
      - name: abstract
        css: ""
        # Single-quoted because the expression contains `[`, `*`, `@` and `"`.
        xpath: './/*[@class="c-abstract"]'
        attr: ""
        next_stage: ""
        remark: ""

settings:
  # Deliberately kept as the quoted string "false", exactly as in the
  # original. NOTE(review): confirm the consumer expects a string here
  # before converting it to a YAML boolean.
  ROBOTSTXT_OBEY: "false"
  # Folded scalar: the two lines are joined with a single space and no
  # trailing newline, giving the same one-line value as the original.
  USER_AGENT: >-
    Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML,
    like Gecko) Chrome/78.0.3904.108 Safari/537.36