mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-31 18:10:50 +01:00
加入可配置爬虫
This commit is contained in:
@@ -1,22 +1,23 @@
|
||||
package entity
|
||||
|
||||
type Field struct {
|
||||
Name string `yaml:"name" json:"name"`
|
||||
Css string `yaml:"css" json:"css"`
|
||||
Xpath string `yaml:"xpath" json:"xpath"`
|
||||
Attr string `yaml:"attr" json:"attr"`
|
||||
Stage string `yaml:"stage" json:"stage"`
|
||||
Name string `yaml:"name" json:"name"`
|
||||
Css string `yaml:"css" json:"css"`
|
||||
Xpath string `yaml:"xpath" json:"xpath"`
|
||||
Attr string `yaml:"attr" json:"attr"`
|
||||
NextStage string `yaml:"next_stage" json:"next_stage"`
|
||||
}
|
||||
|
||||
type Stage struct {
|
||||
List bool `yaml:"list" json:"list"`
|
||||
Css string `yaml:"css" json:"css"`
|
||||
Xpath string `yaml:"xpath" json:"xpath"`
|
||||
Fields []Field `yaml:"fields" json:"fields"`
|
||||
IsList bool `yaml:"is_list" json:"is_list"`
|
||||
ListCss string `yaml:"list_css" json:"list_css"`
|
||||
PageCss string `yaml:"page_css" json:"page_css"`
|
||||
Fields []Field `yaml:"fields" json:"fields"`
|
||||
}
|
||||
|
||||
type ConfigSpiderData struct {
|
||||
Version string `yaml:"version" json:"version"`
|
||||
StartUrl string `yaml:"startUrl" json:"start_url"`
|
||||
Engine string `yaml:"engine" json:"engine"`
|
||||
StartUrl string `yaml:"start_url" json:"start_url"`
|
||||
Stages map[string]Stage `yaml:"stages" json:"stages"`
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user