优化可配置爬虫前端界面

This commit is contained in:
marvzhang
2019-11-28 21:36:17 +08:00
parent 8210fa6c20
commit d65e37517e
11 changed files with 186 additions and 93 deletions

View File

@@ -140,11 +140,12 @@ func main() {
authGroup.GET("/spiders/:id/stats", routes.GetSpiderStats) // 爬虫统计数据
authGroup.GET("/spider/types", routes.GetSpiderTypes) // 爬虫类型
// 可配置爬虫
authGroup.GET("/config_spiders/:id/config", routes.GetConfigSpiderConfig) // 获取可配置爬虫配置
authGroup.POST("/config_spiders/:id/config", routes.PostConfigSpiderConfig) // 更改可配置爬虫配置
authGroup.PUT("/config_spiders", routes.PutConfigSpider) // 添加可配置爬虫
authGroup.POST("/config_spiders/:id", routes.PostConfigSpider) // 修改可配置爬虫
authGroup.POST("/config_spiders/:id/upload", routes.UploadConfigSpider) // 上传可配置爬虫
authGroup.GET("/config_spiders/:id/config", routes.GetConfigSpiderConfig) // 获取可配置爬虫配置
authGroup.POST("/config_spiders/:id/config", routes.PostConfigSpiderConfig) // 更改可配置爬虫配置
authGroup.PUT("/config_spiders", routes.PutConfigSpider) // 添加可配置爬虫
authGroup.POST("/config_spiders/:id", routes.PostConfigSpider) // 修改可配置爬虫
authGroup.POST("/config_spiders/:id/upload", routes.UploadConfigSpider) // 上传可配置爬虫
authGroup.POST("/config_spiders/:id/spiderfile", routes.PostConfigSpiderSpiderfile) // Save the Spiderfile content of a configurable spider
// 任务
authGroup.GET("/tasks", routes.GetTaskList) // 任务列表
authGroup.GET("/tasks/:id", routes.GetTask) // 任务详情

View File

@@ -216,7 +216,7 @@ func (g ScrapyGenerator) GetExtractStringFromField(f entity.Field) string {
// 如果为CSS
if f.Attr == "" {
// 文本
return fmt.Sprintf(`css('%s::text()')`, f.Css)
return fmt.Sprintf(`css('%s::text')`, f.Css)
} else {
// 属性
return fmt.Sprintf(`css('%s::attr("%s")')`, f.Css, f.Attr)
@@ -242,9 +242,9 @@ func (g ScrapyGenerator) GetExtractStringFromStage(stage entity.Stage) string {
if stage.PageCss != "" {
// 如果为CSS
return fmt.Sprintf(`css(%s::attr("%s"))`, stage.PageCss, pageAttr)
return fmt.Sprintf(`css('%s::attr("%s")')`, stage.PageCss, pageAttr)
} else {
// 如果为XPath
return fmt.Sprintf(`xpath(%s/@%s)`, stage.PageXpath, pageAttr)
return fmt.Sprintf(`xpath('%s/@%s')`, stage.PageXpath, pageAttr)
}
}

View File

@@ -158,6 +158,54 @@ func UploadConfigSpider(c *gin.Context) {
})
}
// PostConfigSpiderSpiderfile replaces the Spiderfile of a configurable spider
// with the raw text supplied by the client.
//
// The JSON request body must carry the file text in its "content" field. The
// text is parsed as YAML into entity.ConfigSpiderData, handed to
// services.ProcessSpiderFilesFromConfigData, and only after both steps succeed
// is it written verbatim to <spider.Src>/Spiderfile.
//
// Responses: 400 for a malformed id, an unbindable body, an unknown spider or
// invalid YAML; 500 when processing the spider files or writing the
// Spiderfile fails; 200 with an "ok" status otherwise.
func PostConfigSpiderSpiderfile(c *gin.Context) {
	type Body struct {
		Content string `json:"content"`
	}

	id := c.Param("id")

	// bson.ObjectIdHex panics on anything that is not a valid hex object id,
	// so a malformed path parameter must be rejected before it is converted.
	if !bson.IsObjectIdHex(id) {
		HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("invalid spider id: %s", id))
		return
	}

	// File content sent by the client.
	var reqBody Body
	if err := c.ShouldBindJSON(&reqBody); err != nil {
		HandleError(http.StatusBadRequest, c, err)
		return
	}
	content := []byte(reqBody.Content)

	// Look up the spider.
	spider, err := model.GetSpider(bson.ObjectIdHex(id))
	if err != nil {
		HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("cannot find spider (id: %s)", id))
		return
	}

	// Deserialize the YAML text; invalid YAML is a client error.
	var configData entity.ConfigSpiderData
	if err := yaml.Unmarshal(content, &configData); err != nil {
		HandleError(http.StatusBadRequest, c, err)
		return
	}

	// Process the spider files from the deserialized data.
	if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	// Persist the Spiderfile exactly as the client sent it.
	if err := ioutil.WriteFile(filepath.Join(spider.Src, "Spiderfile"), content, os.ModePerm); err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	c.JSON(http.StatusOK, Response{
		Status:  "ok",
		Message: "success",
	})
}
func PostConfigSpiderConfig(c *gin.Context) {
id := c.Param("id")
@@ -166,20 +214,27 @@ func PostConfigSpiderConfig(c *gin.Context) {
spider, err := model.GetSpider(bson.ObjectIdHex(id))
if err != nil {
HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("cannot find spider (id: %s)", id))
return
}
// 反序列化配置数据
var configData entity.ConfigSpiderData
if err := c.ShouldBindJSON(&configData); err != nil {
HandleError(http.StatusBadRequest, c, err)
return
}
// 根据序列化后的数据处理爬虫文件
if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
// TODO: 替换Spiderfile文件
// 替换Spiderfile文件
if err := services.GenerateSpiderfileFromConfigData(spider, configData); err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
c.JSON(http.StatusOK, Response{
Status: "ok",

View File

@@ -13,6 +13,7 @@ import (
"github.com/globalsign/mgo/bson"
uuid "github.com/satori/go.uuid"
"github.com/spf13/viper"
"gopkg.in/yaml.v2"
"os"
"path/filepath"
"strings"
@@ -232,3 +233,28 @@ func ProcessSpiderFilesFromConfigData(spider model.Spider, configData entity.Con
return nil
}
// GenerateSpiderfileFromConfigData serializes configData to YAML and writes it
// to the file named "Spiderfile" inside spider.Src, replacing any previous
// content.
//
// The file is created if it does not exist yet. Errors from marshalling,
// opening, writing or closing the file are returned to the caller unchanged.
func GenerateSpiderfileFromConfigData(spider model.Spider, configData entity.ConfigSpiderData) error {
	// Path of the Spiderfile.
	sfPath := filepath.Join(spider.Src, "Spiderfile")

	// Render the YAML content.
	sfContentByte, err := yaml.Marshal(configData)
	if err != nil {
		return err
	}

	// O_CREATE lets a missing Spiderfile be generated instead of failing the
	// open; O_TRUNC discards the old content of an existing file.
	f, err := os.OpenFile(sfPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0777)
	if err != nil {
		return err
	}

	// Write the content.
	if _, err := f.Write(sfContentByte); err != nil {
		// The write error is the one worth reporting; the close result is
		// deliberately dropped here.
		_ = f.Close()
		return err
	}

	// Close may report a write failure that surfaced late, so its error is
	// returned rather than ignored.
	return f.Close()
}

View File

@@ -120,10 +120,10 @@
<div class="button-group-container">
<div class="button-group">
<el-button type="danger" @click="onCrawl">{{$t('Run')}}</el-button>
<el-button type="primary" @click="onExtractFields" v-loading="extractFieldsLoading">
{{$t('ExtractFields')}}
</el-button>
<el-button type="warning" @click="onPreview" v-loading="previewLoading">{{$t('Preview')}}</el-button>
<!-- <el-button type="primary" @click="onExtractFields" v-loading="extractFieldsLoading">-->
<!-- {{$t('ExtractFields')}}-->
<!-- </el-button>-->
<!-- <el-button type="warning" @click="onPreview" v-loading="previewLoading">{{$t('Preview')}}</el-button>-->
<el-button type="success" @click="onSave" v-loading="saveLoading">{{$t('Save')}}</el-button>
</div>
</div>
@@ -181,6 +181,18 @@
<div id="process-chart"></div>
</el-tab-pane>
<!--./Graph-->
<!--Spiderfile-->
<el-tab-pane name="spiderfile" label="Spiderfile">
<div class="spiderfile-actions">
<el-button type="primary" size="small" style="margin-right: 10px;" @click="onSpiderfileSave">
<font-awesome-icon :icon="['fa', 'save']"/>
{{$t('Save')}}
</el-button>
</div>
<file-detail/>
</el-tab-pane>
<!--./Spiderfile-->
</el-tabs>
<!--./tabs-->
</div>
@@ -194,16 +206,25 @@ import echarts from 'echarts'
import FieldsTableView from '../TableView/FieldsTableView'
import CrawlConfirmDialog from '../Common/CrawlConfirmDialog'
import 'codemirror/lib/codemirror.js'
import 'codemirror/mode/yaml/yaml.js'
import FileDetail from '../File/FileDetail'
export default {
name: 'ConfigList',
components: {
FileDetail,
CrawlConfirmDialog,
FieldsTableView
},
watch: {
activeTab () {
setTimeout(() => {
// 渲染流程图
this.renderProcessChart()
// 获取Spiderfile
this.getSpiderfile()
}, 0)
}
},
@@ -229,7 +250,15 @@ export default {
{ name: 'next_stage', label: 'Next Stage' }
],
activeTab: 'stages',
processChart: undefined
processChart: undefined,
fileOptions: {
mode: 'text/x-yaml',
theme: 'darcula',
styleActiveLine: true,
lineNumbers: true,
line: true,
matchBrackets: true
}
}
},
computed: {
@@ -318,34 +347,16 @@ export default {
onSelectCrawlType (value) {
this.spiderForm.crawl_type = value
},
onSave () {
async onSave () {
this.$st.sendEv('爬虫详情-配置', '保存')
return new Promise((resolve, reject) => {
this.saveLoading = true
this.$store.dispatch('spider/updateSpiderFields')
.then(() => {
this.$store.dispatch('spider/editSpider')
.then(() => {
this.$message.success(this.$t('Spider info has been saved successfully'))
resolve()
})
.catch(() => {
this.$message.error(this.$t('Something wrong happened'))
reject(new Error())
})
.finally(() => {
this.saveLoading = false
})
})
.then(() => {
this.$store.dispatch('spider/updateSpiderDetailFields')
})
.catch(() => {
this.$message.error(this.$t('Something wrong happened'))
this.saveLoading = false
reject(new Error())
})
})
this.saveLoading = true
try {
await this.$store.dispatch('spider/postConfigSpiderConfig')
this.$message.success(this.$t('Spider info has been saved successfully'))
} catch (e) {
this.$message.error(this.$t('Something wrong happened'))
}
this.saveLoading = false
},
onDialogClose () {
this.dialogVisible = false
@@ -378,17 +389,8 @@ export default {
})
},
onCrawl () {
this.$confirm(this.$t('Are you sure to run this spider?'), this.$t('Notification'), {
confirmButtonText: this.$t('Confirm'),
cancelButtonText: this.$t('Cancel')
})
.then(() => {
this.$store.dispatch('spider/crawlSpider', this.spiderForm._id)
.then(() => {
this.$message.success(this.$t(`Spider task has been scheduled`))
})
this.$st.sendEv('爬虫详情-配置', '运行')
})
this.crawlConfirmDialogVisible = true
this.$st.sendEv('爬虫详情-配置', '点击运行')
},
onExtractFields () {
this.$refs['form'].validate(res => {
@@ -571,39 +573,20 @@ ${f.css || f.xpath} ${f.attr ? ('(' + f.attr + ')') : ''} ${f.next_stage ? (' --
const totalWidth = Number(getComputedStyle(elStages).width.replace('px', ''))
const paddingRight = Number(getComputedStyle(elStages).paddingRight.replace('px', ''))
elBar.setAttribute('style', 'width:' + (totalWidth - paddingRight) + 'px')
}
},
created () {
// fields for list page
if (!this.spiderForm.fields) {
this.spiderForm.fields = []
for (let i = 0; i < 3; i++) {
this.spiderForm.fields.push({
name: 'field_' + (i + 1),
type: 'css',
extract_type: 'text'
})
},
getSpiderfile () {
this.$store.commit('file/SET_FILE_CONTENT', '')
this.$store.commit('file/SET_CURRENT_PATH', 'Spiderfile')
this.$store.dispatch('file/getFileContent', { path: 'Spiderfile' })
},
async onSpiderfileSave () {
try {
await this.$store.dispatch('spider/saveConfigSpiderSpiderfile')
this.$message.success(this.$t('Spiderfile saved successfully'))
} catch (e) {
this.$message.error('Something wrong happened')
}
}
// fields for detail page
if (!this.spiderForm.detail_fields) {
this.spiderForm.detail_fields = []
for (let i = 0; i < 3; i++) {
this.spiderForm.detail_fields.push({
name: 'field_' + (i + 1),
type: 'css',
extract_type: 'text'
})
}
}
if (!this.spiderForm.crawl_type) this.$set(this.spiderForm, 'crawl_type', 'list')
// if (!this.spiderForm.start_url) this.$set(this.spiderForm, 'start_url', 'http://example.com')
if (!this.spiderForm.item_selector_type) this.$set(this.spiderForm, 'item_selector_type', 'css')
if (!this.spiderForm.pagination_selector_type) this.$set(this.spiderForm, 'pagination_selector_type', 'css')
if (this.spiderForm.obey_robots_txt == null) this.$set(this.spiderForm, 'obey_robots_txt', true)
if (this.spiderForm.item_threshold == null) this.$set(this.spiderForm, 'item_threshold', 10)
},
mounted () {
this.activeNames = Object.keys(this.spiderForm.config.stages)
@@ -770,4 +753,13 @@ ${f.css || f.xpath} ${f.attr ? ('(' + f.attr + ')') : ''} ${f.next_stage ? (' --
width: 100%;
height: 480px;
}
.config-list >>> .file-content {
height: calc(100vh - 280px);
}
.spiderfile-actions {
margin-bottom: 5px;
text-align: right;
}
</style>

View File

@@ -18,6 +18,7 @@ import 'codemirror/mode/go/go.js'
import 'codemirror/mode/shell/shell.js'
import 'codemirror/mode/markdown/markdown.js'
import 'codemirror/mode/php/php.js'
import 'codemirror/mode/yaml/yaml.js'
export default {
name: 'FileDetail',
@@ -38,7 +39,7 @@ export default {
},
options () {
return {
mode: this.lanaguage,
mode: this.language,
theme: 'darcula',
styleActiveLine: true,
lineNumbers: true,
@@ -46,8 +47,9 @@ export default {
matchBrackets: true
}
},
lanaguage () {
language () {
const fileName = this.$store.state.file.currentPath
if (!fileName) return ''
if (fileName.match(/\.js$/)) {
return 'text/javascript'
} else if (fileName.match(/\.py$/)) {
@@ -60,6 +62,8 @@ export default {
return 'text/x-php'
} else if (fileName.match(/\.md$/)) {
return 'text/x-markdown'
} else if (fileName === 'Spiderfile') {
return 'text/x-yaml'
} else {
return 'text'
}
@@ -74,7 +78,7 @@ export default {
<style scoped>
.file-content {
border: 1px solid #eaecef;
height: 480px;
height: calc(100vh - 256px);
}
.file-content >>> .CodeMirror {

View File

@@ -120,6 +120,8 @@ export default {
this.showFile = false
this.onBack()
}
},
created () {
}
}
</script>

View File

@@ -102,7 +102,11 @@ export default {
'spiderForm'
]),
isShowRun () {
return !!this.spiderForm.cmd
if (this.spiderForm.type === 'customized') {
return !!this.spiderForm.cmd
} else {
return true
}
}
},
methods: {

View File

@@ -43,12 +43,7 @@ const actions = {
commit('SET_FILE_CONTENT', response.data.data)
})
},
saveFileContent ({ state, rootState }, payload) {
const { path } = payload
const spiderId = rootState.spider.spiderForm._id
const content = state.fileContent
return request.post(`/spiders/${spiderId}/file`, { content, path })
}
}
export default {

View File

@@ -148,6 +148,13 @@ const actions = {
},
extractFields ({ state, commit }) {
return request.post(`/spiders/${state.spiderForm._id}/extract_fields`)
},
postConfigSpiderConfig ({ state }) {
return request.post(`/config_spiders/${state.spiderForm._id}/config`, state.spiderForm.config)
},
saveConfigSpiderSpiderfile ({ state, rootState }) {
const content = rootState.file.fileContent
return request.post(`/config_spiders/${state.spiderForm._id}/spiderfile`, { content })
}
}

View File

@@ -48,6 +48,13 @@ export default {
FileList,
SpiderOverview
},
watch: {
activeTabName () {
// 初始化文件
this.$store.commit('file/SET_FILE_CONTENT', '')
this.$store.commit('file/SET_CURRENT_PATH', '')
}
},
data () {
return {
activeTabName: 'overview'