mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-22 17:31:03 +01:00
优化可配置爬虫前端界面
This commit is contained in:
@@ -140,11 +140,12 @@ func main() {
|
||||
authGroup.GET("/spiders/:id/stats", routes.GetSpiderStats) // 爬虫统计数据
|
||||
authGroup.GET("/spider/types", routes.GetSpiderTypes) // 爬虫类型
|
||||
// 可配置爬虫
|
||||
authGroup.GET("/config_spiders/:id/config", routes.GetConfigSpiderConfig) // 获取可配置爬虫配置
|
||||
authGroup.POST("/config_spiders/:id/config", routes.PostConfigSpiderConfig) // 更改可配置爬虫配置
|
||||
authGroup.PUT("/config_spiders", routes.PutConfigSpider) // 添加可配置爬虫
|
||||
authGroup.POST("/config_spiders/:id", routes.PostConfigSpider) // 修改可配置爬虫
|
||||
authGroup.POST("/config_spiders/:id/upload", routes.UploadConfigSpider) // 上传可配置爬虫
|
||||
authGroup.GET("/config_spiders/:id/config", routes.GetConfigSpiderConfig) // 获取可配置爬虫配置
|
||||
authGroup.POST("/config_spiders/:id/config", routes.PostConfigSpiderConfig) // 更改可配置爬虫配置
|
||||
authGroup.PUT("/config_spiders", routes.PutConfigSpider) // 添加可配置爬虫
|
||||
authGroup.POST("/config_spiders/:id", routes.PostConfigSpider) // 修改可配置爬虫
|
||||
authGroup.POST("/config_spiders/:id/upload", routes.UploadConfigSpider) // 上传可配置爬虫
|
||||
authGroup.POST("/config_spiders/:id/spiderfile", routes.PostConfigSpiderSpiderfile) // save the configurable spider's Spiderfile
|
||||
// 任务
|
||||
authGroup.GET("/tasks", routes.GetTaskList) // 任务列表
|
||||
authGroup.GET("/tasks/:id", routes.GetTask) // 任务详情
|
||||
|
||||
@@ -216,7 +216,7 @@ func (g ScrapyGenerator) GetExtractStringFromField(f entity.Field) string {
|
||||
// 如果为CSS
|
||||
if f.Attr == "" {
|
||||
// 文本
|
||||
return fmt.Sprintf(`css('%s::text()')`, f.Css)
|
||||
return fmt.Sprintf(`css('%s::text')`, f.Css)
|
||||
} else {
|
||||
// 属性
|
||||
return fmt.Sprintf(`css('%s::attr("%s")')`, f.Css, f.Attr)
|
||||
@@ -242,9 +242,9 @@ func (g ScrapyGenerator) GetExtractStringFromStage(stage entity.Stage) string {
|
||||
|
||||
if stage.PageCss != "" {
|
||||
// 如果为CSS
|
||||
return fmt.Sprintf(`css(%s::attr("%s"))`, stage.PageCss, pageAttr)
|
||||
return fmt.Sprintf(`css('%s::attr("%s")')`, stage.PageCss, pageAttr)
|
||||
} else {
|
||||
// 如果为XPath
|
||||
return fmt.Sprintf(`xpath(%s/@%s)`, stage.PageXpath, pageAttr)
|
||||
return fmt.Sprintf(`xpath('%s/@%s')`, stage.PageXpath, pageAttr)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -158,6 +158,54 @@ func UploadConfigSpider(c *gin.Context) {
|
||||
})
|
||||
}
|
||||
|
||||
func PostConfigSpiderSpiderfile(c *gin.Context) {
|
||||
type Body struct {
|
||||
Content string `json:"content"`
|
||||
}
|
||||
|
||||
id := c.Param("id")
|
||||
|
||||
// 文件内容
|
||||
var reqBody Body
|
||||
if err := c.ShouldBindJSON(&reqBody); err != nil {
|
||||
HandleError(http.StatusBadRequest, c, err)
|
||||
return
|
||||
}
|
||||
content := reqBody.Content
|
||||
|
||||
// 获取爬虫
|
||||
var spider model.Spider
|
||||
spider, err := model.GetSpider(bson.ObjectIdHex(id))
|
||||
if err != nil {
|
||||
HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("cannot find spider (id: %s)", id))
|
||||
return
|
||||
}
|
||||
|
||||
// 反序列化
|
||||
var configData entity.ConfigSpiderData
|
||||
if err := yaml.Unmarshal([]byte(content), &configData); err != nil {
|
||||
HandleError(http.StatusBadRequest, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 根据序列化后的数据处理爬虫文件
|
||||
if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 写文件
|
||||
if err := ioutil.WriteFile(filepath.Join(spider.Src, "Spiderfile"), []byte(content), os.ModePerm); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, Response{
|
||||
Status: "ok",
|
||||
Message: "success",
|
||||
})
|
||||
}
|
||||
|
||||
func PostConfigSpiderConfig(c *gin.Context) {
|
||||
id := c.Param("id")
|
||||
|
||||
@@ -166,20 +214,27 @@ func PostConfigSpiderConfig(c *gin.Context) {
|
||||
spider, err := model.GetSpider(bson.ObjectIdHex(id))
|
||||
if err != nil {
|
||||
HandleErrorF(http.StatusBadRequest, c, fmt.Sprintf("cannot find spider (id: %s)", id))
|
||||
return
|
||||
}
|
||||
|
||||
// 反序列化配置数据
|
||||
var configData entity.ConfigSpiderData
|
||||
if err := c.ShouldBindJSON(&configData); err != nil {
|
||||
HandleError(http.StatusBadRequest, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// 根据序列化后的数据处理爬虫文件
|
||||
if err := services.ProcessSpiderFilesFromConfigData(spider, configData); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// TODO: 替换Spiderfile文件
|
||||
// 替换Spiderfile文件
|
||||
if err := services.GenerateSpiderfileFromConfigData(spider, configData); err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, Response{
|
||||
Status: "ok",
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"github.com/globalsign/mgo/bson"
|
||||
uuid "github.com/satori/go.uuid"
|
||||
"github.com/spf13/viper"
|
||||
"gopkg.in/yaml.v2"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
@@ -232,3 +233,28 @@ func ProcessSpiderFilesFromConfigData(spider model.Spider, configData entity.Con
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func GenerateSpiderfileFromConfigData(spider model.Spider, configData entity.ConfigSpiderData) error {
|
||||
// Spiderfile 路径
|
||||
sfPath := filepath.Join(spider.Src, "Spiderfile")
|
||||
|
||||
// 生成Yaml内容
|
||||
sfContentByte, err := yaml.Marshal(configData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// 打开文件
|
||||
f, err := os.OpenFile(sfPath, os.O_WRONLY|os.O_TRUNC, 0777)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
// 写入内容
|
||||
if _, err := f.Write(sfContentByte); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -120,10 +120,10 @@
|
||||
<div class="button-group-container">
|
||||
<div class="button-group">
|
||||
<el-button type="danger" @click="onCrawl">{{$t('Run')}}</el-button>
|
||||
<el-button type="primary" @click="onExtractFields" v-loading="extractFieldsLoading">
|
||||
{{$t('ExtractFields')}}
|
||||
</el-button>
|
||||
<el-button type="warning" @click="onPreview" v-loading="previewLoading">{{$t('Preview')}}</el-button>
|
||||
<!-- <el-button type="primary" @click="onExtractFields" v-loading="extractFieldsLoading">-->
|
||||
<!-- {{$t('ExtractFields')}}-->
|
||||
<!-- </el-button>-->
|
||||
<!-- <el-button type="warning" @click="onPreview" v-loading="previewLoading">{{$t('Preview')}}</el-button>-->
|
||||
<el-button type="success" @click="onSave" v-loading="saveLoading">{{$t('Save')}}</el-button>
|
||||
</div>
|
||||
</div>
|
||||
@@ -181,6 +181,18 @@
|
||||
<div id="process-chart"></div>
|
||||
</el-tab-pane>
|
||||
<!--./Graph-->
|
||||
|
||||
<!--Spiderfile-->
|
||||
<el-tab-pane name="spiderfile" label="Spiderfile">
|
||||
<div class="spiderfile-actions">
|
||||
<el-button type="primary" size="small" style="margin-right: 10px;" @click="onSpiderfileSave">
|
||||
<font-awesome-icon :icon="['fa', 'save']"/>
|
||||
{{$t('Save')}}
|
||||
</el-button>
|
||||
</div>
|
||||
<file-detail/>
|
||||
</el-tab-pane>
|
||||
<!--./Spiderfile-->
|
||||
</el-tabs>
|
||||
<!--./tabs-->
|
||||
</div>
|
||||
@@ -194,16 +206,25 @@ import echarts from 'echarts'
|
||||
import FieldsTableView from '../TableView/FieldsTableView'
|
||||
import CrawlConfirmDialog from '../Common/CrawlConfirmDialog'
|
||||
|
||||
import 'codemirror/lib/codemirror.js'
|
||||
import 'codemirror/mode/yaml/yaml.js'
|
||||
import FileDetail from '../File/FileDetail'
|
||||
|
||||
export default {
|
||||
name: 'ConfigList',
|
||||
components: {
|
||||
FileDetail,
|
||||
CrawlConfirmDialog,
|
||||
FieldsTableView
|
||||
},
|
||||
watch: {
|
||||
activeTab () {
|
||||
setTimeout(() => {
|
||||
// 渲染流程图
|
||||
this.renderProcessChart()
|
||||
|
||||
// 获取Spiderfile
|
||||
this.getSpiderfile()
|
||||
}, 0)
|
||||
}
|
||||
},
|
||||
@@ -229,7 +250,15 @@ export default {
|
||||
{ name: 'next_stage', label: 'Next Stage' }
|
||||
],
|
||||
activeTab: 'stages',
|
||||
processChart: undefined
|
||||
processChart: undefined,
|
||||
fileOptions: {
|
||||
mode: 'text/x-yaml',
|
||||
theme: 'darcula',
|
||||
styleActiveLine: true,
|
||||
lineNumbers: true,
|
||||
line: true,
|
||||
matchBrackets: true
|
||||
}
|
||||
}
|
||||
},
|
||||
computed: {
|
||||
@@ -318,34 +347,16 @@ export default {
|
||||
onSelectCrawlType (value) {
|
||||
this.spiderForm.crawl_type = value
|
||||
},
|
||||
onSave () {
|
||||
async onSave () {
|
||||
this.$st.sendEv('爬虫详情-配置', '保存')
|
||||
return new Promise((resolve, reject) => {
|
||||
this.saveLoading = true
|
||||
this.$store.dispatch('spider/updateSpiderFields')
|
||||
.then(() => {
|
||||
this.$store.dispatch('spider/editSpider')
|
||||
.then(() => {
|
||||
this.$message.success(this.$t('Spider info has been saved successfully'))
|
||||
resolve()
|
||||
})
|
||||
.catch(() => {
|
||||
this.$message.error(this.$t('Something wrong happened'))
|
||||
reject(new Error())
|
||||
})
|
||||
.finally(() => {
|
||||
this.saveLoading = false
|
||||
})
|
||||
})
|
||||
.then(() => {
|
||||
this.$store.dispatch('spider/updateSpiderDetailFields')
|
||||
})
|
||||
.catch(() => {
|
||||
this.$message.error(this.$t('Something wrong happened'))
|
||||
this.saveLoading = false
|
||||
reject(new Error())
|
||||
})
|
||||
})
|
||||
this.saveLoading = true
|
||||
try {
|
||||
await this.$store.dispatch('spider/postConfigSpiderConfig')
|
||||
this.$message.success(this.$t('Spider info has been saved successfully'))
|
||||
} catch (e) {
|
||||
this.$message.error(this.$t('Something wrong happened'))
|
||||
}
|
||||
this.saveLoading = false
|
||||
},
|
||||
onDialogClose () {
|
||||
this.dialogVisible = false
|
||||
@@ -378,17 +389,8 @@ export default {
|
||||
})
|
||||
},
|
||||
onCrawl () {
|
||||
this.$confirm(this.$t('Are you sure to run this spider?'), this.$t('Notification'), {
|
||||
confirmButtonText: this.$t('Confirm'),
|
||||
cancelButtonText: this.$t('Cancel')
|
||||
})
|
||||
.then(() => {
|
||||
this.$store.dispatch('spider/crawlSpider', this.spiderForm._id)
|
||||
.then(() => {
|
||||
this.$message.success(this.$t(`Spider task has been scheduled`))
|
||||
})
|
||||
this.$st.sendEv('爬虫详情-配置', '运行')
|
||||
})
|
||||
this.crawlConfirmDialogVisible = true
|
||||
this.$st.sendEv('爬虫详情-配置', '点击运行')
|
||||
},
|
||||
onExtractFields () {
|
||||
this.$refs['form'].validate(res => {
|
||||
@@ -571,39 +573,20 @@ ${f.css || f.xpath} ${f.attr ? ('(' + f.attr + ')') : ''} ${f.next_stage ? (' --
|
||||
const totalWidth = Number(getComputedStyle(elStages).width.replace('px', ''))
|
||||
const paddingRight = Number(getComputedStyle(elStages).paddingRight.replace('px', ''))
|
||||
elBar.setAttribute('style', 'width:' + (totalWidth - paddingRight) + 'px')
|
||||
}
|
||||
},
|
||||
created () {
|
||||
// fields for list page
|
||||
if (!this.spiderForm.fields) {
|
||||
this.spiderForm.fields = []
|
||||
for (let i = 0; i < 3; i++) {
|
||||
this.spiderForm.fields.push({
|
||||
name: 'field_' + (i + 1),
|
||||
type: 'css',
|
||||
extract_type: 'text'
|
||||
})
|
||||
},
|
||||
getSpiderfile () {
|
||||
this.$store.commit('file/SET_FILE_CONTENT', '')
|
||||
this.$store.commit('file/SET_CURRENT_PATH', 'Spiderfile')
|
||||
this.$store.dispatch('file/getFileContent', { path: 'Spiderfile' })
|
||||
},
|
||||
async onSpiderfileSave () {
|
||||
try {
|
||||
await this.$store.dispatch('spider/saveConfigSpiderSpiderfile')
|
||||
this.$message.success(this.$t('Spiderfile saved successfully'))
|
||||
} catch (e) {
|
||||
this.$message.error('Something wrong happened')
|
||||
}
|
||||
}
|
||||
|
||||
// fields for detail page
|
||||
if (!this.spiderForm.detail_fields) {
|
||||
this.spiderForm.detail_fields = []
|
||||
for (let i = 0; i < 3; i++) {
|
||||
this.spiderForm.detail_fields.push({
|
||||
name: 'field_' + (i + 1),
|
||||
type: 'css',
|
||||
extract_type: 'text'
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if (!this.spiderForm.crawl_type) this.$set(this.spiderForm, 'crawl_type', 'list')
|
||||
// if (!this.spiderForm.start_url) this.$set(this.spiderForm, 'start_url', 'http://example.com')
|
||||
if (!this.spiderForm.item_selector_type) this.$set(this.spiderForm, 'item_selector_type', 'css')
|
||||
if (!this.spiderForm.pagination_selector_type) this.$set(this.spiderForm, 'pagination_selector_type', 'css')
|
||||
if (this.spiderForm.obey_robots_txt == null) this.$set(this.spiderForm, 'obey_robots_txt', true)
|
||||
if (this.spiderForm.item_threshold == null) this.$set(this.spiderForm, 'item_threshold', 10)
|
||||
},
|
||||
mounted () {
|
||||
this.activeNames = Object.keys(this.spiderForm.config.stages)
|
||||
@@ -770,4 +753,13 @@ ${f.css || f.xpath} ${f.attr ? ('(' + f.attr + ')') : ''} ${f.next_stage ? (' --
|
||||
width: 100%;
|
||||
height: 480px;
|
||||
}
|
||||
|
||||
.config-list >>> .file-content {
|
||||
height: calc(100vh - 280px);
|
||||
}
|
||||
|
||||
.spiderfile-actions {
|
||||
margin-bottom: 5px;
|
||||
text-align: right;
|
||||
}
|
||||
</style>
|
||||
|
||||
@@ -18,6 +18,7 @@ import 'codemirror/mode/go/go.js'
|
||||
import 'codemirror/mode/shell/shell.js'
|
||||
import 'codemirror/mode/markdown/markdown.js'
|
||||
import 'codemirror/mode/php/php.js'
|
||||
import 'codemirror/mode/yaml/yaml.js'
|
||||
|
||||
export default {
|
||||
name: 'FileDetail',
|
||||
@@ -38,7 +39,7 @@ export default {
|
||||
},
|
||||
options () {
|
||||
return {
|
||||
mode: this.lanaguage,
|
||||
mode: this.language,
|
||||
theme: 'darcula',
|
||||
styleActiveLine: true,
|
||||
lineNumbers: true,
|
||||
@@ -46,8 +47,9 @@ export default {
|
||||
matchBrackets: true
|
||||
}
|
||||
},
|
||||
lanaguage () {
|
||||
language () {
|
||||
const fileName = this.$store.state.file.currentPath
|
||||
if (!fileName) return ''
|
||||
if (fileName.match(/\.js$/)) {
|
||||
return 'text/javascript'
|
||||
} else if (fileName.match(/\.py$/)) {
|
||||
@@ -60,6 +62,8 @@ export default {
|
||||
return 'text/x-php'
|
||||
} else if (fileName.match(/\.md$/)) {
|
||||
return 'text/x-markdown'
|
||||
} else if (fileName === 'Spiderfile') {
|
||||
return 'text/x-yaml'
|
||||
} else {
|
||||
return 'text'
|
||||
}
|
||||
@@ -74,7 +78,7 @@ export default {
|
||||
<style scoped>
|
||||
.file-content {
|
||||
border: 1px solid #eaecef;
|
||||
height: 480px;
|
||||
height: calc(100vh - 256px);
|
||||
}
|
||||
|
||||
.file-content >>> .CodeMirror {
|
||||
|
||||
@@ -120,6 +120,8 @@ export default {
|
||||
this.showFile = false
|
||||
this.onBack()
|
||||
}
|
||||
},
|
||||
created () {
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
@@ -102,7 +102,11 @@ export default {
|
||||
'spiderForm'
|
||||
]),
|
||||
isShowRun () {
|
||||
return !!this.spiderForm.cmd
|
||||
if (this.spiderForm.type === 'customized') {
|
||||
return !!this.spiderForm.cmd
|
||||
} else {
|
||||
return true
|
||||
}
|
||||
}
|
||||
},
|
||||
methods: {
|
||||
|
||||
@@ -43,12 +43,7 @@ const actions = {
|
||||
commit('SET_FILE_CONTENT', response.data.data)
|
||||
})
|
||||
},
|
||||
saveFileContent ({ state, rootState }, payload) {
|
||||
const { path } = payload
|
||||
const spiderId = rootState.spider.spiderForm._id
|
||||
const content = state.fileContent
|
||||
return request.post(`/spiders/${spiderId}/file`, { content, path })
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
export default {
|
||||
|
||||
@@ -148,6 +148,13 @@ const actions = {
|
||||
},
|
||||
extractFields ({ state, commit }) {
|
||||
return request.post(`/spiders/${state.spiderForm._id}/extract_fields`)
|
||||
},
|
||||
postConfigSpiderConfig ({ state }) {
|
||||
return request.post(`/config_spiders/${state.spiderForm._id}/config`, state.spiderForm.config)
|
||||
},
|
||||
saveConfigSpiderSpiderfile ({ state, rootState }) {
|
||||
const content = rootState.file.fileContent
|
||||
return request.post(`/config_spiders/${state.spiderForm._id}/spiderfile`, { content })
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -48,6 +48,13 @@ export default {
|
||||
FileList,
|
||||
SpiderOverview
|
||||
},
|
||||
watch: {
|
||||
activeTabName () {
|
||||
// 初始化文件
|
||||
this.$store.commit('file/SET_FILE_CONTENT', '')
|
||||
this.$store.commit('file/SET_CURRENT_PATH', '')
|
||||
}
|
||||
},
|
||||
data () {
|
||||
return {
|
||||
activeTabName: 'overview'
|
||||
|
||||
Reference in New Issue
Block a user