加入scrapy爬虫支持

This commit is contained in:
marvzhang
2020-02-15 18:28:37 +08:00
parent e2b9d285da
commit acdf4d5951
12 changed files with 222 additions and 49 deletions

View File

@@ -37,6 +37,10 @@ type Spider struct {
// 自定义爬虫
Cmd string `json:"cmd" bson:"cmd"` // 执行命令
// Scrapy 爬虫(属于自定义爬虫)
IsScrapy bool `json:"is_scrapy" bson:"is_scrapy"` // 是否为 Scrapy 爬虫
SpiderNames []string `json:"spider_names" bson:"spider_names"` // 爬虫名称列表
// 可配置爬虫
Template string `json:"template" bson:"template"` // Spiderfile模版

View File

@@ -88,15 +88,16 @@ func GetSpider(c *gin.Context) {
HandleErrorF(http.StatusBadRequest, c, "invalid id")
}
result, err := model.GetSpider(bson.ObjectIdHex(id))
spider, err := model.GetSpider(bson.ObjectIdHex(id))
if err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
}
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: result,
Data: spider,
})
}
@@ -901,3 +902,30 @@ func GetSpiderSchedules(c *gin.Context) {
Data: list,
})
}
// GetSpiderScrapySpiders responds with the Scrapy spider names of the spider
// identified by the "id" path parameter.
//
// An id that is not a valid ObjectId hex string is reported with
// http.StatusBadRequest. A failure to load the spider, or to list its Scrapy
// spiders, is reported with http.StatusInternalServerError. On success the
// names are returned in the Data field of the standard Response envelope.
func GetSpiderScrapySpiders(c *gin.Context) {
	spiderID := c.Param("id")
	if !bson.IsObjectIdHex(spiderID) {
		HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid")
		return
	}

	// Load the spider record first; its data is what the listing service needs.
	sp, err := model.GetSpider(bson.ObjectIdHex(spiderID))
	if err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	names, err := services.GetScrapySpiderNames(sp)
	if err != nil {
		HandleError(http.StatusInternalServerError, c, err)
		return
	}

	payload := Response{
		Status:  "ok",
		Message: "success",
		Data:    names,
	}
	c.JSON(http.StatusOK, payload)
}

View File

@@ -0,0 +1,32 @@
package services
import (
	"bytes"
	"fmt"
	"os/exec"
	"strings"

	"crawlab/model"
)
// GetScrapySpiderNames runs `scrapy list` with the spider's source directory
// (s.Src) as the working directory and returns the spider names it prints,
// one name per output line.
//
// Every line is trimmed of surrounding whitespace, so CRLF line endings do not
// leave a trailing "\r" on the names, and blank lines are skipped.
//
// On success the returned slice is never nil (it is empty when no spiders are
// listed), so it serializes to a JSON array rather than null. On failure an
// empty slice is returned together with an error; when the command wrote
// anything to stderr, that text is appended to the error message so the cause
// is visible instead of only "exit status N".
func GetScrapySpiderNames(s model.Spider) ([]string, error) {
	var stdout, stderr bytes.Buffer

	cmd := exec.Command("scrapy", "list")
	cmd.Dir = s.Src
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		// %v rather than %w keeps the message readable on toolchains that
		// predate error wrapping.
		if detail := strings.TrimSpace(stderr.String()); detail != "" {
			return []string{}, fmt.Errorf("scrapy list: %v: %s", err, detail)
		}
		return []string{}, err
	}

	names := []string{}
	for _, line := range strings.Split(stdout.String(), "\n") {
		if name := strings.TrimSpace(line); name != "" {
			names = append(names, name)
		}
	}
	return names, nil
}

View File

@@ -243,7 +243,6 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e
if runtime.GOOS == constants.Windows {
cmd = exec.Command("cmd", "/C", cmdStr)
} else {
cmd = exec.Command("")
cmd = exec.Command("sh", "-c", cmdStr)
}

View File

@@ -3,11 +3,11 @@
:title="$t('Notification')"
:visible="visible"
class="crawl-confirm-dialog"
width="480px"
width="540px"
:before-close="beforeClose"
>
<div style="margin-bottom: 20px;">{{$t('Are you sure to run this spider?')}}</div>
<el-form label-width="80px" :model="form" ref="form">
<el-form label-width="120px" :model="form" ref="form">
<el-form-item :label="$t('Run Type')" prop="runType" required inline-message>
<el-select v-model="form.runType" :placeholder="$t('Run Type')">
<el-option value="all-nodes" :label="$t('All Nodes')"/>
@@ -26,6 +26,16 @@
/>
</el-select>
</el-form-item>
<el-form-item v-if="spiderForm.is_scrapy" :label="$t('Scrapy Spider')" prop="spider" required inline-message>
<el-select v-model="form.spider" :placeholder="$t('Scrapy Spider')" :disabled="isLoading">
<el-option
v-for="s in spiderForm.spider_names"
:key="s"
:label="s"
:value="s"
/>
</el-select>
</el-form-item>
<el-form-item :label="$t('Parameters')" prop="param" inline-message>
<el-input v-model="form.param" :placeholder="$t('Parameters')"></el-input>
</el-form-item>
@@ -44,14 +54,17 @@
</el-form>
<template slot="footer">
<el-button type="plain" size="small" @click="$emit('close')">{{$t('Cancel')}}</el-button>
<el-button type="primary" size="small" @click="onConfirm" :disabled="!isAllowDisclaimer">{{$t('Confirm')}}
<el-button type="primary" size="small" @click="onConfirm" :disabled="isConfirmDisabled">
{{$t('Confirm')}}
</el-button>
</template>
</el-dialog>
</template>
<script>
import request from '../../api/request'
import {
mapState
} from 'vuex'
export default {
name: 'CrawlConfirmDialog',
@@ -70,11 +83,30 @@ export default {
form: {
runType: 'random',
nodeIds: undefined,
spider: undefined,
param: '',
nodeList: []
},
isAllowDisclaimer: true,
isRedirect: true
isRedirect: true,
isLoading: false
}
},
computed: {
...mapState('spider', [
'spiderForm'
]),
isConfirmDisabled () {
if (this.isLoading) return true
if (!this.isAllowDisclaimer) return true
return false
}
},
watch: {
visible (value) {
if (value) {
this.onOpen()
}
}
},
methods: {
@@ -88,7 +120,7 @@ export default {
const res = await this.$store.dispatch('spider/crawlSpider', {
spiderId: this.spiderId,
nodeIds: this.form.nodeIds,
param: this.form.param,
param: this.form.param + ' ' + this.form.spider,
runType: this.form.runType
})
@@ -107,21 +139,32 @@ export default {
},
onClickDisclaimer () {
this.$router.push('/disclaimer')
}
},
created () {
// 节点列表
request.get('/nodes', {}).then(response => {
this.nodeList = response.data.data.map(d => {
d.systemInfo = {
os: '',
arch: '',
num_cpu: '',
executables: []
}
return d
},
async onOpen () {
// 节点列表
this.$request.get('/nodes', {}).then(response => {
this.nodeList = response.data.data.map(d => {
d.systemInfo = {
os: '',
arch: '',
num_cpu: '',
executables: []
}
return d
})
})
})
// 爬虫列表
this.isLoading = true
await this.$store.dispatch('spider/getSpiderData', this.spiderId)
if (this.spiderForm.is_scrapy) {
await this.$store.dispatch('spider/getSpiderScrapySpiders', this.spiderId)
if (this.spiderForm.spider_names && this.spiderForm.spider_names.length > 0) {
this.$set(this.form, 'spider', this.spiderForm.spider_names[0])
}
}
this.isLoading = false
}
}
}
</script>

View File

@@ -36,24 +36,19 @@
<el-form-item :label="$t('Source Folder')">
<el-input v-model="spiderForm.src" :placeholder="$t('Source Folder')" disabled></el-input>
</el-form-item>
<el-form-item v-if="spiderForm.type === 'customized'" :label="$t('Execute Command')" prop="cmd" required
:inline-message="true">
<el-input v-model="spiderForm.cmd" :placeholder="$t('Execute Command')"
:disabled="isView"></el-input>
</el-form-item>
<template v-if="spiderForm.type === 'customized'">
<el-form-item :label="$t('Execute Command')" prop="cmd" required :inline-message="true">
<el-input
v-model="spiderForm.cmd"
:placeholder="$t('Execute Command')"
:disabled="isView || spiderForm.is_scrapy"
/>
</el-form-item>
</template>
<el-form-item :label="$t('Results Collection')" prop="col" required :inline-message="true">
<el-input v-model="spiderForm.col" :placeholder="$t('Results Collection')"
:disabled="isView"></el-input>
</el-form-item>
<el-form-item v-if="false" :label="$t('Site')">
<el-autocomplete v-model="spiderForm.site"
:placeholder="$t('Site')"
:fetch-suggestions="fetchSiteSuggestions"
clearable
:disabled="isView"
@select="onSiteSelect">
</el-autocomplete>
</el-form-item>
<el-form-item :label="$t('Spider Type')">
<el-select v-model="spiderForm.type" :placeholder="$t('Spider Type')" :disabled="true" clearable>
<el-option value="configurable" :label="$t('Configurable')"></el-option>
@@ -63,6 +58,13 @@
<el-form-item :label="$t('Remark')">
<el-input type="textarea" v-model="spiderForm.remark" :placeholder="$t('Remark')" :disabled="isView"/>
</el-form-item>
<el-form-item v-if="spiderForm.type === 'customized'" :label="$t('Is Scrapy')" prop="is_scrapy">
<el-switch
v-model="spiderForm.is_scrapy"
active-color="#13ce66"
@change="onIsScrapyChange"
/>
</el-form-item>
</el-form>
</el-row>
<el-row class="button-container" v-if="!isView">
@@ -159,16 +161,14 @@ export default {
this.$st.sendEv('爬虫详情', '概览', '点击运行')
},
onSave () {
this.$refs['spiderForm'].validate(res => {
if (res) {
this.$store.dispatch('spider/editSpider')
.then(() => {
this.$message.success(this.$t('Spider info has been saved successfully'))
})
.catch(error => {
this.$message.error(error)
})
this.$refs['spiderForm'].validate(async valid => {
if (!valid) return
const res = await this.$store.dispatch('spider/editSpider')
if (!res.data.error) {
this.$message.success(this.$t('Spider info has been saved successfully'))
}
await this.$store.dispatch('spider/getSpiderData', this.$route.params.id)
await this.$store.dispatch('spider/getSpiderScrapySpiders', this.$route.params.id)
})
this.$st.sendEv('爬虫详情', '概览', '保存')
},
@@ -197,6 +197,11 @@ export default {
},
onUploadError () {
this.uploadLoading = false
},
// Change handler for the "Is Scrapy" switch.
// When the switch is turned on, the execute command is preset to
// `scrapy crawl`; turning it off leaves the current command untouched.
onIsScrapyChange (value) {
if (value) {
this.spiderForm.cmd = 'scrapy crawl'
}
}
},
async created () {

View File

@@ -47,6 +47,10 @@ export default {
return 'el-icon-loading'
} else if (this.status === 'error') {
return 'el-icon-error'
} else if (this.status === 'cancelled') {
return 'el-icon-video-pause'
} else if (this.status === 'abnormal') {
return 'el-icon-question'
}
return ''
}

View File

@@ -182,6 +182,8 @@ export default {
'Settings': '设置',
'Display Name': '显示名称',
'Template': '模版',
'Is Scrapy': '是否为 Scrapy',
'Scrapy Spider': 'Scrapy 爬虫',
// 爬虫列表
'Name': '名称',

View File

@@ -116,6 +116,11 @@ const actions = {
commit('SET_SPIDER_FORM', data)
})
},
async getSpiderScrapySpiders ({ state, commit }, id) {
const res = await request.get(`/spiders/${id}/scrapy/spiders`)
state.spiderForm.spider_names = res.data.data
commit('SET_SPIDER_FORM', state.spiderForm)
},
crawlSpider ({ state, dispatch }, payload) {
const { spiderId, runType, nodeIds, param } = payload
return request.put(`/tasks`, {

View File

@@ -83,6 +83,17 @@
</el-option>
</el-select>
</el-form-item>
<el-form-item v-if="spiderForm.is_scrapy" :label="$t('Scrapy Spider')" prop="scrapy_spider" required
inline-message>
<el-select v-model="scheduleForm.scrapy_spider" :placeholder="$t('Scrapy Spider')" :disabled="isLoading">
<el-option
v-for="s in spiderForm.spider_names"
:key="s"
:label="s"
:value="s"
/>
</el-select>
</el-form-item>
<el-form-item :label="$t('Cron')" prop="cron" required>
<el-popover v-model="isShowCron" trigger="focus">
<template>
@@ -103,7 +114,7 @@
<el-form-item :label="$t('Execute Command')" prop="params">
<el-input
id="cmd"
v-model="spider.cmd"
v-model="spiderForm.cmd"
:placeholder="$t('Execute Command')"
disabled
/>
@@ -123,7 +134,7 @@
<!--取消保存-->
<span slot="footer" class="dialog-footer">
<el-button size="small" @click="onCancel">{{$t('Cancel')}}</el-button>
<el-button id="btn-submit" size="small" type="primary" @click="onAddSubmit">{{$t('Submit')}}</el-button>
<el-button id="btn-submit" size="small" type="primary" @click="onAddSubmit" :disabled="isLoading">{{$t('Submit')}}</el-button>
</span>
</el-dialog>
@@ -246,6 +257,7 @@ export default {
{ name: 'run_type', label: 'Run Type', width: '120px' },
{ name: 'node_names', label: 'Node', width: '150px' },
{ name: 'spider_name', label: 'Spider', width: '150px' },
{ name: 'scrapy_spider', label: 'Scrapy Spider', width: '150px' },
{ name: 'param', label: 'Parameters', width: '150px' },
{ name: 'description', label: 'Description', width: '200px' },
{ name: 'enable', label: 'Enable/Disable', width: '120px' }
@@ -259,6 +271,7 @@ export default {
spiderList: [],
nodeList: [],
isShowCron: false,
isLoading: false,
// tutorial
tourSteps: [
@@ -379,6 +392,9 @@ export default {
}
},
computed: {
...mapState('spider', [
'spiderForm'
]),
...mapState('schedule', [
'scheduleList',
'scheduleForm'
@@ -456,11 +472,23 @@ export default {
},
isShowRun (row) {
},
onEdit (row) {
async onEdit (row) {
this.$store.commit('schedule/SET_SCHEDULE_FORM', row)
this.dialogVisible = true
this.isEdit = true
this.$st.sendEv('定时任务', '修改定时任务')
this.isLoading = true
await this.$store.dispatch('spider/getSpiderData', row.spider_id)
if (this.spiderForm.is_scrapy) {
await this.$store.dispatch('spider/getSpiderScrapySpiders', row.spider_id)
if (!this.scheduleForm.scrapy_spider) {
if (this.spiderForm.spider_names && this.spiderForm.spider_names.length > 0) {
this.$set(this.scheduleForm, 'scrapy_spider', this.spiderForm.spider_names[0])
}
}
}
this.isLoading = false
},
onRemove (row) {
this.$confirm(this.$t('Are you sure to delete the schedule task?'), this.$t('Notification'), {

View File

@@ -217,6 +217,11 @@ export default {
// get spider list
await this.$store.dispatch('spider/getSpiderList')
// get scrapy spider names
if (this.spiderForm.is_scrapy) {
await this.$store.dispatch('spider/getSpiderScrapySpiders', this.$route.params.id)
}
// if spider is configurable spider, set to config tab by default
// if (this.spiderForm.type === 'configurable') {
// this.activeTabName = 'config'

View File

@@ -309,6 +309,23 @@
<status-tag :status="scope.row.last_status"/>
</template>
</el-table-column>
<el-table-column
v-else-if="col.name === 'is_scrapy'"
:key="col.name"
:label="$t(col.label)"
align="left"
:width="col.width"
:sortable="col.sortable"
>
<template slot-scope="scope">
<el-switch
v-if="scope.row.type === 'customized'"
v-model="scope.row.is_scrapy"
active-color="#13ce66"
disabled
/>
</template>
</el-table-column>
<el-table-column
v-else
:key="col.name"
@@ -397,6 +414,7 @@ export default {
columns: [
{ name: 'display_name', label: 'Name', width: '160', align: 'left', sortable: true },
{ name: 'type', label: 'Spider Type', width: '120', sortable: true },
{ name: 'is_scrapy', label: 'Is Scrapy', width: '80' },
{ name: 'last_status', label: 'Last Status', width: '120' },
{ name: 'last_run_ts', label: 'Last Run', width: '140' },
{ name: 'update_ts', label: 'Update Time', width: '140' },