mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-21 17:21:09 +01:00
加入scrapy爬虫支持
This commit is contained in:
@@ -37,6 +37,10 @@ type Spider struct {
|
||||
// 自定义爬虫
|
||||
Cmd string `json:"cmd" bson:"cmd"` // 执行命令
|
||||
|
||||
// Scrapy 爬虫(属于自定义爬虫)
|
||||
IsScrapy bool `json:"is_scrapy" bson:"is_scrapy"` // 是否为 Scrapy 爬虫
|
||||
SpiderNames []string `json:"spider_names" bson:"spider_names"` // 爬虫名称列表
|
||||
|
||||
// 可配置爬虫
|
||||
Template string `json:"template" bson:"template"` // Spiderfile模版
|
||||
|
||||
|
||||
@@ -88,15 +88,16 @@ func GetSpider(c *gin.Context) {
|
||||
HandleErrorF(http.StatusBadRequest, c, "invalid id")
|
||||
}
|
||||
|
||||
result, err := model.GetSpider(bson.ObjectIdHex(id))
|
||||
spider, err := model.GetSpider(bson.ObjectIdHex(id))
|
||||
if err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, Response{
|
||||
Status: "ok",
|
||||
Message: "success",
|
||||
Data: result,
|
||||
Data: spider,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -901,3 +902,30 @@ func GetSpiderSchedules(c *gin.Context) {
|
||||
Data: list,
|
||||
})
|
||||
}
|
||||
|
||||
func GetSpiderScrapySpiders(c *gin.Context) {
|
||||
id := c.Param("id")
|
||||
|
||||
if !bson.IsObjectIdHex(id) {
|
||||
HandleErrorF(http.StatusBadRequest, c, "spider_id is invalid")
|
||||
return
|
||||
}
|
||||
|
||||
spider, err := model.GetSpider(bson.ObjectIdHex(id))
|
||||
if err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
spiderNames, err := services.GetScrapySpiderNames(spider)
|
||||
if err != nil {
|
||||
HandleError(http.StatusInternalServerError, c, err)
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, Response{
|
||||
Status: "ok",
|
||||
Message: "success",
|
||||
Data: spiderNames,
|
||||
})
|
||||
}
|
||||
|
||||
32
backend/services/scrapy.go
Normal file
32
backend/services/scrapy.go
Normal file
@@ -0,0 +1,32 @@
|
||||
package services
|
||||
|
||||
import (
	"bytes"
	"fmt"
	"os/exec"
	"strings"

	"crawlab/model"
)
|
||||
|
||||
func GetScrapySpiderNames(s model.Spider) ([]string, error) {
|
||||
var stdout bytes.Buffer
|
||||
var stderr bytes.Buffer
|
||||
|
||||
cmd := exec.Command("scrapy", "list")
|
||||
cmd.Dir = s.Src
|
||||
cmd.Stdout = &stdout
|
||||
cmd.Stderr = &stderr
|
||||
if err := cmd.Run(); err != nil {
|
||||
return []string{}, err
|
||||
}
|
||||
|
||||
spiderNames := strings.Split(stdout.String(), "\n")
|
||||
|
||||
var res []string
|
||||
for _, sn := range spiderNames {
|
||||
if sn != "" {
|
||||
res = append(res, sn)
|
||||
}
|
||||
}
|
||||
|
||||
return res, nil
|
||||
}
|
||||
@@ -243,7 +243,6 @@ func ExecuteShellCmd(cmdStr string, cwd string, t model.Task, s model.Spider) (e
|
||||
if runtime.GOOS == constants.Windows {
|
||||
cmd = exec.Command("cmd", "/C", cmdStr)
|
||||
} else {
|
||||
cmd = exec.Command("")
|
||||
cmd = exec.Command("sh", "-c", cmdStr)
|
||||
}
|
||||
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
:title="$t('Notification')"
|
||||
:visible="visible"
|
||||
class="crawl-confirm-dialog"
|
||||
width="480px"
|
||||
width="540px"
|
||||
:before-close="beforeClose"
|
||||
>
|
||||
<div style="margin-bottom: 20px;">{{$t('Are you sure to run this spider?')}}</div>
|
||||
<el-form label-width="80px" :model="form" ref="form">
|
||||
<el-form label-width="120px" :model="form" ref="form">
|
||||
<el-form-item :label="$t('Run Type')" prop="runType" required inline-message>
|
||||
<el-select v-model="form.runType" :placeholder="$t('Run Type')">
|
||||
<el-option value="all-nodes" :label="$t('All Nodes')"/>
|
||||
@@ -26,6 +26,16 @@
|
||||
/>
|
||||
</el-select>
|
||||
</el-form-item>
|
||||
<el-form-item v-if="spiderForm.is_scrapy" :label="$t('Scrapy Spider')" prop="spider" required inline-message>
|
||||
<el-select v-model="form.spider" :placeholder="$t('Scrapy Spider')" :disabled="isLoading">
|
||||
<el-option
|
||||
v-for="s in spiderForm.spider_names"
|
||||
:key="s"
|
||||
:label="s"
|
||||
:value="s"
|
||||
/>
|
||||
</el-select>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Parameters')" prop="param" inline-message>
|
||||
<el-input v-model="form.param" :placeholder="$t('Parameters')"></el-input>
|
||||
</el-form-item>
|
||||
@@ -44,14 +54,17 @@
|
||||
</el-form>
|
||||
<template slot="footer">
|
||||
<el-button type="plain" size="small" @click="$emit('close')">{{$t('Cancel')}}</el-button>
|
||||
<el-button type="primary" size="small" @click="onConfirm" :disabled="!isAllowDisclaimer">{{$t('Confirm')}}
|
||||
<el-button type="primary" size="small" @click="onConfirm" :disabled="isConfirmDisabled">
|
||||
{{$t('Confirm')}}
|
||||
</el-button>
|
||||
</template>
|
||||
</el-dialog>
|
||||
</template>
|
||||
|
||||
<script>
|
||||
import request from '../../api/request'
|
||||
import {
|
||||
mapState
|
||||
} from 'vuex'
|
||||
|
||||
export default {
|
||||
name: 'CrawlConfirmDialog',
|
||||
@@ -70,11 +83,30 @@ export default {
|
||||
form: {
|
||||
runType: 'random',
|
||||
nodeIds: undefined,
|
||||
spider: undefined,
|
||||
param: '',
|
||||
nodeList: []
|
||||
},
|
||||
isAllowDisclaimer: true,
|
||||
isRedirect: true
|
||||
isRedirect: true,
|
||||
isLoading: false
|
||||
}
|
||||
},
|
||||
computed: {
|
||||
...mapState('spider', [
|
||||
'spiderForm'
|
||||
]),
|
||||
isConfirmDisabled () {
|
||||
if (this.isLoading) return true
|
||||
if (!this.isAllowDisclaimer) return true
|
||||
return false
|
||||
}
|
||||
},
|
||||
watch: {
|
||||
visible (value) {
|
||||
if (value) {
|
||||
this.onOpen()
|
||||
}
|
||||
}
|
||||
},
|
||||
methods: {
|
||||
@@ -88,7 +120,7 @@ export default {
|
||||
const res = await this.$store.dispatch('spider/crawlSpider', {
|
||||
spiderId: this.spiderId,
|
||||
nodeIds: this.form.nodeIds,
|
||||
param: this.form.param,
|
||||
param: this.form.param + ' ' + this.form.spider,
|
||||
runType: this.form.runType
|
||||
})
|
||||
|
||||
@@ -107,21 +139,32 @@ export default {
|
||||
},
|
||||
onClickDisclaimer () {
|
||||
this.$router.push('/disclaimer')
|
||||
}
|
||||
},
|
||||
created () {
|
||||
// 节点列表
|
||||
request.get('/nodes', {}).then(response => {
|
||||
this.nodeList = response.data.data.map(d => {
|
||||
d.systemInfo = {
|
||||
os: '',
|
||||
arch: '',
|
||||
num_cpu: '',
|
||||
executables: []
|
||||
}
|
||||
return d
|
||||
},
|
||||
async onOpen () {
|
||||
// 节点列表
|
||||
this.$request.get('/nodes', {}).then(response => {
|
||||
this.nodeList = response.data.data.map(d => {
|
||||
d.systemInfo = {
|
||||
os: '',
|
||||
arch: '',
|
||||
num_cpu: '',
|
||||
executables: []
|
||||
}
|
||||
return d
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
// 爬虫列表
|
||||
this.isLoading = true
|
||||
await this.$store.dispatch('spider/getSpiderData', this.spiderId)
|
||||
if (this.spiderForm.is_scrapy) {
|
||||
await this.$store.dispatch('spider/getSpiderScrapySpiders', this.spiderId)
|
||||
if (this.spiderForm.spider_names && this.spiderForm.spider_names.length > 0) {
|
||||
this.$set(this.form, 'spider', this.spiderForm.spider_names[0])
|
||||
}
|
||||
}
|
||||
this.isLoading = false
|
||||
}
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
@@ -36,24 +36,19 @@
|
||||
<el-form-item :label="$t('Source Folder')">
|
||||
<el-input v-model="spiderForm.src" :placeholder="$t('Source Folder')" disabled></el-input>
|
||||
</el-form-item>
|
||||
<el-form-item v-if="spiderForm.type === 'customized'" :label="$t('Execute Command')" prop="cmd" required
|
||||
:inline-message="true">
|
||||
<el-input v-model="spiderForm.cmd" :placeholder="$t('Execute Command')"
|
||||
:disabled="isView"></el-input>
|
||||
</el-form-item>
|
||||
<template v-if="spiderForm.type === 'customized'">
|
||||
<el-form-item :label="$t('Execute Command')" prop="cmd" required :inline-message="true">
|
||||
<el-input
|
||||
v-model="spiderForm.cmd"
|
||||
:placeholder="$t('Execute Command')"
|
||||
:disabled="isView || spiderForm.is_scrapy"
|
||||
/>
|
||||
</el-form-item>
|
||||
</template>
|
||||
<el-form-item :label="$t('Results Collection')" prop="col" required :inline-message="true">
|
||||
<el-input v-model="spiderForm.col" :placeholder="$t('Results Collection')"
|
||||
:disabled="isView"></el-input>
|
||||
</el-form-item>
|
||||
<el-form-item v-if="false" :label="$t('Site')">
|
||||
<el-autocomplete v-model="spiderForm.site"
|
||||
:placeholder="$t('Site')"
|
||||
:fetch-suggestions="fetchSiteSuggestions"
|
||||
clearable
|
||||
:disabled="isView"
|
||||
@select="onSiteSelect">
|
||||
</el-autocomplete>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Spider Type')">
|
||||
<el-select v-model="spiderForm.type" :placeholder="$t('Spider Type')" :disabled="true" clearable>
|
||||
<el-option value="configurable" :label="$t('Configurable')"></el-option>
|
||||
@@ -63,6 +58,13 @@
|
||||
<el-form-item :label="$t('Remark')">
|
||||
<el-input type="textarea" v-model="spiderForm.remark" :placeholder="$t('Remark')" :disabled="isView"/>
|
||||
</el-form-item>
|
||||
<el-form-item v-if="spiderForm.type === 'customized'" :label="$t('Is Scrapy')" prop="is_scrapy">
|
||||
<el-switch
|
||||
v-model="spiderForm.is_scrapy"
|
||||
active-color="#13ce66"
|
||||
@change="onIsScrapyChange"
|
||||
/>
|
||||
</el-form-item>
|
||||
</el-form>
|
||||
</el-row>
|
||||
<el-row class="button-container" v-if="!isView">
|
||||
@@ -159,16 +161,14 @@ export default {
|
||||
this.$st.sendEv('爬虫详情', '概览', '点击运行')
|
||||
},
|
||||
onSave () {
|
||||
this.$refs['spiderForm'].validate(res => {
|
||||
if (res) {
|
||||
this.$store.dispatch('spider/editSpider')
|
||||
.then(() => {
|
||||
this.$message.success(this.$t('Spider info has been saved successfully'))
|
||||
})
|
||||
.catch(error => {
|
||||
this.$message.error(error)
|
||||
})
|
||||
this.$refs['spiderForm'].validate(async valid => {
|
||||
if (!valid) return
|
||||
const res = await this.$store.dispatch('spider/editSpider')
|
||||
if (!res.data.error) {
|
||||
this.$message.success(this.$t('Spider info has been saved successfully'))
|
||||
}
|
||||
await this.$store.dispatch('spider/getSpiderData', this.$route.params.id)
|
||||
await this.$store.dispatch('spider/getSpiderScrapySpiders', this.$route.params.id)
|
||||
})
|
||||
this.$st.sendEv('爬虫详情', '概览', '保存')
|
||||
},
|
||||
@@ -197,6 +197,11 @@ export default {
|
||||
},
|
||||
onUploadError () {
|
||||
this.uploadLoading = false
|
||||
},
|
||||
onIsScrapyChange (value) {
|
||||
if (value) {
|
||||
this.spiderForm.cmd = 'scrapy crawl'
|
||||
}
|
||||
}
|
||||
},
|
||||
async created () {
|
||||
|
||||
@@ -47,6 +47,10 @@ export default {
|
||||
return 'el-icon-loading'
|
||||
} else if (this.status === 'error') {
|
||||
return 'el-icon-error'
|
||||
} else if (this.status === 'cancelled') {
|
||||
return 'el-icon-video-pause'
|
||||
} else if (this.status === 'abnormal') {
|
||||
return 'el-icon-question'
|
||||
}
|
||||
return ''
|
||||
}
|
||||
|
||||
@@ -182,6 +182,8 @@ export default {
|
||||
'Settings': '设置',
|
||||
'Display Name': '显示名称',
|
||||
'Template': '模版',
|
||||
'Is Scrapy': '是否为 Scrapy',
|
||||
'Scrapy Spider': 'Scrapy 爬虫',
|
||||
|
||||
// 爬虫列表
|
||||
'Name': '名称',
|
||||
|
||||
@@ -116,6 +116,11 @@ const actions = {
|
||||
commit('SET_SPIDER_FORM', data)
|
||||
})
|
||||
},
|
||||
async getSpiderScrapySpiders ({ state, commit }, id) {
|
||||
const res = await request.get(`/spiders/${id}/scrapy/spiders`)
|
||||
state.spiderForm.spider_names = res.data.data
|
||||
commit('SET_SPIDER_FORM', state.spiderForm)
|
||||
},
|
||||
crawlSpider ({ state, dispatch }, payload) {
|
||||
const { spiderId, runType, nodeIds, param } = payload
|
||||
return request.put(`/tasks`, {
|
||||
|
||||
@@ -83,6 +83,17 @@
|
||||
</el-option>
|
||||
</el-select>
|
||||
</el-form-item>
|
||||
<el-form-item v-if="spiderForm.is_scrapy" :label="$t('Scrapy Spider')" prop="scrapy_spider" required
|
||||
inline-message>
|
||||
<el-select v-model="scheduleForm.scrapy_spider" :placeholder="$t('Scrapy Spider')" :disabled="isLoading">
|
||||
<el-option
|
||||
v-for="s in spiderForm.spider_names"
|
||||
:key="s"
|
||||
:label="s"
|
||||
:value="s"
|
||||
/>
|
||||
</el-select>
|
||||
</el-form-item>
|
||||
<el-form-item :label="$t('Cron')" prop="cron" required>
|
||||
<el-popover v-model="isShowCron" trigger="focus">
|
||||
<template>
|
||||
@@ -103,7 +114,7 @@
|
||||
<el-form-item :label="$t('Execute Command')" prop="params">
|
||||
<el-input
|
||||
id="cmd"
|
||||
v-model="spider.cmd"
|
||||
v-model="spiderForm.cmd"
|
||||
:placeholder="$t('Execute Command')"
|
||||
disabled
|
||||
/>
|
||||
@@ -123,7 +134,7 @@
|
||||
<!--取消、保存-->
|
||||
<span slot="footer" class="dialog-footer">
|
||||
<el-button size="small" @click="onCancel">{{$t('Cancel')}}</el-button>
|
||||
<el-button id="btn-submit" size="small" type="primary" @click="onAddSubmit">{{$t('Submit')}}</el-button>
|
||||
<el-button id="btn-submit" size="small" type="primary" @click="onAddSubmit" :disabled="isLoading">{{$t('Submit')}}</el-button>
|
||||
</span>
|
||||
</el-dialog>
|
||||
|
||||
@@ -246,6 +257,7 @@ export default {
|
||||
{ name: 'run_type', label: 'Run Type', width: '120px' },
|
||||
{ name: 'node_names', label: 'Node', width: '150px' },
|
||||
{ name: 'spider_name', label: 'Spider', width: '150px' },
|
||||
{ name: 'scrapy_spider', label: 'Scrapy Spider', width: '150px' },
|
||||
{ name: 'param', label: 'Parameters', width: '150px' },
|
||||
{ name: 'description', label: 'Description', width: '200px' },
|
||||
{ name: 'enable', label: 'Enable/Disable', width: '120px' }
|
||||
@@ -259,6 +271,7 @@ export default {
|
||||
spiderList: [],
|
||||
nodeList: [],
|
||||
isShowCron: false,
|
||||
isLoading: false,
|
||||
|
||||
// tutorial
|
||||
tourSteps: [
|
||||
@@ -379,6 +392,9 @@ export default {
|
||||
}
|
||||
},
|
||||
computed: {
|
||||
...mapState('spider', [
|
||||
'spiderForm'
|
||||
]),
|
||||
...mapState('schedule', [
|
||||
'scheduleList',
|
||||
'scheduleForm'
|
||||
@@ -456,11 +472,23 @@ export default {
|
||||
},
|
||||
isShowRun (row) {
|
||||
},
|
||||
onEdit (row) {
|
||||
async onEdit (row) {
|
||||
this.$store.commit('schedule/SET_SCHEDULE_FORM', row)
|
||||
this.dialogVisible = true
|
||||
this.isEdit = true
|
||||
this.$st.sendEv('定时任务', '修改定时任务')
|
||||
|
||||
this.isLoading = true
|
||||
await this.$store.dispatch('spider/getSpiderData', row.spider_id)
|
||||
if (this.spiderForm.is_scrapy) {
|
||||
await this.$store.dispatch('spider/getSpiderScrapySpiders', row.spider_id)
|
||||
if (!this.scheduleForm.scrapy_spider) {
|
||||
if (this.spiderForm.spider_names && this.spiderForm.spider_names.length > 0) {
|
||||
this.$set(this.scheduleForm, 'scrapy_spider', this.spiderForm.spider_names[0])
|
||||
}
|
||||
}
|
||||
}
|
||||
this.isLoading = false
|
||||
},
|
||||
onRemove (row) {
|
||||
this.$confirm(this.$t('Are you sure to delete the schedule task?'), this.$t('Notification'), {
|
||||
|
||||
@@ -217,6 +217,11 @@ export default {
|
||||
// get spider list
|
||||
await this.$store.dispatch('spider/getSpiderList')
|
||||
|
||||
// get scrapy spider names
|
||||
if (this.spiderForm.is_scrapy) {
|
||||
await this.$store.dispatch('spider/getSpiderScrapySpiders', this.$route.params.id)
|
||||
}
|
||||
|
||||
// if spider is configurable spider, set to config tab by default
|
||||
// if (this.spiderForm.type === 'configurable') {
|
||||
// this.activeTabName = 'config'
|
||||
|
||||
@@ -309,6 +309,23 @@
|
||||
<status-tag :status="scope.row.last_status"/>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column
|
||||
v-else-if="col.name === 'is_scrapy'"
|
||||
:key="col.name"
|
||||
:label="$t(col.label)"
|
||||
align="left"
|
||||
:width="col.width"
|
||||
:sortable="col.sortable"
|
||||
>
|
||||
<template slot-scope="scope">
|
||||
<el-switch
|
||||
v-if="scope.row.type === 'customized'"
|
||||
v-model="scope.row.is_scrapy"
|
||||
active-color="#13ce66"
|
||||
disabled
|
||||
/>
|
||||
</template>
|
||||
</el-table-column>
|
||||
<el-table-column
|
||||
v-else
|
||||
:key="col.name"
|
||||
@@ -397,6 +414,7 @@ export default {
|
||||
columns: [
|
||||
{ name: 'display_name', label: 'Name', width: '160', align: 'left', sortable: true },
|
||||
{ name: 'type', label: 'Spider Type', width: '120', sortable: true },
|
||||
{ name: 'is_scrapy', label: 'Is Scrapy', width: '80' },
|
||||
{ name: 'last_status', label: 'Last Status', width: '120' },
|
||||
{ name: 'last_run_ts', label: 'Last Run', width: '140' },
|
||||
{ name: 'update_ts', label: 'Update Time', width: '140' },
|
||||
|
||||
Reference in New Issue
Block a user