Merge pull request #585 from crawlab-team/develop

Develop
This commit is contained in:
Marvin Zhang
2020-02-22 19:14:19 +08:00
committed by GitHub
15 changed files with 429 additions and 48 deletions

View File

@@ -162,6 +162,7 @@ func main() {
authGroup.POST("/spiders/:id", routes.PostSpider) // 修改爬虫
authGroup.POST("/spiders/:id/publish", routes.PublishSpider) // 发布爬虫
authGroup.POST("/spiders/:id/upload", routes.UploadSpiderFromId) // 上传爬虫ID
authGroup.DELETE("/spiders", routes.DeleteSelectedSpider) // 删除选择的爬虫
authGroup.DELETE("/spiders/:id", routes.DeleteSpider) // 删除爬虫
authGroup.GET("/spiders/:id/tasks", routes.GetSpiderTasks) // 爬虫任务列表
authGroup.GET("/spiders/:id/file/tree", routes.GetSpiderFileTree) // 爬虫文件目录树读取
@@ -184,6 +185,8 @@ func main() {
authGroup.GET("/spiders/:id/scrapy/spider/filepath", routes.GetSpiderScrapySpiderFilepath) // Scrapy 爬虫 pipelines
authGroup.POST("/spiders/:id/git/sync", routes.PostSpiderSyncGit) // 爬虫 Git 同步
authGroup.POST("/spiders/:id/git/reset", routes.PostSpiderResetGit) // 爬虫 Git 重置
authGroup.POST("/spiders-cancel", routes.CancelSelectedSpider) // 停止所选爬虫任务
authGroup.POST("/spiders-run", routes.RunSelectedSpider) // 运行所选爬虫
}
// 可配置爬虫
{
@@ -201,8 +204,8 @@ func main() {
authGroup.GET("/tasks/:id", routes.GetTask) // 任务详情
authGroup.PUT("/tasks", routes.PutTask) // 派发任务
authGroup.DELETE("/tasks/:id", routes.DeleteTask) // 删除任务
authGroup.DELETE("/tasks_multiple", routes.DeleteMultipleTask) // 删除多个任务
authGroup.DELETE("/tasks_by_status", routes.DeleteTaskByStatus) //删除指定状态的任务
authGroup.DELETE("/tasks", routes.DeleteSelectedTask) // 删除多个任务
authGroup.DELETE("/tasks_by_status", routes.DeleteTaskByStatus) // 删除指定状态的任务
authGroup.POST("/tasks/:id/cancel", routes.CancelTask) // 取消任务
authGroup.GET("/tasks/:id/log", routes.GetTaskLog) // 任务日志
authGroup.GET("/tasks/:id/results", routes.GetTaskResults) // 任务结果

View File

@@ -279,6 +279,8 @@ func RemoveSpider(id bson.ObjectId) error {
var result Spider
if err := c.FindId(id).One(&result); err != nil {
log.Errorf("find spider error: %s, id:%s", err.Error(), id.Hex())
debug.PrintStack()
return err
}
@@ -291,12 +293,10 @@ func RemoveSpider(id bson.ObjectId) error {
// gf上的文件
s, gf := database.GetGridFs("files")
defer s.Close()
if result.FileId.Hex() != constants.ObjectIdNull {
if err := gf.RemoveId(result.FileId); err != nil {
log.Error("remove file error, id:" + result.FileId.Hex())
debug.PrintStack()
return err
}
}

View File

@@ -482,6 +482,149 @@ func DeleteSpider(c *gin.Context) {
})
}
// DeleteSelectedSpider removes every spider listed in the "spider_ids" array
// of the JSON request body, then calls services.GitCron.Update.
//
// Responses:
//   - 400 when the body cannot be bound to the expected shape.
//   - 500 as soon as one removal, or the GitCron update, returns an error;
//     processing stops there and earlier removals are not undone.
//   - 200 with a success payload otherwise.
func DeleteSelectedSpider(c *gin.Context) {
	var payload struct {
		SpiderIds []string `json:"spider_ids"`
	}
	if bindErr := c.ShouldBindJSON(&payload); bindErr != nil {
		HandleErrorF(http.StatusBadRequest, c, "invalid request")
		return
	}

	// Remove the spiders in request order, aborting on the first failure.
	for _, id := range payload.SpiderIds {
		removeErr := services.RemoveSpider(id)
		if removeErr != nil {
			HandleError(http.StatusInternalServerError, c, removeErr)
			return
		}
	}

	// Update GitCron after the removals.
	if cronErr := services.GitCron.Update(); cronErr != nil {
		HandleError(http.StatusInternalServerError, c, cronErr)
		return
	}

	c.JSON(http.StatusOK, Response{Status: "ok", Message: "success"})
}
// CancelSelectedSpider asks services.CancelSpider to cancel the tasks of every
// spider listed in the "spider_ids" array of the JSON request body.
//
// The operation is best-effort: a failure for one spider is logged and the
// remaining spiders are still processed, so the response is 200 whenever the
// body could be bound. A body that cannot be bound yields 400.
func CancelSelectedSpider(c *gin.Context) {
	type ReqBody struct {
		SpiderIds []string `json:"spider_ids"`
	}

	var reqBody ReqBody
	if err := c.ShouldBindJSON(&reqBody); err != nil {
		HandleErrorF(http.StatusBadRequest, c, "invalid request")
		return
	}

	for _, spiderId := range reqBody.SpiderIds {
		// services.CancelSpider converts the id with bson.ObjectIdHex, which
		// panics on anything that is not a valid ObjectId hex string. The ids
		// come straight from the client, so reject malformed ones here and
		// keep going with the rest.
		if !bson.IsObjectIdHex(spiderId) {
			log.Errorf("invalid spider id: %s", spiderId)
			continue
		}

		if err := services.CancelSpider(spiderId); err != nil {
			// Never use the error text itself as the format string: a '%' in
			// the message would be interpreted as a formatting verb.
			log.Errorf("cancel spider error: %s, id:%s", err.Error(), spiderId)
			debug.PrintStack()
		}
	}

	c.JSON(http.StatusOK, Response{
		Status:  "ok",
		Message: "success",
	})
}
// RunSelectedSpider schedules tasks for a batch of spiders in one request.
//
// The JSON body carries "run_type", "node_ids" and "task_params" (one
// spider_id/param pair per spider). For every task param it adds:
//   - constants.RunTypeAllNodes: one task per node from model.GetNodeList(nil)
//   - constants.RunTypeRandom: a single task whose NodeId is left unset
//   - constants.RunTypeSelectedNodes: one task per entry of "node_ids"
//
// Responses:
//   - 400 when the body cannot be bound or run_type is not one of the above.
//   - 500 when the node lookup or services.AddTask fails; tasks added before
//     the failure are not rolled back.
//   - 200 with the ids of all created tasks in Data.
func RunSelectedSpider(c *gin.Context) {
	type TaskParam struct {
		SpiderId bson.ObjectId `json:"spider_id"`
		Param    string        `json:"param"`
	}
	type ReqBody struct {
		RunType    string          `json:"run_type"`
		NodeIds    []bson.ObjectId `json:"node_ids"`
		TaskParams []TaskParam     `json:"task_params"`
	}

	var reqBody ReqBody
	if err := c.ShouldBindJSON(&reqBody); err != nil {
		HandleErrorF(http.StatusBadRequest, c, "invalid request")
		return
	}

	// Resolve the target nodes once, before touching any spider. This rejects
	// an unknown run_type even when task_params is empty and avoids querying
	// the node list again for every spider.
	var nodeIds []bson.ObjectId
	switch reqBody.RunType {
	case constants.RunTypeAllNodes:
		// Skip the lookup when there is nothing to schedule.
		if len(reqBody.TaskParams) > 0 {
			nodes, err := model.GetNodeList(nil)
			if err != nil {
				HandleError(http.StatusInternalServerError, c, err)
				return
			}
			for _, node := range nodes {
				nodeIds = append(nodeIds, node.Id)
			}
		}
	case constants.RunTypeRandom:
		// One task per spider with the zero-value NodeId, i.e. no node is
		// assigned by this handler.
		var unassigned bson.ObjectId
		nodeIds = []bson.ObjectId{unassigned}
	case constants.RunTypeSelectedNodes:
		nodeIds = reqBody.NodeIds
	default:
		// An unknown run_type is a client error, not a server failure.
		HandleErrorF(http.StatusBadRequest, c, "invalid run_type")
		return
	}

	// Ids of the tasks created so far, in creation order.
	var taskIds []string

	// TODO: this overlaps with routes.PutTask; factor out the shared part.
	for _, taskParam := range reqBody.TaskParams {
		for _, nodeId := range nodeIds {
			t := model.Task{
				SpiderId: taskParam.SpiderId,
				NodeId:   nodeId,
				Param:    taskParam.Param,
				UserId:   services.GetCurrentUser(c).Id,
			}
			id, err := services.AddTask(t)
			if err != nil {
				HandleError(http.StatusInternalServerError, c, err)
				return
			}
			taskIds = append(taskIds, id)
		}
	}

	c.JSON(http.StatusOK, Response{
		Status:  "ok",
		Message: "success",
		Data:    taskIds,
	})
}
func GetSpiderTasks(c *gin.Context) {
id := c.Param("id")

View File

@@ -183,7 +183,7 @@ func DeleteTaskByStatus(c *gin.Context) {
}
// 删除多个任务
func DeleteMultipleTask(c *gin.Context) {
func DeleteSelectedTask(c *gin.Context) {
ids := make(map[string][]string)
if err := c.ShouldBindJSON(&ids); err != nil {
HandleError(http.StatusInternalServerError, c, err)

View File

@@ -2,15 +2,17 @@ package routes
import (
"crawlab/services"
"github.com/apex/log"
"github.com/gin-gonic/gin"
"net/http"
"runtime/debug"
)
func GetLatestRelease(c *gin.Context) {
latestRelease, err := services.GetLatestRelease()
if err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
log.Errorf(err.Error())
debug.PrintStack()
}
c.JSON(http.StatusOK, Response{
Status: "ok",

View File

@@ -261,6 +261,38 @@ func RemoveSpider(id string) error {
return nil
}
// CancelSpider cancels every pending or running task of the spider with the
// given id.
//
// id is converted with bson.ObjectIdHex before the spider is looked up, so it
// must be a valid ObjectId hex string. The first error from the spider
// lookup, the task query or CancelTask is returned immediately; tasks after a
// failing one are left untouched.
func CancelSpider(id string) error {
	// Look up the spider; a lookup failure aborts the whole operation.
	sp, err := model.GetSpider(bson.ObjectIdHex(id))
	if err != nil {
		return err
	}

	// Select this spider's tasks that are still pending or running.
	activeStatuses := []string{
		constants.StatusPending,
		constants.StatusRunning,
	}
	filter := bson.M{
		"spider_id": sp.Id,
		"status":    bson.M{"$in": activeStatuses},
	}
	activeTasks, err := model.GetTaskList(filter, 0, constants.Infinite, "-create_ts")
	if err != nil {
		return err
	}

	// Cancel them one by one, stopping at the first failure.
	for _, t := range activeTasks {
		if cancelErr := CancelTask(t.Id); cancelErr != nil {
			return cancelErr
		}
	}

	return nil
}
// 启动爬虫服务
func InitSpiderService() error {
// 构造定时任务执行器

View File

@@ -46,11 +46,12 @@ func (s *SpiderSync) CheckIsScrapy() {
return
}
s.Spider.IsScrapy = utils.Exists(path.Join(s.Spider.Src, "scrapy.cfg"))
if err := s.Spider.Save(); err != nil {
log.Errorf(err.Error())
debug.PrintStack()
return
}
// TODO: 暂时停用自动检测Scrapy项目功能
//if err := s.Spider.Save(); err != nil {
// log.Errorf(err.Error())
// debug.PrintStack()
// return
//}
}
func (s *SpiderSync) AfterRemoveDownCreate() {

View File

@@ -44,8 +44,13 @@
/>
</el-select>
</el-form-item>
<el-form-item v-if="spiderForm.is_scrapy" :label="$t('Scrapy Log Level')" prop="scrapy_log_level" required
inline-message>
<el-form-item
v-if="spiderForm.is_scrapy || (multiple && scrapySpiders.length > 0)"
:label="$t('Scrapy Log Level')"
prop="scrapy_log_level"
required
inline-message
>
<el-select v-model="form.scrapy_log_level" :placeholder="$t('Scrapy Log Level')">
<el-option value="INFO" label="INFO"/>
<el-option value="DEBUG" label="DEBUG"/>
@@ -68,7 +73,7 @@
<span style="margin-left: 5px">我已阅读并同意 <a href="javascript:"
@click="onClickDisclaimer">免责声明</a> 所有内容</span>
</div>
<div v-if="!spiderForm.is_long_task">
<div v-if="!spiderForm.is_long_task && !multiple">
<el-checkbox v-model="isRedirect"/>
<span style="margin-left: 5px">跳转到任务详情页</span>
</div>
@@ -100,9 +105,19 @@ export default {
type: String,
default: ''
},
spiders: {
type: Array,
default () {
return []
}
},
visible: {
type: Boolean,
default: false
},
multiple: {
type: Boolean,
default: false
}
},
data () {
@@ -118,7 +133,8 @@ export default {
isAllowDisclaimer: true,
isRedirect: true,
isLoading: false,
isParametersVisible: false
isParametersVisible: false,
scrapySpidersNamesDict: {}
}
},
computed: {
@@ -129,6 +145,9 @@ export default {
if (this.isLoading) return true
if (!this.isAllowDisclaimer) return true
return false
},
scrapySpiders () {
return this.spiders.filter(d => d.type === 'customized' && d.is_scrapy)
}
},
watch: {
@@ -145,30 +164,77 @@ export default {
beforeParameterClose () {
this.isParametersVisible = false
},
async fetchScrapySpiderName (id) {
const res = await this.$request.get(`/spiders/${id}/scrapy/spiders`)
this.scrapySpidersNamesDict[id] = res.data.data
},
onConfirm () {
this.$refs['form'].validate(async valid => {
if (!valid) return
let param = this.form.param
if (this.spiderForm.type === 'customized' && this.spiderForm.is_scrapy) {
param = `${this.form.spider} --loglevel=${this.form.scrapy_log_level} ${this.form.param}`
// 请求响应
let res
if (!this.multiple) {
// 运行单个爬虫
// 参数
let param = this.form.param
// Scrapy爬虫特殊处理
if (this.spiderForm.type === 'customized' && this.spiderForm.is_scrapy) {
param = `${this.form.spider} --loglevel=${this.form.scrapy_log_level} ${this.form.param}`
}
// 发起请求
res = await this.$store.dispatch('spider/crawlSpider', {
spiderId: this.spiderId,
nodeIds: this.form.nodeIds,
param,
runType: this.form.runType
})
} else {
// 运行多个爬虫
// 发起请求
res = await this.$store.dispatch('spider/crawlSelectedSpiders', {
nodeIds: this.form.nodeIds,
runType: this.form.runType,
taskParams: this.spiders.map(d => {
// 参数
let param = this.form.param
// Scrapy爬虫特殊处理
if (d.type === 'customized' && d.is_scrapy) {
param = `${this.scrapySpidersNamesDict[d._id] ? this.scrapySpidersNamesDict[d._id][0] : ''} --loglevel=${this.form.scrapy_log_level} ${this.form.param}`
}
return {
spider_id: d._id,
param
}
})
})
}
const res = await this.$store.dispatch('spider/crawlSpider', {
spiderId: this.spiderId,
nodeIds: this.form.nodeIds,
param,
runType: this.form.runType
})
const id = res.data.data[0]
// 消息提示
this.$message.success(this.$t('A task has been scheduled successfully'))
this.$emit('close')
this.$st.sendEv('爬虫确认', '确认运行', this.form.runType)
if (this.multiple) {
this.$st.sendEv('爬虫确认', '确认批量运行', this.form.runType)
} else {
this.$st.sendEv('爬虫确认', '确认运行', this.form.runType)
}
if (this.isRedirect && !this.spiderForm.is_long_task) {
// 是否重定向
if (
this.isRedirect &&
!this.spiderForm.is_long_task &&
!this.multiple
) {
// 返回任务id
const id = res.data.data[0]
this.$router.push('/tasks/' + id)
this.$st.sendEv('爬虫确认', '跳转到任务详情')
}
@@ -194,15 +260,32 @@ export default {
})
// 爬虫列表
this.isLoading = true
await this.$store.dispatch('spider/getSpiderData', this.spiderId)
if (this.spiderForm.is_scrapy) {
await this.$store.dispatch('spider/getSpiderScrapySpiders', this.spiderId)
if (this.spiderForm.spider_names && this.spiderForm.spider_names.length > 0) {
this.$set(this.form, 'spider', this.spiderForm.spider_names[0])
if (!this.multiple) {
// 单个爬虫
this.isLoading = true
try {
await this.$store.dispatch('spider/getSpiderData', this.spiderId)
if (this.spiderForm.is_scrapy) {
await this.$store.dispatch('spider/getSpiderScrapySpiders', this.spiderId)
if (this.spiderForm.spider_names && this.spiderForm.spider_names.length > 0) {
this.$set(this.form, 'spider', this.spiderForm.spider_names[0])
}
}
} finally {
this.isLoading = false
}
} else {
// 多个爬虫
this.isLoading = true
try {
// 遍历 Scrapy 爬虫列表
await Promise.all(this.scrapySpiders.map(async d => {
return this.fetchScrapySpiderName(d._id)
}))
} finally {
this.isLoading = false
}
}
this.isLoading = false
},
onOpenParameters () {
this.isParametersVisible = true

View File

@@ -25,8 +25,8 @@
</el-form-item>
</el-form>
<span slot="footer" class="dialog-footer">
<el-button @click="fileDialogVisible = false">{{$t('Cancel')}}</el-button>
<el-button type="primary" @click="onAddFile">{{$t('Confirm')}}</el-button>
<el-button size="small" @click="fileDialogVisible = false">{{$t('Cancel')}}</el-button>
<el-button size="small" type="primary" @click="onAddFile">{{$t('Confirm')}}</el-button>
</span>
</el-dialog>
@@ -415,7 +415,9 @@ export default {
const data = node.data
this.onFileClick(data)
node.parent.expanded = true
this.$set(this.nodeExpandedDict, node.parent.data.path, true)
node.parent.parent.expanded = true
this.$set(this.nodeExpandedDict, node.parent.parent.data.path, true)
},
clickPipeline () {
const filename = 'pipelines.py'
@@ -428,6 +430,7 @@ export default {
if (dataLv2.path.match(filename)) {
this.onFileClick(dataLv2)
nodeLv1.expanded = true
this.$set(this.nodeExpandedDict, dataLv1.path, true)
return
}
}

View File

@@ -45,7 +45,7 @@
/>
</el-form-item>
</template>
<el-form-item :label="$t('Results Collection')" prop="col" required :inline-message="true">
<el-form-item :label="$t('Results Collection')" prop="col">
<el-input v-model="spiderForm.col" :placeholder="$t('Results Collection')"
:disabled="isView"></el-input>
</el-form-item>

View File

@@ -558,6 +558,10 @@ docker run -d --restart always --name crawlab_worker \\
'Git has been reset successfully': 'Git 已经成功重置',
'This would delete all files of the spider. Are you sure to continue?': '重置将删除该爬虫所有文件您希望继续吗',
'SSH Public Key is copied to the clipboard': 'SSH 公钥已粘贴到剪切板',
'Removed successfully': '已成功删除',
'Are you sure to delete selected items?': '您是否确认删除所选项',
'Are you sure to stop selected items?': '您是否确认停止所选项',
'Sent signals to cancel selected tasks': '已经向所选任务发送取消任务信号',
// 其他
'Star crawlab-team/crawlab on GitHub': ' GitHub 上为 Crawlab 加星吧'

View File

@@ -212,6 +212,14 @@ const actions = {
param: param
})
},
crawlSelectedSpiders ({ state, dispatch }, payload) {
const { taskParams, runType, nodeIds } = payload
return request.post(`/spiders-run`, {
task_params: taskParams,
run_type: runType,
node_ids: nodeIds
})
},
getTaskList ({ state, commit }, id) {
return request.get(`/spiders/${id}/tasks`)
.then(response => {

View File

@@ -136,7 +136,7 @@ const actions = {
})
},
deleteTaskMultiple ({ state }, ids) {
return request.delete(`/tasks_multiple`, {
return request.delete(`/tasks`, {
ids: ids
})
},

View File

@@ -52,7 +52,7 @@
:disabled="spiderForm.is_scrapy"
/>
</el-form-item>
<el-form-item :label="$t('Results')" prop="col" required>
<el-form-item :label="$t('Results')" prop="col">
<el-input id="col" v-model="spiderForm.col" :placeholder="$t('Results')"/>
</el-form-item>
<el-form-item :label="$t('Upload Zip File')" label-width="120px" name="site">
@@ -283,7 +283,9 @@
<crawl-confirm-dialog
:visible="crawlConfirmDialogVisible"
:spider-id="activeSpiderId"
@close="crawlConfirmDialogVisible = false"
:spiders="selectedSpiders"
:multiple="isMultiple"
@close="onCrawlConfirmDialogClose"
@confirm="onCrawlConfirm"
/>
<!--./crawl confirm dialog-->
@@ -336,8 +338,38 @@
</el-form>
</div>
<div class="right">
<el-button size="small" v-if="false" type="primary" icon="fa fa-download" @click="openImportDialog">
{{$t('Import Spiders')}}
<el-button
v-if="this.selectedSpiders.length"
size="small"
type="danger"
icon="el-icon-video-play"
class="btn add"
@click="onCrawlSelectedSpiders"
style="font-weight: bolder"
>
{{$t('Run')}}
</el-button>
<el-button
v-if="this.selectedSpiders.length"
size="small"
type="info"
:icon="isStopLoading ? 'el-icon-loading' : 'el-icon-video-pause'"
class="btn add"
@click="onStopSelectedSpiders"
style="font-weight: bolder"
>
{{$t('Stop')}}
</el-button>
<el-button
v-if="this.selectedSpiders.length"
size="small"
type="danger"
:icon="isRemoveLoading ? 'el-icon-loading' : 'el-icon-delete'"
class="btn add"
@click="onRemoveSelectedSpiders"
style="font-weight: bolder"
>
{{$t('Remove')}}
</el-button>
<el-button
size="small"
@@ -349,7 +381,6 @@
>
{{$t('Add Spider')}}
</el-button>
</div>
</div>
<!--./filter-->
@@ -371,11 +402,20 @@
<el-table
:data="spiderList"
class="table"
ref="table"
:header-cell-style="{background:'rgb(48, 65, 86)',color:'white'}"
border
row-key="_id"
@row-click="onRowClick"
@sort-change="onSortChange"
@selection-change="onSpiderSelect"
>
<el-table-column
type="selection"
width="45"
align="center"
reserve-selection
/>
<template v-for="col in columns">
<el-table-column
v-if="col.name === 'type'"
@@ -740,7 +780,11 @@ export default {
}
},
handle: undefined,
activeSpiderTaskStatus: 'running'
activeSpiderTaskStatus: 'running',
selectedSpiders: [],
isStopLoading: false,
isRemoveLoading: false,
isMultiple: false
}
},
computed: {
@@ -789,6 +833,9 @@ export default {
return this.nodeList.filter(d => {
return d.status === 'online'
})
},
activeSpiderIds () {
return this.selectedSpiders.map(d => d._id)
}
},
methods: {
@@ -1077,6 +1124,61 @@ export default {
if (value) {
this.spiderForm.cmd = 'scrapy crawl'
}
},
onSpiderSelect (spiders) {
this.selectedSpiders = spiders
},
async onRemoveSelectedSpiders () {
this.$confirm(this.$t('Are you sure to delete selected items?'), this.$t('Notification'), {
confirmButtonText: this.$t('Confirm'),
cancelButtonText: this.$t('Cancel'),
type: 'warning'
}).then(async () => {
this.isRemoveLoading = true
try {
const res = await this.$request.delete('/spiders', {
spider_ids: this.selectedSpiders.map(d => d._id)
})
if (!res.data.error) {
this.$message.success('Delete successfully')
this.$refs['table'].clearSelection()
await this.getList()
}
} finally {
this.isRemoveLoading = false
}
this.$st.sendEv('爬虫列表', '批量删除爬虫')
})
},
async onStopSelectedSpiders () {
this.$confirm(this.$t('Are you sure to stop selected items?'), this.$t('Notification'), {
confirmButtonText: this.$t('Confirm'),
cancelButtonText: this.$t('Cancel'),
type: 'warning'
}).then(async () => {
this.isStopLoading = true
try {
const res = await this.$request.post('/spiders-cancel', {
spider_ids: this.selectedSpiders.map(d => d._id)
})
if (!res.data.error) {
this.$message.success('Sent signals to cancel selected tasks')
this.$refs['table'].clearSelection()
await this.getList()
}
} finally {
this.isStopLoading = false
}
this.$st.sendEv('爬虫列表', '批量删除爬虫')
})
},
// Open the crawl-confirm dialog in multi-spider mode: `crawlConfirmDialogVisible`
// feeds the dialog's `visible` prop and `isMultiple` feeds its `multiple` prop.
onCrawlSelectedSpiders () {
this.crawlConfirmDialogVisible = true
this.isMultiple = true
},
// Handler for the crawl-confirm dialog's `close` event: hide the dialog and
// reset `isMultiple` to false.
onCrawlConfirmDialogClose () {
this.crawlConfirmDialogVisible = false
this.isMultiple = false
}
},
async created () {

View File

@@ -59,7 +59,7 @@
@row-click="onRowClick"
@selection-change="onSelectionChange">
>
<el-table-column type="selection" width="55" reserve-selection/>
<el-table-column type="selection" width="45" align="center" reserve-selection/>
<template v-for="col in columns">
<el-table-column v-if="col.name === 'spider_name'"
:key="col.name"