完成爬虫列表

This commit is contained in:
陈景阳
2019-09-26 20:53:05 +08:00
parent bdeaa587f9
commit 0ddb294885
4 changed files with 16 additions and 11 deletions

View File

@@ -93,7 +93,7 @@ func (spider *Spider) GetLastTask() (Task, error) {
return tasks[0], nil
}
func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, error) {
func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, int, error) {
s, c := database.GetCol("spiders")
defer s.Close()
@@ -101,7 +101,7 @@ func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, error) {
spiders := []Spider{}
if err := c.Find(filter).Skip(skip).Limit(limit).Sort("+name").All(&spiders); err != nil {
debug.PrintStack()
return spiders, err
return spiders, 0, err
}
// 遍历爬虫列表
@@ -119,7 +119,9 @@ func GetSpiderList(filter interface{}, skip int, limit int) ([]Spider, error) {
spiders[i].LastStatus = task.Status
}
return spiders, nil
count, _ := c.Find(filter).Count()
return spiders, count, nil
}
func GetSpiderByName(name string) *Spider {

View File

@@ -24,7 +24,14 @@ import (
)
func GetSpiderList(c *gin.Context) {
results, err := model.GetSpiderList(nil, 0, 0)
pageNumStr, _ := c.GetQuery("pageNum")
pageSizeStr, _ := c.GetQuery("pageSize")
keyword, _ := c.GetQuery("keyword")
pageNum, _ := strconv.Atoi(pageNumStr)
pageSize, _ := strconv.Atoi(pageSizeStr)
skip := pageSize * (pageNum - 1)
filter := bson.M{"name": bson.M{"$regex": bson.RegEx{Pattern: keyword, Options: "im"}}}
results, count, err := model.GetSpiderList(filter, skip, pageSize)
if err != nil {
HandleError(http.StatusInternalServerError, c, err)
return
@@ -32,7 +39,7 @@ func GetSpiderList(c *gin.Context) {
c.JSON(http.StatusOK, Response{
Status: "ok",
Message: "success",
Data: results,
Data: bson.M{"list": results, "total": count},
})
}

View File

@@ -97,7 +97,7 @@ func ReadFileByStep(filePath string, handle func([]byte, *mgo.GridFile), fileCre
// 发布所有爬虫
func PublishAllSpiders() {
// 获取爬虫列表
spiders, _ := model.GetSpiderList(nil, 0, constants.Infinite)
spiders, _, _ := model.GetSpiderList(nil, 0, constants.Infinite)
if len(spiders) == 0 {
return
}
@@ -143,7 +143,6 @@ func PublishSpider(spider model.Spider) {
// md5值不一样则下载
md5Str := utils.ReadFile(md5)
if gfFile.Md5 != md5Str {
log.Infof("md5 is different, fileName=%s, file-md5=%s , gf-file-md5=%s ", spider.Name, md5Str, gfFile.Md5)
spiderSync.RemoveSpiderFile()
spiderSync.Download()
spiderSync.CreateMd5File(gfFile.Md5)
@@ -155,7 +154,7 @@ func PublishSpider(spider model.Spider) {
func InitSpiderService() error {
// 构造定时任务执行器
c := cron.New(cron.WithSeconds())
if _, err := c.AddFunc("0/15 * * * * *", PublishAllSpiders); err != nil {
if _, err := c.AddFunc("0 * * * * *", PublishAllSpiders); err != nil {
return err
}
// 启动定时任务

View File

@@ -56,7 +56,6 @@ func (s *SpiderSync) RemoveSpiderFile() {
func (s *SpiderSync) CheckDownLoading(spiderId string, fileId string) (bool, string) {
key := s.GetLockDownloadKey(spiderId)
if _, err := database.RedisClient.HGet("spider", key); err == nil {
log.Infof("downloading spider file, spider_id: %s, file_id:%s", spiderId, fileId)
return true, key
}
return false, key
@@ -68,7 +67,6 @@ func (s *SpiderSync) Download() {
fileId := s.Spider.FileId.Hex()
isDownloading, key := s.CheckDownLoading(spiderId, fileId)
if isDownloading {
log.Infof("spider is downloading, spider_id: %s", spiderId)
return
} else {
_ = database.RedisClient.HSet("spider", key, key)
@@ -131,6 +129,5 @@ func (s *SpiderSync) Download() {
return
}
log.Infof("del key : %s", key)
_ = database.RedisClient.HDel("spider", key)
}