From a599db1810ee0c828fb29a177697537be4cf087c Mon Sep 17 00:00:00 2001 From: hantmac Date: Tue, 20 Aug 2019 16:59:53 +0800 Subject: [PATCH] Code optimization:change the crawler zip file to slice read --- backend/services/spider.go | 61 ++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/backend/services/spider.go b/backend/services/spider.go index bf144959..9404c63a 100644 --- a/backend/services/spider.go +++ b/backend/services/spider.go @@ -7,10 +7,12 @@ import ( "crawlab/model" "crawlab/utils" "encoding/json" + "fmt" "github.com/apex/log" + "github.com/globalsign/mgo" "github.com/globalsign/mgo/bson" "github.com/pkg/errors" - uuid "github.com/satori/go.uuid" + "github.com/satori/go.uuid" "github.com/spf13/viper" "io" "io/ioutil" @@ -162,24 +164,6 @@ func ZipSpider(spider model.Spider) (filePath string, err error) { func UploadToGridFs(spider model.Spider, fileName string, filePath string) (fid bson.ObjectId, err error) { fid = "" - // 读取zip文件 - file, err := os.OpenFile(filePath, os.O_RDONLY, 0777) - fileBytes, err := ioutil.ReadAll(file) - if err != nil { - debug.PrintStack() - return - } - if err = file.Close(); err != nil { - debug.PrintStack() - return - } - - // 删除zip文件 - if err = os.Remove(filePath); err != nil { - debug.PrintStack() - return - } - // 获取MongoDB GridFS连接 s, gf := database.GetGridFs("files") defer s.Close() @@ -198,23 +182,54 @@ func UploadToGridFs(spider model.Spider, fileName string, filePath string) (fid return } - // 将文件写入到GridFS - if _, err = f.Write(fileBytes); err != nil { + //分片读取爬虫zip文件 + ReadFileByStep(filePath, WriteToGridFS, f) + + // 删除zip文件 + if err = os.Remove(filePath); err != nil { debug.PrintStack() return } - // 关闭文件,提交写入 if err = f.Close(); err != nil { return "", err } - // 文件ID fid = f.Id().(bson.ObjectId) return fid, nil } +func WriteToGridFS(content []byte, f *mgo.GridFile) { + if _, err := f.Write(content); err != nil { + debug.PrintStack() + return + } +} + +//分片读取大文件 +func ReadFileByStep(filePath string, handle func([]byte, *mgo.GridFile), fileCreate *mgo.GridFile) error { + f, err := os.OpenFile(filePath, os.O_RDONLY, 0777) + if err != nil { + log.Infof("can't opened this file") + return err + } + defer f.Close() + s := make([]byte, 4096) + for { + switch nr, err := f.Read(s[:]); true { + case nr < 0: + fmt.Fprintf(os.Stderr, "cat: error reading: %s\n", err.Error()) + debug.PrintStack() + case nr == 0: // EOF + return nil + case nr > 0: + handle(s[0:nr], fileCreate) + } + } + return nil +} + // 发布所有爬虫 func PublishAllSpiders() error { // 获取爬虫列表