Merge pull request #243 from crawlab-team/develop

Develop
This commit is contained in:
暗音
2019-10-07 13:09:29 +08:00
committed by GitHub
5 changed files with 30 additions and 7 deletions

View File

@@ -21,6 +21,7 @@
三种方式:
1. [Docker](https://tikazyq.github.io/crawlab-docs/Installation/Docker.html)(推荐)
2. [直接部署](https://tikazyq.github.io/crawlab-docs/Installation/Direct.html)(了解内核)
3. [Kubernetes](https://mp.weixin.qq.com/s/3Q1BQATUIEE_WXcHPqhYbA)
### 要求Docker
- Docker 18.03+

View File

@@ -21,6 +21,7 @@ Golang-based distributed web crawler management platform, supporting various lan
Three methods:
1. [Docker](https://tikazyq.github.io/crawlab-docs/Installation/Docker.html) (Recommended)
2. [Direct Deploy](https://tikazyq.github.io/crawlab-docs/Installation/Direct.html) (Understand the Internals)
3. [Kubernetes](https://mp.weixin.qq.com/s/3Q1BQATUIEE_WXcHPqhYbA)
### Pre-requisite (Docker)
- Docker 18.03+

View File

@@ -3,6 +3,7 @@ package database
import (
"github.com/globalsign/mgo"
"github.com/spf13/viper"
"net"
"time"
)
@@ -39,13 +40,28 @@ func InitMongo() error {
var mongoAuth = viper.GetString("mongo.authSource")
if Session == nil {
var uri string
if mongoUsername == "" {
uri = "mongodb://" + mongoHost + ":" + mongoPort + "/" + mongoDb
} else {
uri = "mongodb://" + mongoUsername + ":" + mongoPassword + "@" + mongoHost + ":" + mongoPort + "/" + mongoDb + "?authSource=" + mongoAuth
var dialInfo mgo.DialInfo
addr := net.JoinHostPort(mongoHost, mongoPort)
timeout := time.Second * 10
dialInfo = mgo.DialInfo{
Addrs: []string{addr},
Timeout: timeout,
Database: mongoDb,
PoolLimit: 100,
PoolTimeout: timeout,
ReadTimeout: timeout,
WriteTimeout: timeout,
AppName: "crawlab",
FailFast: true,
MinPoolSize: 10,
MaxIdleTimeMS: 1000 * 30,
}
sess, err := mgo.DialWithTimeout(uri, time.Second*5)
if mongoUsername != "" {
dialInfo.Username = mongoUsername
dialInfo.Password = mongoPassword
dialInfo.Source = mongoAuth
}
sess, err := mgo.DialWithInfo(&dialInfo)
if err != nil {
return err
}

View File

@@ -135,7 +135,7 @@ func PutSpider(c *gin.Context) {
// 以防tmp目录不存在
tmpPath := viper.GetString("other.tmppath")
if !utils.Exists(tmpPath) {
if err := os.Mkdir(tmpPath, os.ModePerm); err != nil {
if err := os.MkdirAll(tmpPath, os.ModePerm); err != nil {
log.Error("mkdir other.tmppath dir error:" + err.Error())
debug.PrintStack()
HandleError(http.StatusBadRequest, c, errors.New("Mkdir other.tmppath dir error"))

View File

@@ -16,6 +16,7 @@ import (
"os"
"path/filepath"
"runtime/debug"
"strings"
)
type SpiderFileData struct {
@@ -144,7 +145,11 @@ func PublishSpider(spider model.Spider) {
}
// md5值不一样则下载
md5Str := utils.ReadFileOneLine(md5)
// 去掉空格以及换行符
md5Str = strings.Replace(md5Str, " ", "", -1)
md5Str = strings.Replace(md5Str, "\n", "", -1)
if gfFile.Md5 != md5Str {
log.Infof("md5 is different, gf-md5:%s, file-md5:%s", gfFile.Md5, md5Str)
spiderSync.RemoveSpiderFile()
spiderSync.Download()
spiderSync.CreateMd5File(gfFile.Md5)