refactor: code cleanup

This commit is contained in:
Marvin Zhang
2024-10-20 17:17:08 +08:00
parent 4ac92d7eaf
commit 418af7c215
28 changed files with 80 additions and 5849 deletions

View File

@@ -41,6 +41,9 @@ func (c *Config) WatchConfig() {
}
func (c *Config) Init() (err error) {
// Set default values
c.setDefaults()
// config
if c.Name != "" {
viper.SetConfigFile(c.Name) // if config file is set, load it accordingly
@@ -50,13 +53,13 @@ func (c *Config) Init() (err error) {
}
// config type as yaml
viper.SetConfigType("yaml") // default yaml
viper.SetConfigType("yaml")
// auto env
viper.AutomaticEnv() // load matched environment variables
viper.AutomaticEnv()
// env prefix
viper.SetEnvPrefix("CRAWLAB") // environment variable prefix as CRAWLAB
viper.SetEnvPrefix("CRAWLAB")
// replacer
replacer := strings.NewReplacer(".", "_")
@@ -64,13 +67,39 @@ func (c *Config) Init() (err error) {
// read in config
if err := viper.ReadInConfig(); err != nil {
log.Errorf("Error reading config file, %s", err)
return err
if _, ok := err.(viper.ConfigFileNotFoundError); ok {
log.Warn("No config file found. Using default values.")
} else {
log.Errorf("Error reading config file: %s", err)
return err
}
}
return nil
}
func (c *Config) setDefaults() {
viper.SetDefault("edition", "global.edition.community")
viper.SetDefault("mongo.host", "localhost")
viper.SetDefault("mongo.port", 27017)
viper.SetDefault("mongo.db", "crawlab_test")
viper.SetDefault("mongo.username", "")
viper.SetDefault("mongo.password", "")
viper.SetDefault("mongo.authSource", "admin")
viper.SetDefault("server.host", "0.0.0.0")
viper.SetDefault("server.port", 8000)
viper.SetDefault("grpc.address", "localhost:9666")
viper.SetDefault("grpc.server.address", "0.0.0.0:9666")
viper.SetDefault("grpc.authKey", "Crawlab2021!")
viper.SetDefault("api.endpoint", "http://localhost:8000")
viper.SetDefault("log.path", "/var/log/crawlab")
}
func (c *Config) initLogLevel() {
// set log level
logLevel := viper.GetString("log.level")

View File

@@ -19,47 +19,58 @@ func init() {
}
func TestInitConfig(t *testing.T) {
// Create a temporary directory for the test
// Create a new Config instance
c := Config{Name: ""}
// Initialize the config
err := c.Init()
require.NoError(t, err, "Failed to initialize config")
// Test default values
assert.Equal(t, "global.edition.community", viper.GetString("edition"), "Unexpected default value for edition")
assert.Equal(t, "localhost", viper.GetString("mongo.host"), "Unexpected default value for mongo.host")
assert.Equal(t, 27017, viper.GetInt("mongo.port"), "Unexpected default value for mongo.port")
assert.Equal(t, "crawlab_test", viper.GetString("mongo.db"), "Unexpected default value for mongo.db")
assert.Equal(t, "0.0.0.0", viper.GetString("server.host"), "Unexpected default value for server.host")
assert.Equal(t, 8000, viper.GetInt("server.port"), "Unexpected default value for server.port")
assert.Equal(t, "localhost:9666", viper.GetString("grpc.address"), "Unexpected default value for grpc.address")
assert.Equal(t, "Crawlab2021!", viper.GetString("grpc.authKey"), "Unexpected default value for grpc.authKey")
assert.Equal(t, "http://localhost:8000", viper.GetString("api.endpoint"), "Unexpected default value for api.endpoint")
assert.Equal(t, "/var/log/crawlab", viper.GetString("log.path"), "Unexpected default value for log.path")
// Test environment variable override
os.Setenv("CRAWLAB_MONGO_HOST", "mongodb.example.com")
defer os.Unsetenv("CRAWLAB_MONGO_HOST")
assert.Equal(t, "mongodb.example.com", viper.GetString("mongo.host"), "Environment variable should override default value")
// Test with a config file
tempDir, err := os.MkdirTemp("", "crawlab-config-test")
require.NoError(t, err, "Failed to create temp directory")
defer os.RemoveAll(tempDir)
// Create a temporary config file
configContent := []byte(`
log:
level: info
test:
string: default_string_value
int: 0
bool: false
nested:
key: default_nested_value
edition: global.edition.pro
mongo:
host: mongodb.custom.com
port: 27018
server:
port: 8001
`)
configPath := filepath.Join(tempDir, "config.yaml")
err = os.WriteFile(configPath, configContent, 0644)
require.NoError(t, err, "Failed to write config file")
// Set up the test environment
oldConfigPath := viper.ConfigFileUsed()
defer viper.SetConfigFile(oldConfigPath)
viper.SetConfigFile(configPath)
// Create a new Config instance with the config file
cWithFile := Config{Name: configPath}
err = cWithFile.Init()
require.NoError(t, err, "Failed to initialize config with file")
// Create a new Config instance
c := Config{Name: configPath}
// Test values from config file
assert.Equal(t, "global.edition.pro", viper.GetString("edition"), "Unexpected value for edition from config file")
assert.Equal(t, "mongodb.custom.com", viper.GetString("mongo.host"), "Unexpected value for mongo.host from config file")
assert.Equal(t, 27018, viper.GetInt("mongo.port"), "Unexpected value for mongo.port from config file")
assert.Equal(t, 8001, viper.GetInt("server.port"), "Unexpected value for server.port from config file")
// Initialize the config
err = c.Init()
require.NoError(t, err, "Failed to initialize config")
// Test config values
assert.Equal(t, "default_string_value", viper.GetString("test.string"), "Unexpected value for test.string")
assert.Equal(t, 0, viper.GetInt("test.int"), "Unexpected value for test.int")
assert.False(t, viper.GetBool("test.bool"), "Unexpected value for test.bool")
assert.Equal(t, "default_nested_value", viper.GetString("test.nested.key"), "Unexpected value for test.nested.key")
assert.Empty(t, viper.GetString("non.existent.key"), "Non-existent key should return empty string")
// Test environment variable override
os.Setenv("CRAWLAB_TEST_STRING", "env_string_value")
defer os.Unsetenv("CRAWLAB_TEST_STRING")
assert.Equal(t, "env_string_value", viper.GetString("test.string"), "Environment variable should override config value")
// Values not in config file should still use defaults
assert.Equal(t, "Crawlab2021!", viper.GetString("grpc.authKey"), "Unexpected default value for grpc.authKey")
}

View File

@@ -3,6 +3,10 @@ package controllers_test
import (
"bytes"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"github.com/crawlab-team/crawlab/core/controllers"
"github.com/crawlab-team/crawlab/core/middlewares"
"github.com/crawlab-team/crawlab/core/models/models/v2"
@@ -12,9 +16,6 @@ import (
"github.com/stretchr/testify/require"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/bson/primitive"
"net/http"
"net/http/httptest"
"testing"
)
func TestCreateSpider(t *testing.T) {
@@ -45,7 +46,6 @@ func TestCreateSpider(t *testing.T) {
require.Nil(t, err)
assert.False(t, response.Data.Id.IsZero())
assert.Equal(t, payload.Name, response.Data.Name)
assert.False(t, response.Data.ColId.IsZero())
}
func TestGetSpiderById(t *testing.T) {

File diff suppressed because it is too large Load Diff

View File

@@ -1,18 +0,0 @@
package entity
type Color struct {
Name string `json:"name"`
Hex string `json:"hex"`
}
func (c *Color) GetHex() string {
return c.Hex
}
func (c *Color) GetName() string {
return c.Name
}
func (c *Color) Value() interface{} {
return c
}

View File

@@ -1,17 +0,0 @@
package entity
import "strconv"
type Page struct {
Skip int
Limit int
PageNum int
PageSize int
}
func (p *Page) GetPage(pageNum string, pageSize string) {
p.PageNum, _ = strconv.Atoi(pageNum)
p.PageSize, _ = strconv.Atoi(pageSize)
p.Skip = p.PageSize * (p.PageNum - 1)
p.Limit = p.PageSize
}

View File

@@ -1,40 +0,0 @@
package entity
type ConfigSpiderData struct {
// 通用
Name string `yaml:"name" json:"name"`
DisplayName string `yaml:"display_name" json:"display_name"`
Col string `yaml:"col" json:"col"`
Remark string `yaml:"remark" json:"remark"`
Type string `yaml:"type" bson:"type"`
// 可配置爬虫
Engine string `yaml:"engine" json:"engine"`
StartUrl string `yaml:"start_url" json:"start_url"`
StartStage string `yaml:"start_stage" json:"start_stage"`
Stages []Stage `yaml:"stages" json:"stages"`
Settings map[string]string `yaml:"settings" json:"settings"`
// 自定义爬虫
Cmd string `yaml:"cmd" json:"cmd"`
}
type Stage struct {
Name string `yaml:"name" json:"name"`
IsList bool `yaml:"is_list" json:"is_list"`
ListCss string `yaml:"list_css" json:"list_css"`
ListXpath string `yaml:"list_xpath" json:"list_xpath"`
PageCss string `yaml:"page_css" json:"page_css"`
PageXpath string `yaml:"page_xpath" json:"page_xpath"`
PageAttr string `yaml:"page_attr" json:"page_attr"`
Fields []Field `yaml:"fields" json:"fields"`
}
type Field struct {
Name string `yaml:"name" json:"name"`
Css string `yaml:"css" json:"css"`
Xpath string `yaml:"xpath" json:"xpath"`
Attr string `yaml:"attr" json:"attr"`
NextStage string `yaml:"next_stage" json:"next_stage"`
Remark string `yaml:"remark" json:"remark"`
}

View File

@@ -1,8 +0,0 @@
package entity
type DocItem struct {
Title string `json:"title"`
Url string `json:"url"`
Path string `json:"path"`
Children []DocItem `json:"children"`
}

View File

@@ -1,54 +0,0 @@
package entity
/* ElasticsearchResponseData JSON format
{
"took" : 6,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 60,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "test_table",
"_id" : "c39ad9a2-9a37-49fb-b7ea-f1b55913e0af",
"_score" : 1.0,
"_source" : {
"_tid" : "62524ac7f5f99e7ef594de64",
"author" : "James Baldwin",
"tags" : [
"love"
],
"text" : "“Love does not begin and end the way we seem to think it does. Love is a battle, love is a war; love is a growing up.”"
}
}
]
}
}
*/
type ElasticsearchResponseData struct {
Took int64 `json:"took"`
Timeout bool `json:"timeout"`
Hits struct {
Total struct {
Value int64 `json:"value"`
Relation string `json:"relation"`
} `json:"total"`
MaxScore float64 `json:"max_score"`
Hits []struct {
Index string `json:"_index"`
Id string `json:"_id"`
Score float64 `json:"_score"`
Source interface{} `json:"_source"`
} `json:"hits"`
} `json:"hits"`
}

View File

@@ -1,14 +0,0 @@
package entity
type EventData struct {
Event string
Data interface{}
}
func (d *EventData) GetEvent() string {
return d.Event
}
func (d *EventData) GetData() interface{} {
return d.Data
}

View File

@@ -1,12 +0,0 @@
package entity
type GitPayload struct {
Paths []string `json:"paths"`
CommitMessage string `json:"commit_message"`
Branch string `json:"branch"`
Tag string `json:"tag"`
}
type GitConfig struct {
Url string `json:"url" bson:"url"`
}

View File

@@ -1,29 +0,0 @@
package entity
import (
"encoding/json"
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/trace"
)
type GrpcBaseServiceMessage struct {
ModelId interfaces.ModelId `json:"id"`
Data []byte `json:"d"`
}
func (msg *GrpcBaseServiceMessage) GetModelId() interfaces.ModelId {
return msg.ModelId
}
func (msg *GrpcBaseServiceMessage) GetData() []byte {
return msg.Data
}
func (msg *GrpcBaseServiceMessage) ToBytes() (data []byte) {
data, err := json.Marshal(*msg)
if err != nil {
_ = trace.TraceError(err)
return data
}
return data
}

View File

@@ -1,23 +0,0 @@
package entity
import (
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/db/mongo"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/bson/primitive"
)
type GrpcBaseServiceParams struct {
Query bson.M `json:"q"`
Id primitive.ObjectID `json:"id"`
Update bson.M `json:"u"`
Doc interfaces.Model `json:"d"`
Fields []string `json:"f"`
FindOptions *mongo.FindOptions `json:"o"`
Docs []interface{} `json:"dl"`
User interfaces.User `json:"U"`
}
func (params *GrpcBaseServiceParams) Value() interface{} {
return params
}

View File

@@ -1,34 +0,0 @@
package entity
import (
"encoding/json"
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/trace"
)
type GrpcDelegateMessage struct {
ModelId interfaces.ModelId `json:"id"`
Method interfaces.ModelDelegateMethod `json:"m"`
Data []byte `json:"d"`
}
func (msg *GrpcDelegateMessage) GetModelId() interfaces.ModelId {
return msg.ModelId
}
func (msg *GrpcDelegateMessage) GetMethod() interfaces.ModelDelegateMethod {
return msg.Method
}
func (msg *GrpcDelegateMessage) GetData() []byte {
return msg.Data
}
func (msg *GrpcDelegateMessage) ToBytes() (data []byte) {
data, err := json.Marshal(*msg)
if err != nil {
_ = trace.TraceError(err)
return data
}
return data
}

View File

@@ -1,8 +0,0 @@
package entity
type GrpcEventServiceMessage struct {
Type string `json:"type"`
Events []string `json:"events"`
Key string `json:"key"`
Data []byte `json:"data"`
}

View File

@@ -17,26 +17,6 @@ type ListResponse struct {
Error string `json:"error"`
}
type ListRequestData struct {
PageNum int `form:"page_num" json:"page_num"`
PageSize int `form:"page_size" json:"page_size"`
SortKey string `form:"sort_key" json:"sort_key"`
Status string `form:"status" json:"status"`
Keyword string `form:"keyword" json:"keyword"`
}
type BatchRequestPayload struct {
Ids []primitive.ObjectID `form:"ids" json:"ids"`
}
type BatchRequestPayloadWithStringData struct {
Ids []primitive.ObjectID `form:"ids" json:"ids"`
Data string `form:"data" json:"data"`
Fields []string `form:"fields" json:"fields"`
}
type FileRequestPayload struct {
Path string `json:"path" form:"path"`
NewPath string `json:"new_path" form:"new_path"`
Data string `json:"data" form:"data"`
}

View File

@@ -1,11 +0,0 @@
package entity
import "github.com/crawlab-team/crawlab/core/interfaces"
type ModelDelegate struct {
Id interfaces.ModelId `json:"id"`
ColName string `json:"col_name"`
Doc interfaces.Model `json:"doc"`
Artifact interfaces.ModelArtifact `json:"a"`
User interfaces.User `json:"u"`
}

View File

@@ -1,8 +0,0 @@
package entity
import "github.com/crawlab-team/crawlab/core/interfaces"
type ModelInfo struct {
Id interfaces.ModelId
ColName string
}

View File

@@ -1,11 +0,0 @@
package entity
type RpcMessage struct {
Id string `json:"id"` // 消息ID
Method string `json:"method"` // 消息方法
NodeId string `json:"node_id"` // 节点ID
Params map[string]string `json:"params"` // 参数
Timeout int `json:"timeout"` // 超时
Result string `json:"result"` // 结果
Error string `json:"error"` // 错误
}

View File

@@ -1,17 +0,0 @@
package entity
type SpiderType struct {
Type string `json:"type" bson:"_id"`
Count int `json:"count" bson:"count"`
}
type ScrapySettingParam struct {
Key string `json:"key"`
Value interface{} `json:"value"`
Type string `json:"type"`
}
type ScrapyItem struct {
Name string `json:"name"`
Fields []string `json:"fields"`
}

View File

@@ -5,8 +5,3 @@ type StatsDailyItem struct {
Tasks int64 `json:"tasks" bson:"tasks"`
Results int64 `json:"results" bson:"results"`
}
type StatsTasksByStatusItem struct {
Status string `json:"status" bson:"_id"`
Tasks int64 `json:"tasks" bson:"tasks"`
}

View File

@@ -1,28 +1,9 @@
package entity
import (
"encoding/json"
"go.mongodb.org/mongo-driver/bson/primitive"
)
type TaskMessage struct {
Id primitive.ObjectID `json:"id"`
Key string `json:"key"`
Cmd string `json:"cmd"`
Param string `json:"param"`
}
func (m *TaskMessage) ToString() (string, error) {
data, err := json.Marshal(&m)
if err != nil {
return "", err
}
return string(data), err
}
type TaskRunOptions struct {
}
type StreamMessageTaskData struct {
TaskId primitive.ObjectID `json:"task_id"`
Records []Result `json:"data"`

View File

@@ -1,11 +0,0 @@
package entity
type Translation struct {
Lang string `json:"lang"`
Key string `json:"key"`
Value string `json:"value"`
}
func (t Translation) GetLang() (l string) {
return t.Lang
}

View File

@@ -1,58 +0,0 @@
package entity
import (
"sync"
"time"
)
type TTLMap struct {
TTL time.Duration
data sync.Map
}
type expireEntry struct {
ExpiresAt time.Time
Value interface{}
}
func (t *TTLMap) Store(key string, val interface{}) {
t.data.Store(key, expireEntry{
ExpiresAt: time.Now().Add(t.TTL),
Value: val,
})
}
func (t *TTLMap) Load(key string) (val interface{}) {
entry, ok := t.data.Load(key)
if !ok {
return nil
}
expireEntry := entry.(expireEntry)
if expireEntry.ExpiresAt.After(time.Now()) {
return nil
}
return expireEntry.Value
}
func NewTTLMap(ttl time.Duration) (m *TTLMap) {
m = &TTLMap{
TTL: ttl,
}
go func() {
for now := range time.Tick(time.Second) {
m.data.Range(func(k, v interface{}) bool {
expiresAt := v.(expireEntry).ExpiresAt
if expiresAt.Before(now) {
m.data.Delete(k)
}
return true
})
}
}()
return
}

View File

@@ -1,23 +0,0 @@
package entity
type Release struct {
Name string `json:"name"`
Draft bool `json:"draft"`
PreRelease bool `json:"pre_release"`
PublishedAt string `json:"published_at"`
Body string `json:"body"`
}
type ReleaseSlices []Release
func (r ReleaseSlices) Len() int {
return len(r)
}
func (r ReleaseSlices) Less(i, j int) bool {
return r[i].PublishedAt < r[j].PublishedAt
}
func (r ReleaseSlices) Swap(i, j int) {
r[i], r[j] = r[j], r[i]
}

View File

@@ -1,6 +0,0 @@
package event
func SendEvent(eventName string, data ...interface{}) {
svc := NewEventService()
svc.SendEvent(eventName, data...)
}

View File

@@ -1,92 +0,0 @@
package event
import (
"fmt"
"github.com/apex/log"
"github.com/crawlab-team/crawlab/core/entity"
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/core/utils"
"github.com/crawlab-team/crawlab/trace"
"github.com/thoas/go-funk"
"regexp"
)
var S interfaces.EventService
type Service struct {
keys []string
includes []string
excludes []string
chs []*chan interfaces.EventData
}
func (svc *Service) Register(key, include, exclude string, ch *chan interfaces.EventData) {
svc.keys = append(svc.keys, key)
svc.includes = append(svc.includes, include)
svc.excludes = append(svc.excludes, exclude)
svc.chs = append(svc.chs, ch)
}
func (svc *Service) Unregister(key string) {
idx := funk.IndexOfString(svc.keys, key)
if idx != -1 {
svc.keys = append(svc.keys[:idx], svc.keys[(idx+1):]...)
svc.includes = append(svc.includes[:idx], svc.includes[(idx+1):]...)
svc.excludes = append(svc.excludes[:idx], svc.excludes[(idx+1):]...)
svc.chs = append(svc.chs[:idx], svc.chs[(idx+1):]...)
log.Infof("[EventService] unregistered %s", key)
}
}
func (svc *Service) SendEvent(eventName string, data ...interface{}) {
for i, key := range svc.keys {
// include
include := svc.includes[i]
matchedInclude, err := regexp.MatchString(include, eventName)
if err != nil {
trace.PrintError(err)
continue
}
if !matchedInclude {
continue
}
// exclude
exclude := svc.excludes[i]
matchedExclude, err := regexp.MatchString(exclude, eventName)
if err != nil {
trace.PrintError(err)
continue
}
if matchedExclude {
continue
}
// send event
utils.LogDebug(fmt.Sprintf("key %s matches event %s", key, eventName))
ch := svc.chs[i]
go func(ch *chan interfaces.EventData) {
for _, d := range data {
*ch <- &entity.EventData{
Event: eventName,
Data: d,
}
}
}(ch)
}
}
func NewEventService() (svc interfaces.EventService) {
if S != nil {
return S
}
svc = &Service{
chs: []*chan interfaces.EventData{},
keys: []string{},
}
S = svc
return svc
}

View File

@@ -1,59 +0,0 @@
package server
import (
"encoding/json"
"github.com/crawlab-team/crawlab/core/entity"
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/grpc"
"github.com/crawlab-team/crawlab/trace"
)
func NewModelBaseServiceBinder(req *grpc.Request) (b *ModelBaseServiceBinder) {
return &ModelBaseServiceBinder{
req: req,
msg: &entity.GrpcBaseServiceMessage{},
}
}
type ModelBaseServiceBinder struct {
req *grpc.Request
msg interfaces.GrpcModelBaseServiceMessage
}
func (b *ModelBaseServiceBinder) Bind() (res *entity.GrpcBaseServiceParams, err error) {
if err := b.bindBaseServiceMessage(); err != nil {
return nil, err
}
params := &entity.GrpcBaseServiceParams{}
return b.process(params)
}
func (b *ModelBaseServiceBinder) MustBind() (res interface{}) {
res, err := b.Bind()
if err != nil {
panic(err)
}
return res
}
func (b *ModelBaseServiceBinder) BindWithBaseServiceMessage() (params *entity.GrpcBaseServiceParams, msg interfaces.GrpcModelBaseServiceMessage, err error) {
if err := json.Unmarshal(b.req.Data, b.msg); err != nil {
return nil, nil, err
}
params, err = b.Bind()
if err != nil {
return nil, nil, err
}
return params, b.msg, nil
}
func (b *ModelBaseServiceBinder) process(params *entity.GrpcBaseServiceParams) (res *entity.GrpcBaseServiceParams, err error) {
if err := json.Unmarshal(b.msg.GetData(), params); err != nil {
return nil, trace.TraceError(err)
}
return params, nil
}
func (b *ModelBaseServiceBinder) bindBaseServiceMessage() (err error) {
return json.Unmarshal(b.req.Data, b.msg)
}