feat: added modules

This commit is contained in:
Marvin Zhang
2024-06-14 15:42:50 +08:00
parent f1833fed21
commit 0b67fd9ece
626 changed files with 60104 additions and 0 deletions

35
core/schedule/logger.go Normal file
View File

@@ -0,0 +1,35 @@
package schedule
import (
"fmt"
"github.com/apex/log"
"github.com/crawlab-team/go-trace"
"github.com/robfig/cron/v3"
"strings"
)
// Logger adapts the apex/log package to the cron.Logger interface.
// It carries no state.
type Logger struct {
}
// Info logs msg at info level, prefixed with "cron: " and followed by the
// space-separated keysAndValues rendered with %v.
//
// msg is passed as an argument rather than spliced into the format string, so
// a literal '%' inside msg is printed verbatim instead of being interpreted as
// a formatting verb.
func (l *Logger) Info(msg string, keysAndValues ...interface{}) {
	p := l.getPlaceholder(len(keysAndValues))
	log.Infof("cron: %s %s", msg, fmt.Sprintf(p, keysAndValues...))
}
// Error logs msg at error level, prefixed with "cron: " and followed by the
// space-separated keysAndValues rendered with %v, then prints err through
// trace.PrintError.
//
// msg is passed as an argument rather than spliced into the format string, so
// a literal '%' inside msg is printed verbatim instead of being interpreted as
// a formatting verb.
func (l *Logger) Error(err error, msg string, keysAndValues ...interface{}) {
	p := l.getPlaceholder(len(keysAndValues))
	log.Errorf("cron: %s %s", msg, fmt.Sprintf(p, keysAndValues...))
	trace.PrintError(err)
}
// getPlaceholder builds a format string made of n "%v" verbs separated by
// single spaces. It returns the empty string when n is zero or negative.
func (l *Logger) getPlaceholder(n int) (s string) {
	if n <= 0 {
		return ""
	}
	return strings.Repeat("%v ", n-1) + "%v"
}
// NewLogger returns a fresh *Logger as a cron.Logger.
func NewLogger() cron.Logger {
	return new(Logger)
}

37
core/schedule/options.go Normal file
View File

@@ -0,0 +1,37 @@
package schedule
import (
"github.com/crawlab-team/crawlab/core/interfaces"
"time"
)
// Option is a functional option that configures an interfaces.ScheduleService.
type Option func(svc interfaces.ScheduleService)
// WithConfigPath returns an Option that calls SetConfigPath(path) on the
// service it is applied to.
func WithConfigPath(path string) Option {
	var opt Option = func(svc interfaces.ScheduleService) {
		svc.SetConfigPath(path)
	}
	return opt
}
// WithLocation returns an Option that calls SetLocation(loc) on the service
// it is applied to.
func WithLocation(loc *time.Location) Option {
	var opt Option = func(svc interfaces.ScheduleService) {
		svc.SetLocation(loc)
	}
	return opt
}
// WithDelayIfStillRunning returns an Option that calls SetDelay(true) on the
// service it is applied to.
func WithDelayIfStillRunning() Option {
	var opt Option = func(svc interfaces.ScheduleService) {
		svc.SetDelay(true)
	}
	return opt
}
// WithSkipIfStillRunning returns an Option that calls SetSkip(true) on the
// service it is applied to.
func WithSkipIfStillRunning() Option {
	var opt Option = func(svc interfaces.ScheduleService) {
		svc.SetSkip(true)
	}
	return opt
}
// WithUpdateInterval returns an Option that sets the service's update
// interval to interval via SetUpdateInterval.
//
// The option previously had an empty body and silently discarded interval.
func WithUpdateInterval(interval time.Duration) Option {
	return func(svc interfaces.ScheduleService) {
		svc.SetUpdateInterval(interval)
	}
}

287
core/schedule/service.go Normal file
View File

@@ -0,0 +1,287 @@
package schedule
import (
"github.com/crawlab-team/crawlab/core/config"
"github.com/crawlab-team/crawlab/core/container"
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/core/models/delegate"
"github.com/crawlab-team/crawlab/core/models/models"
"github.com/crawlab-team/crawlab/core/models/service"
"github.com/crawlab-team/crawlab/core/utils"
"github.com/crawlab-team/go-trace"
"github.com/robfig/cron/v3"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/bson/primitive"
"sync"
"time"
)
// Service keeps a cron runner in sync with the enabled schedules returned by
// the model service and submits a spider run each time a schedule fires.
type Service struct {
// dependencies
interfaces.WithConfigPath
// modelSvc loads schedules and spiders.
modelSvc service.ModelService
// adminSvc receives the spider run requests produced by fired schedules.
adminSvc interfaces.SpiderAdminService
// settings variables
// loc is handed to cron.WithLocation when the cron runner is constructed.
loc *time.Location
// delay and skip are stored only; the constructor carries a TODO to implement them.
delay bool
skip bool
// updateInterval is the sleep between two update passes in Update.
updateInterval time.Duration
// internals
cron *cron.Cron
logger cron.Logger
// schedules holds the result of the most recent fetch.
schedules []models.Schedule
// stopped is set by Stop and checked by the Update loop.
stopped bool
// mu is held by Enable and Disable.
mu sync.Mutex
}
// GetLocation returns the *time.Location stored on the service.
func (svc *Service) GetLocation() (loc *time.Location) {
return svc.loc
}
// SetLocation stores loc on the service. It only assigns the field; it does
// not touch an already constructed cron runner.
func (svc *Service) SetLocation(loc *time.Location) {
svc.loc = loc
}
// GetDelay returns the stored delay flag.
func (svc *Service) GetDelay() (delay bool) {
return svc.delay
}
// SetDelay stores the delay flag on the service.
func (svc *Service) SetDelay(delay bool) {
svc.delay = delay
}
// GetSkip returns the stored skip flag.
func (svc *Service) GetSkip() (skip bool) {
return svc.skip
}
// SetSkip stores the skip flag on the service.
func (svc *Service) SetSkip(skip bool) {
svc.skip = skip
}
// GetUpdateInterval returns the sleep duration used between update passes.
func (svc *Service) GetUpdateInterval() (interval time.Duration) {
return svc.updateInterval
}
// SetUpdateInterval sets the sleep duration used between update passes.
func (svc *Service) SetUpdateInterval(interval time.Duration) {
svc.updateInterval = interval
}
// Init loads the currently enabled schedules via fetch and returns its error.
func (svc *Service) Init() (err error) {
return svc.fetch()
}
// Start starts the cron runner and launches the Update loop in a new goroutine.
func (svc *Service) Start() {
svc.cron.Start()
go svc.Update()
}
// Wait calls utils.DefaultWait and, once it returns, stops the service.
// NOTE(review): presumably DefaultWait blocks until a termination signal — confirm.
func (svc *Service) Wait() {
utils.DefaultWait()
svc.Stop()
}
// Stop sets the stopped flag, which the Update loop checks at the top of each
// iteration, and then stops the cron runner.
func (svc *Service) Stop() {
svc.stopped = true
svc.cron.Stop()
}
// Enable registers the schedule's cron expression with the cron runner, marks
// the schedule as enabled with the new entry id, and saves it.
//
// args is forwarded to utils.GetUserFromArgs to resolve the user passed to the
// model delegate. An invalid cron expression is returned wrapped by
// trace.TraceError. If saving fails, the cron entry added by this call is
// removed again and the schedule's enabled flag and entry id are restored, so
// no job keeps firing for a schedule whose state was never persisted.
func (svc *Service) Enable(s interfaces.Schedule, args ...interface{}) (err error) {
	svc.mu.Lock()
	defer svc.mu.Unlock()

	id, err := svc.cron.AddFunc(s.GetCron(), svc.schedule(s.GetId()))
	if err != nil {
		return trace.TraceError(err)
	}

	// Remember the previous state so a failed save can be undone.
	prevEnabled, prevEntryId := s.GetEnabled(), s.GetEntryId()
	s.SetEnabled(true)
	s.SetEntryId(id)

	u := utils.GetUserFromArgs(args...)
	if err := delegate.NewModelDelegate(s, u).Save(); err != nil {
		svc.cron.Remove(id)
		s.SetEnabled(prevEnabled)
		s.SetEntryId(prevEntryId)
		return err
	}
	return nil
}
// Disable removes the schedule's entry from the cron runner, marks the
// schedule as disabled with entry id -1, and saves it.
//
// args is forwarded to utils.GetUserFromArgs to resolve the user passed to the
// model delegate. The error from saving is returned.
func (svc *Service) Disable(s interfaces.Schedule, args ...interface{}) (err error) {
	svc.mu.Lock()
	defer svc.mu.Unlock()

	// Drop the cron entry before the id on the schedule is reset.
	entryId := s.GetEntryId()
	svc.cron.Remove(entryId)

	s.SetEnabled(false)
	s.SetEntryId(-1)

	user := utils.GetUserFromArgs(args...)
	err = delegate.NewModelDelegate(s, user).Save()
	return err
}
// Update runs update passes until the stopped flag is set, sleeping for
// updateInterval after each pass. Start runs it in its own goroutine.
//
// NOTE(review): stopped is written by Stop and read here without
// synchronization.
func (svc *Service) Update() {
	for !svc.stopped {
		svc.update()
		time.Sleep(svc.updateInterval)
	}
}
// GetCron returns the underlying cron runner.
func (svc *Service) GetCron() (c *cron.Cron) {
return svc.cron
}
// update performs one reconciliation pass between the enabled schedules and
// the cron runner:
//
//  1. reload the enabled schedules (on failure the error is printed and the
//     pass is abandoned);
//  2. snapshot the cron entry ids, all initially unmarked;
//  3. for each schedule, mark its entry id if it is in the snapshot, otherwise
//     enable the schedule so that it gets a cron entry;
//  4. remove every snapshot entry that no schedule claimed.
func (svc *Service) update() {
	err := svc.fetch()
	if err != nil {
		trace.PrintError(err)
		return
	}

	seen := svc.getEntryIdsMap()

	for _, s := range svc.schedules {
		if _, known := seen[s.EntryId]; known {
			seen[s.EntryId] = true
			continue
		}
		// Not in the snapshot: register the schedule with cron.
		if enableErr := svc.Enable(&s); enableErr != nil {
			trace.PrintError(enableErr)
		}
	}

	for entryId, claimed := range seen {
		if claimed {
			continue
		}
		svc.cron.Remove(entryId)
	}
}
// getEntryIdsMap returns a map keyed by the id of every entry currently in the
// cron runner, with every value set to false.
func (svc *Service) getEntryIdsMap() (res map[cron.EntryID]bool) {
	entries := svc.cron.Entries()
	res = make(map[cron.EntryID]bool, len(entries))
	for i := range entries {
		res[entries[i].ID] = false
	}
	return res
}
// fetch queries the model service for schedules whose "enabled" field is true
// and stores the result in svc.schedules. The query error, if any, is returned.
func (svc *Service) fetch() (err error) {
	enabledOnly := bson.M{"enabled": true}
	svc.schedules, err = svc.modelSvc.GetScheduleList(enabledOnly, nil)
	return err
}
// schedule returns the job function registered with cron for the schedule
// with the given id.
//
// Each time it runs, the job reloads the schedule and its spider, builds the
// run options from the schedule, fills empty options from the spider
// (priority falls back to 5 when neither sets one), and submits the run
// through the admin service. Errors are printed, not returned.
func (svc *Service) schedule(id primitive.ObjectID) (fn func()) {
	fn = func() {
		// load the schedule
		sch, err := svc.modelSvc.GetScheduleById(id)
		if err != nil {
			trace.PrintError(err)
			return
		}

		// load the spider it targets
		sp, err := svc.modelSvc.GetSpiderById(sch.GetSpiderId())
		if err != nil {
			trace.PrintError(err)
			return
		}

		// start from the schedule's own settings
		runOpts := &interfaces.SpiderRunOptions{
			Mode:       sch.GetMode(),
			NodeIds:    sch.GetNodeIds(),
			Cmd:        sch.GetCmd(),
			Param:      sch.GetParam(),
			Priority:   sch.GetPriority(),
			ScheduleId: sch.GetId(),
			UserId:     sch.UserId,
		}

		// fall back to the spider's settings for anything left empty
		if runOpts.Mode == "" {
			runOpts.Mode = sp.Mode
		}
		if len(runOpts.NodeIds) == 0 {
			runOpts.NodeIds = sp.NodeIds
		}
		if runOpts.Cmd == "" {
			runOpts.Cmd = sp.Cmd
		}
		if runOpts.Param == "" {
			runOpts.Param = sp.Param
		}
		if runOpts.Priority == 0 {
			runOpts.Priority = 5
			if sp.Priority > 0 {
				runOpts.Priority = sp.Priority
			}
		}

		// schedule or assign a task in the task queue
		_, err = svc.adminSvc.Schedule(sch.GetSpiderId(), runOpts)
		if err != nil {
			trace.PrintError(err)
		}
	}
	return fn
}
// NewScheduleService builds a Service with default settings (local time zone,
// one-minute update interval), resolves its model and spider-admin
// dependencies from the container, creates the cron runner with the package
// logger and panic recovery, and runs Init.
//
// A dependency-injection failure is returned wrapped by trace.TraceError; an
// Init failure is returned as is.
func NewScheduleService() (svc2 interfaces.ScheduleService, err error) {
	svc := &Service{
		WithConfigPath: config.NewConfigPathService(),
		loc:            time.Local,
		// TODO: implement delay and skip
		delay:          false,
		skip:           false,
		updateInterval: time.Minute,
	}

	// dependency injection
	inject := func(
		modelSvc service.ModelService,
		adminSvc interfaces.SpiderAdminService,
	) {
		svc.modelSvc = modelSvc
		svc.adminSvc = adminSvc
	}
	if err = container.GetContainer().Invoke(inject); err != nil {
		return nil, trace.TraceError(err)
	}

	// logger and cron runner
	svc.logger = NewLogger()
	cronOpts := []cron.Option{
		cron.WithLogger(svc.logger),
		cron.WithLocation(svc.loc),
		cron.WithChain(cron.Recover(svc.logger)),
	}
	svc.cron = cron.New(cronOpts...)

	// initialize
	if err = svc.Init(); err != nil {
		return nil, err
	}
	return svc, nil
}
// svc caches the schedule service created by GetScheduleService.
var svc interfaces.ScheduleService

// GetScheduleService returns the cached schedule service, creating it with
// NewScheduleService on first use. A creation error is returned and nothing
// is cached.
func GetScheduleService() (res interfaces.ScheduleService, err error) {
	if svc == nil {
		if svc, err = NewScheduleService(); err != nil {
			return nil, err
		}
	}
	return svc, nil
}

283
core/schedule/service_v2.go Normal file
View File

@@ -0,0 +1,283 @@
package schedule
import (
"github.com/crawlab-team/crawlab/core/config"
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/core/models/models"
"github.com/crawlab-team/crawlab/core/models/service"
"github.com/crawlab-team/crawlab/core/spider/admin"
"github.com/crawlab-team/crawlab/core/utils"
"github.com/crawlab-team/go-trace"
"github.com/robfig/cron/v3"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/bson/primitive"
"sync"
"time"
)
// ServiceV2 keeps a cron runner in sync with the enabled ScheduleV2 records
// and submits a spider run each time a schedule fires.
type ServiceV2 struct {
// dependencies
interfaces.WithConfigPath
// modelSvc loads and replaces ScheduleV2 records.
modelSvc *service.ModelServiceV2[models.ScheduleV2]
// adminSvc receives the spider run requests produced by fired schedules.
adminSvc *admin.ServiceV2
// settings variables
// loc is handed to cron.WithLocation when the cron runner is constructed.
loc *time.Location
// delay and skip are stored only; the constructor carries a TODO to implement them.
delay bool
skip bool
// updateInterval is the sleep between two update passes in Update.
updateInterval time.Duration
// internals
cron *cron.Cron
logger cron.Logger
// schedules holds the result of the most recent fetch.
schedules []models.ScheduleV2
// stopped is set by Stop and checked by the Update loop.
stopped bool
// mu is held by Enable and Disable.
mu sync.Mutex
}
// GetLocation returns the *time.Location stored on the service.
func (svc *ServiceV2) GetLocation() (loc *time.Location) {
return svc.loc
}
// SetLocation stores loc on the service. It only assigns the field; it does
// not touch an already constructed cron runner.
func (svc *ServiceV2) SetLocation(loc *time.Location) {
svc.loc = loc
}
// GetDelay returns the stored delay flag.
func (svc *ServiceV2) GetDelay() (delay bool) {
return svc.delay
}
// SetDelay stores the delay flag on the service.
func (svc *ServiceV2) SetDelay(delay bool) {
svc.delay = delay
}
// GetSkip returns the stored skip flag.
func (svc *ServiceV2) GetSkip() (skip bool) {
return svc.skip
}
// SetSkip stores the skip flag on the service.
func (svc *ServiceV2) SetSkip(skip bool) {
svc.skip = skip
}
// GetUpdateInterval returns the sleep duration used between update passes.
func (svc *ServiceV2) GetUpdateInterval() (interval time.Duration) {
return svc.updateInterval
}
// SetUpdateInterval sets the sleep duration used between update passes.
func (svc *ServiceV2) SetUpdateInterval(interval time.Duration) {
svc.updateInterval = interval
}
// Init loads the currently enabled schedules via fetch and returns its error.
func (svc *ServiceV2) Init() (err error) {
return svc.fetch()
}
// Start starts the cron runner and launches the Update loop in a new goroutine.
func (svc *ServiceV2) Start() {
svc.cron.Start()
go svc.Update()
}
// Wait calls utils.DefaultWait and, once it returns, stops the service.
// NOTE(review): presumably DefaultWait blocks until a termination signal — confirm.
func (svc *ServiceV2) Wait() {
utils.DefaultWait()
svc.Stop()
}
// Stop sets the stopped flag, which the Update loop checks at the top of each
// iteration, and then stops the cron runner.
func (svc *ServiceV2) Stop() {
svc.stopped = true
svc.cron.Stop()
}
// Enable registers the schedule's cron expression with the cron runner and
// persists the schedule as enabled with the new entry id, recording by as the
// updating user.
//
// s is received by value, so the caller's copy is not modified. An invalid
// cron expression is returned wrapped by trace.TraceError. If persisting
// fails, the cron entry added by this call is removed again so no job keeps
// firing for a schedule whose state was never stored.
func (svc *ServiceV2) Enable(s models.ScheduleV2, by primitive.ObjectID) (err error) {
	svc.mu.Lock()
	defer svc.mu.Unlock()

	id, err := svc.cron.AddFunc(s.Cron, svc.schedule(s.Id))
	if err != nil {
		return trace.TraceError(err)
	}

	s.Enabled = true
	s.EntryId = id
	s.SetUpdated(by)
	if err := svc.modelSvc.ReplaceById(s.Id, s); err != nil {
		// Roll back the cron registration to match the stored state.
		svc.cron.Remove(id)
		return err
	}
	return nil
}
// Disable removes the schedule's entry from the cron runner and persists the
// schedule as disabled with entry id -1, recording by as the updating user.
//
// s is received by value, so the caller's copy is not modified. The error
// from ReplaceById is returned.
func (svc *ServiceV2) Disable(s models.ScheduleV2, by primitive.ObjectID) (err error) {
	svc.mu.Lock()
	defer svc.mu.Unlock()

	// Drop the cron entry before the id on the record is reset.
	entryId := s.EntryId
	svc.cron.Remove(entryId)

	s.Enabled = false
	s.EntryId = -1
	s.SetUpdated(by)

	err = svc.modelSvc.ReplaceById(s.Id, s)
	return err
}
// Update runs update passes until the stopped flag is set, sleeping for
// updateInterval after each pass. Start runs it in its own goroutine.
//
// NOTE(review): stopped is written by Stop and read here without
// synchronization.
func (svc *ServiceV2) Update() {
	for !svc.stopped {
		svc.update()
		time.Sleep(svc.updateInterval)
	}
}
// GetCron returns the underlying cron runner.
func (svc *ServiceV2) GetCron() (c *cron.Cron) {
return svc.cron
}
// update performs one reconciliation pass between the enabled schedules and
// the cron runner:
//
//  1. reload the enabled schedules (on failure the error is printed and the
//     pass is abandoned);
//  2. snapshot the cron entry ids, all initially unmarked;
//  3. for each schedule, mark its entry id if it is in the snapshot, otherwise
//     enable the schedule so that it gets a cron entry;
//  4. remove every snapshot entry that no schedule claimed.
//
// fetch only returns schedules whose "enabled" field is true, so step 3 must
// not additionally require the schedule to be disabled: that condition could
// never hold and left enabled schedules without a cron entry.
func (svc *ServiceV2) update() {
	// fetch enabled schedules
	if err := svc.fetch(); err != nil {
		trace.PrintError(err)
		return
	}

	// entry id map
	entryIdsMap := svc.getEntryIdsMap()

	// iterate enabled schedules
	for _, s := range svc.schedules {
		if _, ok := entryIdsMap[s.EntryId]; ok {
			entryIdsMap[s.EntryId] = true
			continue
		}
		// Enabled in storage but unknown to cron: register it.
		if err := svc.Enable(s, s.GetCreatedBy()); err != nil {
			trace.PrintError(err)
		}
	}

	// remove non-existent entries
	for id, ok := range entryIdsMap {
		if !ok {
			svc.cron.Remove(id)
		}
	}
}
// getEntryIdsMap returns a map keyed by the id of every entry currently in the
// cron runner, with every value set to false.
func (svc *ServiceV2) getEntryIdsMap() (res map[cron.EntryID]bool) {
	entries := svc.cron.Entries()
	res = make(map[cron.EntryID]bool, len(entries))
	for i := range entries {
		res[entries[i].ID] = false
	}
	return res
}
// fetch queries the model service for schedules whose "enabled" field is true
// and stores the result in svc.schedules. The query error, if any, is returned.
func (svc *ServiceV2) fetch() (err error) {
	enabledOnly := bson.M{"enabled": true}
	svc.schedules, err = svc.modelSvc.GetMany(enabledOnly, nil)
	return err
}
// schedule returns the job function registered with cron for the schedule
// with the given id.
//
// Each time it runs, the job reloads the schedule and its spider, builds the
// run options from the schedule, fills empty options from the spider
// (priority falls back to 5 when neither sets one), and submits the run
// through the admin service. Errors are printed, not returned.
func (svc *ServiceV2) schedule(id primitive.ObjectID) (fn func()) {
	fn = func() {
		// load the schedule
		sch, err := svc.modelSvc.GetById(id)
		if err != nil {
			trace.PrintError(err)
			return
		}

		// load the spider it targets
		sp, err := service.NewModelServiceV2[models.SpiderV2]().GetById(sch.SpiderId)
		if err != nil {
			trace.PrintError(err)
			return
		}

		// start from the schedule's own settings
		runOpts := &interfaces.SpiderRunOptions{
			Mode:       sch.Mode,
			NodeIds:    sch.NodeIds,
			Cmd:        sch.Cmd,
			Param:      sch.Param,
			Priority:   sch.Priority,
			ScheduleId: sch.Id,
			UserId:     sch.GetCreatedBy(),
		}

		// fall back to the spider's settings for anything left empty
		if runOpts.Mode == "" {
			runOpts.Mode = sp.Mode
		}
		if len(runOpts.NodeIds) == 0 {
			runOpts.NodeIds = sp.NodeIds
		}
		if runOpts.Cmd == "" {
			runOpts.Cmd = sp.Cmd
		}
		if runOpts.Param == "" {
			runOpts.Param = sp.Param
		}
		if runOpts.Priority == 0 {
			runOpts.Priority = 5
			if sp.Priority > 0 {
				runOpts.Priority = sp.Priority
			}
		}

		// schedule or assign a task in the task queue
		_, err = svc.adminSvc.Schedule(sch.SpiderId, runOpts)
		if err != nil {
			trace.PrintError(err)
		}
	}
	return fn
}
// NewScheduleServiceV2 builds a ServiceV2 with default settings (local time
// zone, one-minute update interval), obtains the spider admin service and a
// ScheduleV2 model service, creates the cron runner with the package logger
// and panic recovery, and runs Init.
//
// Errors from obtaining the admin service or from Init are returned as is.
func NewScheduleServiceV2() (svc2 *ServiceV2, err error) {
	svc := &ServiceV2{
		WithConfigPath: config.NewConfigPathService(),
		loc:            time.Local,
		// TODO: implement delay and skip
		delay:          false,
		skip:           false,
		updateInterval: time.Minute,
	}

	// dependencies
	if svc.adminSvc, err = admin.GetSpiderAdminServiceV2(); err != nil {
		return nil, err
	}
	svc.modelSvc = service.NewModelServiceV2[models.ScheduleV2]()

	// logger and cron runner
	svc.logger = NewLogger()
	cronOpts := []cron.Option{
		cron.WithLogger(svc.logger),
		cron.WithLocation(svc.loc),
		cron.WithChain(cron.Recover(svc.logger)),
	}
	svc.cron = cron.New(cronOpts...)

	// initialize
	if err = svc.Init(); err != nil {
		return nil, err
	}
	return svc, nil
}
// svcV2 caches the schedule service created by GetScheduleServiceV2.
var svcV2 *ServiceV2

// GetScheduleServiceV2 returns the cached schedule service, creating it with
// NewScheduleServiceV2 on first use. A creation error is returned and nothing
// is cached.
func GetScheduleServiceV2() (res *ServiceV2, err error) {
	if svcV2 == nil {
		if svcV2, err = NewScheduleServiceV2(); err != nil {
			return nil, err
		}
	}
	return svcV2, nil
}

View File

@@ -0,0 +1,91 @@
package test
import (
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/core/models/delegate"
"github.com/crawlab-team/crawlab/core/models/models"
"github.com/crawlab-team/crawlab/core/models/service"
"github.com/crawlab-team/crawlab/core/schedule"
"go.uber.org/dig"
"testing"
)
// init builds the shared test fixture T and panics if that fails.
func init() {
	var err error
	if T, err = NewTest(); err != nil {
		panic(err)
	}
}
// T is the shared test fixture, assigned in init.
var T *Test
// Test bundles the services and sample data used by the schedule tests.
type Test struct {
// dependencies
modelSvc service.ModelService
scheduleSvc interfaces.ScheduleService
// test data
// TestSchedule and TestSpider are added to the database by NewTest.
TestSchedule interfaces.Schedule
TestSpider interfaces.Spider
// ScriptName and Script hold the file name and Go source of a sample program.
ScriptName string
Script string
}
// Setup starts the schedule service and registers t.Cleanup with t2 so it runs
// when the test finishes.
func (t *Test) Setup(t2 *testing.T) {
t.scheduleSvc.Start()
t2.Cleanup(t.Cleanup)
}
// Cleanup stops the schedule service and calls Delete(nil) on the task base
// service, deliberately ignoring its error.
// NOTE(review): presumably Delete(nil) removes all task records — confirm.
func (t *Test) Cleanup() {
t.scheduleSvc.Stop()
_ = t.modelSvc.GetBaseService(interfaces.ModelIdTask).Delete(nil)
}
// NewTest builds the test fixture: it creates the sample spider and script,
// resolves the model and schedule services through a dig container, adds the
// spider to the database, and then creates and adds a schedule that targets
// it with the cron expression "* * * * *".
//
// The first error encountered is returned with a nil fixture.
func NewTest() (t *Test, err error) {
	// test
	t = &Test{
		TestSpider: &models.Spider{
			Name: "test_spider",
			Cmd:  "go run main.go",
		},
		ScriptName: "main.go",
		Script: `package main
import "fmt"
func main() {
fmt.Println("it works")
}`,
	}

	// dependency injection
	c := dig.New()
	constructors := []interface{}{
		service.GetService,
		schedule.NewScheduleService,
	}
	for _, ctor := range constructors {
		if err = c.Provide(ctor); err != nil {
			return nil, err
		}
	}
	bind := func(modelSvc service.ModelService, scheduleSvc interfaces.ScheduleService) {
		t.modelSvc = modelSvc
		t.scheduleSvc = scheduleSvc
	}
	if err = c.Invoke(bind); err != nil {
		return nil, err
	}

	// add spider to db
	if err = delegate.NewModelDelegate(t.TestSpider).Add(); err != nil {
		return nil, err
	}

	// test schedule
	t.TestSchedule = &models.Schedule{
		Name:     "test_schedule",
		SpiderId: t.TestSpider.GetId(),
		Cron:     "* * * * *",
	}
	if err = delegate.NewModelDelegate(t.TestSchedule).Add(); err != nil {
		return nil, err
	}

	return t, nil
}

View File

@@ -0,0 +1,44 @@
package test
import (
"github.com/stretchr/testify/require"
"testing"
"time"
)
// TestScheduleService_Enable_Disable enables the test schedule, checks that it
// is marked enabled and has a matching cron entry, then disables it and checks
// that it is marked disabled and that the cron runner has no entries left.
func TestScheduleService_Enable_Disable(t *testing.T) {
	var err error
	T.Setup(t)
	time.Sleep(1 * time.Second)

	err = T.scheduleSvc.Enable(T.TestSchedule)
	require.Nil(t, err)
	time.Sleep(1 * time.Second)
	require.True(t, T.TestSchedule.GetEnabled())
	require.Greater(t, int(T.TestSchedule.GetEntryId()), -1)
	e := T.scheduleSvc.GetCron().Entry(T.TestSchedule.GetEntryId())
	require.Equal(t, T.TestSchedule.GetEntryId(), e.ID)
	time.Sleep(1 * time.Second)

	err = T.scheduleSvc.Disable(T.TestSchedule)
	// The Disable error was previously assigned but never checked.
	require.Nil(t, err)
	require.False(t, T.TestSchedule.GetEnabled())
	require.Equal(t, 0, len(T.scheduleSvc.GetCron().Entries()))
}
// TestScheduleService_Run enables the every-minute test schedule, waits one
// minute, and checks that at least one task exists and that every task
// carries a non-zero schedule id.
func TestScheduleService_Run(t *testing.T) {
	T.Setup(t)
	time.Sleep(1 * time.Second)

	enableErr := T.scheduleSvc.Enable(T.TestSchedule)
	require.Nil(t, enableErr)

	// Give the cron runner time to fire the schedule at least once.
	time.Sleep(1 * time.Minute)

	tasks, listErr := T.modelSvc.GetTaskList(nil, nil)
	require.Nil(t, listErr)
	require.Greater(t, len(tasks), 0)
	for _, task := range tasks {
		hasNoSchedule := task.ScheduleId.IsZero()
		require.False(t, hasNoSchedule)
	}
}