Files
crawlab/core/task/stats/service.go
2024-06-14 16:37:48 +08:00

156 lines
3.4 KiB
Go

package stats
import (
"github.com/crawlab-team/crawlab/core/container"
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/core/models/service"
"github.com/crawlab-team/crawlab/core/result"
"github.com/crawlab-team/crawlab/core/task"
"github.com/crawlab-team/crawlab/core/task/log"
"github.com/crawlab-team/crawlab/db/mongo"
"github.com/crawlab-team/crawlab/trace"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/bson/primitive"
"sync"
"time"
)
// Service is the task stats service. It writes task result records through a
// per-task result service and writes task log lines through a log driver.
type Service struct {
	// dependencies
	interfaces.TaskBaseService
	nodeCfgSvc interfaces.NodeConfigService
	// modelSvc is used to load a task by id when its result service is not
	// cached yet.
	modelSvc service.ModelService

	// internals

	// mu is held by getResultService and by each cleanup pass.
	mu sync.Mutex
	// resultServices caches result services keyed by the task id's hex string.
	resultServices sync.Map
	// rsTtl is added to a cached service's timestamp to decide whether cleanup
	// removes it.
	rsTtl time.Duration
	// logDriver receives the lines passed to InsertLogs.
	logDriver log.Driver
}
// Init starts the cache cleanup loop in its own goroutine and returns
// immediately. It always returns nil.
func (svc *Service) Init() (err error) {
	// cleanup loops forever, so it must not run on the caller's goroutine.
	go func() {
		svc.cleanup()
	}()
	return nil
}
// InsertData writes records for task id through that task's result service
// and then bumps the task's result counter in the background.
//
// An error from resolving the result service or from the insert is returned
// as-is, and the counter is not touched in that case.
func (svc *Service) InsertData(id primitive.ObjectID, records ...interface{}) (err error) {
	rs, err := svc.getResultService(id)
	if err != nil {
		return err
	}

	err = rs.Insert(records...)
	if err != nil {
		return err
	}

	// The counter update runs asynchronously; its outcome is not reported to
	// the caller.
	inserted := len(records)
	go svc.updateTaskStats(id, inserted)

	return nil
}
// InsertLogs hands logs to the log driver under the task id's hex string and
// returns whatever error the driver reports.
func (svc *Service) InsertLogs(id primitive.ObjectID, logs ...string) (err error) {
	taskKey := id.Hex()
	err = svc.logDriver.WriteLines(taskKey, logs)
	return err
}
// getResultService returns the result service for task id, creating and
// caching it on first use.
//
// Cached services are keyed by the task id's hex string. A cache hit refreshes
// the service's timestamp, which cleanup compares against rsTtl when deciding
// what to evict. On a miss the task is loaded through modelSvc and the result
// service is obtained from result.GetResultService using the task's SpiderId.
//
// An error from the task lookup or from result.GetResultService is returned
// unchanged, and nothing is stored in the cache in that case.
func (svc *Service) getResultService(id primitive.ObjectID) (resultSvc interfaces.ResultService, err error) {
	// Hold the lock for the whole lookup so it cannot interleave with another
	// lookup or with a cleanup pass.
	svc.mu.Lock()
	defer svc.mu.Unlock()

	key := id.Hex()

	// attempt to get from cache
	if cached, found := svc.resultServices.Load(key); found {
		// Verify the type before calling any method: a failed assertion yields
		// a nil interface, and calling SetTime on it would panic. An entry of
		// the wrong type falls through and is replaced below.
		if rs, ok := cached.(interfaces.ResultService); ok {
			rs.SetTime(time.Now())
			return rs, nil
		}
	}

	// task
	t, err := svc.modelSvc.GetTaskById(id)
	if err != nil {
		return nil, err
	}

	// result service
	resultSvc, err = result.GetResultService(t.SpiderId)
	if err != nil {
		return nil, err
	}

	// store in cache
	svc.resultServices.Store(key, resultSvc)

	return resultSvc, nil
}
// updateTaskStats increments the "result_count" field of the task stat
// document whose id is id by resultCount.
func (svc *Service) updateTaskStats(id primitive.ObjectID, resultCount int) {
	col := mongo.GetMongoCol(interfaces.ModelColNameTaskStat)
	update := bson.M{
		"$inc": bson.M{
			"result_count": resultCount,
		},
	}

	// The error is deliberately dropped: InsertData calls this in a detached
	// goroutine, so there is no caller to hand it back to.
	_ = col.UpdateId(id, update)
}
// cleanup loops forever, removing cached result services whose timestamp plus
// rsTtl lies in the past, then sleeping ten minutes before the next pass.
// It never returns.
func (svc *Service) cleanup() {
	const interval = 10 * time.Minute

	// evictIfExpired is the per-entry callback for sync.Map.Range. It always
	// returns true so that the whole map is visited.
	evictIfExpired := func(key, value interface{}) bool {
		rs := value.(interfaces.ResultService)
		deadline := rs.GetTime().Add(svc.rsTtl)
		if time.Now().After(deadline) {
			svc.resultServices.Delete(key)
		}
		return true
	}

	for {
		// Take the same lock as getResultService so a pass does not interleave
		// with a lookup.
		svc.mu.Lock()
		svc.resultServices.Range(evictIfExpired)
		svc.mu.Unlock()

		time.Sleep(interval)
	}
}
// NewTaskStatsService builds a Service: it creates the task base service,
// resolves NodeConfigService and ModelService from the dependency container,
// and obtains the file log driver.
//
// Errors from the base service and from the container are passed through
// trace.TraceError; an error from log.GetLogDriver is returned as-is. The
// returned service is nil whenever err is non-nil.
//
// NOTE(review): rsTtl is not assigned here, so it keeps its zero value unless
// something outside this function sets it. With a zero TTL, cleanup treats
// every entry whose timestamp is in the past as expired. Confirm this is
// intended.
func NewTaskStatsService() (svc2 interfaces.TaskStatsService, err error) {
	// base service
	baseSvc, err := task.NewBaseService()
	if err != nil {
		return nil, trace.TraceError(err)
	}

	// service; the mutex and the sync.Map are usable as zero values
	svc := &Service{TaskBaseService: baseSvc}

	// dependency injection
	inject := func(nodeCfgSvc interfaces.NodeConfigService, modelSvc service.ModelService) {
		svc.nodeCfgSvc, svc.modelSvc = nodeCfgSvc, modelSvc
	}
	if err := container.GetContainer().Invoke(inject); err != nil {
		return nil, trace.TraceError(err)
	}

	// log driver
	driver, err := log.GetLogDriver(log.DriverTypeFile)
	if err != nil {
		return nil, err
	}
	svc.logDriver = driver

	return svc, nil
}
// _service holds the package-wide task stats service once it has been built.
var _service interfaces.TaskStatsService

// GetTaskStatsService returns the package-wide task stats service, building it
// with NewTaskStatsService on the first call. If construction fails, the error
// is returned and a later call tries again.
//
// NOTE(review): no lock or sync.Once guards _service, so concurrent first
// calls may each run NewTaskStatsService. Confirm callers initialise from a
// single goroutine.
func GetTaskStatsService() (svr interfaces.TaskStatsService, err error) {
	if _service == nil {
		_service, err = NewTaskStatsService()
		if err != nil {
			return nil, err
		}
	}
	return _service, nil
}