mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-21 17:21:09 +01:00
refactor: integrated database services into task data insert
This commit is contained in:
@@ -3,7 +3,6 @@ package controllers
|
|||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"github.com/apex/log"
|
"github.com/apex/log"
|
||||||
"github.com/crawlab-team/crawlab/core/constants"
|
|
||||||
"github.com/crawlab-team/crawlab/core/fs"
|
"github.com/crawlab-team/crawlab/core/fs"
|
||||||
"github.com/crawlab-team/crawlab/core/interfaces"
|
"github.com/crawlab-team/crawlab/core/interfaces"
|
||||||
models2 "github.com/crawlab-team/crawlab/core/models/models/v2"
|
models2 "github.com/crawlab-team/crawlab/core/models/models/v2"
|
||||||
@@ -48,8 +47,8 @@ func GetSpiderById(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// data collection
|
// data collection (compatible to old version) # TODO: remove in the future
|
||||||
if !s.ColId.IsZero() {
|
if s.ColName == "" && !s.ColId.IsZero() {
|
||||||
col, err := service.NewModelServiceV2[models2.DataCollectionV2]().GetById(s.ColId)
|
col, err := service.NewModelServiceV2[models2.DataCollectionV2]().GetById(s.ColId)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if !errors.Is(err, mongo2.ErrNoDocuments) {
|
if !errors.Is(err, mongo2.ErrNoDocuments) {
|
||||||
@@ -252,12 +251,6 @@ func PostSpider(c *gin.Context) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// upsert data collection
|
|
||||||
if err := upsertSpiderDataCollection(&s); err != nil {
|
|
||||||
HandleErrorInternalServerError(c, err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// user
|
// user
|
||||||
u := GetUserFromContextV2(c)
|
u := GetUserFromContextV2(c)
|
||||||
|
|
||||||
@@ -311,12 +304,6 @@ func PutSpiderById(c *gin.Context) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// upsert data collection
|
|
||||||
if err := upsertSpiderDataCollection(&s); err != nil {
|
|
||||||
HandleErrorInternalServerError(c, err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
u := GetUserFromContextV2(c)
|
u := GetUserFromContextV2(c)
|
||||||
|
|
||||||
modelSvc := service.NewModelServiceV2[models2.SpiderV2]()
|
modelSvc := service.NewModelServiceV2[models2.SpiderV2]()
|
||||||
@@ -773,49 +760,6 @@ func getSpiderFsSvcById(id primitive.ObjectID) (svc interfaces.FsServiceV2, err
|
|||||||
return getSpiderFsSvc(s)
|
return getSpiderFsSvc(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
func upsertSpiderDataCollection(s *models2.SpiderV2) (err error) {
|
|
||||||
modelSvc := service.NewModelServiceV2[models2.DataCollectionV2]()
|
|
||||||
if s.ColId.IsZero() {
|
|
||||||
// validate
|
|
||||||
if s.ColName == "" {
|
|
||||||
return errors.New("data collection name is required")
|
|
||||||
}
|
|
||||||
// no id
|
|
||||||
dc, err := modelSvc.GetOne(bson.M{"name": s.ColName}, nil)
|
|
||||||
if err != nil {
|
|
||||||
if errors.Is(err, mongo2.ErrNoDocuments) {
|
|
||||||
// not exists, add new
|
|
||||||
dc = &models2.DataCollectionV2{Name: s.ColName}
|
|
||||||
dcId, err := modelSvc.InsertOne(*dc)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
dc.SetId(dcId)
|
|
||||||
} else {
|
|
||||||
// error
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
s.ColId = dc.Id
|
|
||||||
|
|
||||||
// create index
|
|
||||||
_ = mongo.GetMongoCol(dc.Name).CreateIndex(mongo2.IndexModel{Keys: bson.M{constants.TaskKey: 1}})
|
|
||||||
_ = mongo.GetMongoCol(dc.Name).CreateIndex(mongo2.IndexModel{Keys: bson.M{constants.HashKey: 1}})
|
|
||||||
} else {
|
|
||||||
// with id
|
|
||||||
dc, err := modelSvc.GetById(s.ColId)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
s.ColId = dc.Id
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func UpsertSpiderDataCollection(s *models2.SpiderV2) (err error) {
|
|
||||||
return upsertSpiderDataCollection(s)
|
|
||||||
}
|
|
||||||
|
|
||||||
func getSpiderRootPath(c *gin.Context) (rootPath string, err error) {
|
func getSpiderRootPath(c *gin.Context) (rootPath string, err error) {
|
||||||
// spider id
|
// spider id
|
||||||
id, err := primitive.ObjectIDFromHex(c.Param("id"))
|
id, err := primitive.ObjectIDFromHex(c.Param("id"))
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ import (
|
|||||||
log2 "github.com/apex/log"
|
log2 "github.com/apex/log"
|
||||||
"github.com/crawlab-team/crawlab/core/constants"
|
"github.com/crawlab-team/crawlab/core/constants"
|
||||||
"github.com/crawlab-team/crawlab/core/interfaces"
|
"github.com/crawlab-team/crawlab/core/interfaces"
|
||||||
models2 "github.com/crawlab-team/crawlab/core/models/models/v2"
|
"github.com/crawlab-team/crawlab/core/models/models/v2"
|
||||||
"github.com/crawlab-team/crawlab/core/models/service"
|
"github.com/crawlab-team/crawlab/core/models/service"
|
||||||
"github.com/crawlab-team/crawlab/core/result"
|
"github.com/crawlab-team/crawlab/core/result"
|
||||||
"github.com/crawlab-team/crawlab/core/spider/admin"
|
"github.com/crawlab-team/crawlab/core/spider/admin"
|
||||||
@@ -34,7 +34,7 @@ func GetTaskById(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// task
|
// task
|
||||||
t, err := service.NewModelServiceV2[models2.TaskV2]().GetById(id)
|
t, err := service.NewModelServiceV2[models.TaskV2]().GetById(id)
|
||||||
if errors.Is(err, mongo2.ErrNoDocuments) {
|
if errors.Is(err, mongo2.ErrNoDocuments) {
|
||||||
HandleErrorNotFound(c, err)
|
HandleErrorNotFound(c, err)
|
||||||
return
|
return
|
||||||
@@ -45,7 +45,7 @@ func GetTaskById(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// spider
|
// spider
|
||||||
t.Spider, _ = service.NewModelServiceV2[models2.SpiderV2]().GetById(t.SpiderId)
|
t.Spider, _ = service.NewModelServiceV2[models.SpiderV2]().GetById(t.SpiderId)
|
||||||
|
|
||||||
// skip if task status is pending
|
// skip if task status is pending
|
||||||
if t.Status == constants.TaskStatusPending {
|
if t.Status == constants.TaskStatusPending {
|
||||||
@@ -54,7 +54,7 @@ func GetTaskById(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// task stat
|
// task stat
|
||||||
t.Stat, _ = service.NewModelServiceV2[models2.TaskStatV2]().GetById(id)
|
t.Stat, _ = service.NewModelServiceV2[models.TaskStatV2]().GetById(id)
|
||||||
|
|
||||||
HandleSuccessWithData(c, t)
|
HandleSuccessWithData(c, t)
|
||||||
}
|
}
|
||||||
@@ -62,7 +62,7 @@ func GetTaskById(c *gin.Context) {
|
|||||||
func GetTaskList(c *gin.Context) {
|
func GetTaskList(c *gin.Context) {
|
||||||
withStats := c.Query("stats")
|
withStats := c.Query("stats")
|
||||||
if withStats == "" {
|
if withStats == "" {
|
||||||
NewControllerV2[models2.TaskV2]().GetList(c)
|
NewControllerV2[models.TaskV2]().GetList(c)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -72,7 +72,7 @@ func GetTaskList(c *gin.Context) {
|
|||||||
sort := MustGetSortOption(c)
|
sort := MustGetSortOption(c)
|
||||||
|
|
||||||
// get tasks
|
// get tasks
|
||||||
tasks, err := service.NewModelServiceV2[models2.TaskV2]().GetMany(query, &mongo.FindOptions{
|
tasks, err := service.NewModelServiceV2[models.TaskV2]().GetMany(query, &mongo.FindOptions{
|
||||||
Sort: sort,
|
Sort: sort,
|
||||||
Skip: pagination.Size * (pagination.Page - 1),
|
Skip: pagination.Size * (pagination.Page - 1),
|
||||||
Limit: pagination.Size,
|
Limit: pagination.Size,
|
||||||
@@ -101,14 +101,14 @@ func GetTaskList(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// total count
|
// total count
|
||||||
total, err := service.NewModelServiceV2[models2.TaskV2]().Count(query)
|
total, err := service.NewModelServiceV2[models.TaskV2]().Count(query)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
HandleErrorInternalServerError(c, err)
|
HandleErrorInternalServerError(c, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// stat list
|
// stat list
|
||||||
stats, err := service.NewModelServiceV2[models2.TaskStatV2]().GetMany(bson.M{
|
stats, err := service.NewModelServiceV2[models.TaskStatV2]().GetMany(bson.M{
|
||||||
"_id": bson.M{
|
"_id": bson.M{
|
||||||
"$in": taskIds,
|
"$in": taskIds,
|
||||||
},
|
},
|
||||||
@@ -119,13 +119,13 @@ func GetTaskList(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// cache stat list to dict
|
// cache stat list to dict
|
||||||
statsDict := map[primitive.ObjectID]models2.TaskStatV2{}
|
statsDict := map[primitive.ObjectID]models.TaskStatV2{}
|
||||||
for _, s := range stats {
|
for _, s := range stats {
|
||||||
statsDict[s.Id] = s
|
statsDict[s.Id] = s
|
||||||
}
|
}
|
||||||
|
|
||||||
// spider list
|
// spider list
|
||||||
spiders, err := service.NewModelServiceV2[models2.SpiderV2]().GetMany(bson.M{
|
spiders, err := service.NewModelServiceV2[models.SpiderV2]().GetMany(bson.M{
|
||||||
"_id": bson.M{
|
"_id": bson.M{
|
||||||
"$in": spiderIds,
|
"$in": spiderIds,
|
||||||
},
|
},
|
||||||
@@ -136,7 +136,7 @@ func GetTaskList(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// cache spider list to dict
|
// cache spider list to dict
|
||||||
spiderDict := map[primitive.ObjectID]models2.SpiderV2{}
|
spiderDict := map[primitive.ObjectID]models.SpiderV2{}
|
||||||
for _, s := range spiders {
|
for _, s := range spiders {
|
||||||
spiderDict[s.Id] = s
|
spiderDict[s.Id] = s
|
||||||
}
|
}
|
||||||
@@ -170,22 +170,22 @@ func DeleteTaskById(c *gin.Context) {
|
|||||||
// delete in db
|
// delete in db
|
||||||
if err := mongo.RunTransaction(func(context mongo2.SessionContext) (err error) {
|
if err := mongo.RunTransaction(func(context mongo2.SessionContext) (err error) {
|
||||||
// delete task
|
// delete task
|
||||||
_, err = service.NewModelServiceV2[models2.TaskV2]().GetById(id)
|
_, err = service.NewModelServiceV2[models.TaskV2]().GetById(id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
err = service.NewModelServiceV2[models2.TaskV2]().DeleteById(id)
|
err = service.NewModelServiceV2[models.TaskV2]().DeleteById(id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// delete task stat
|
// delete task stat
|
||||||
_, err = service.NewModelServiceV2[models2.TaskStatV2]().GetById(id)
|
_, err = service.NewModelServiceV2[models.TaskStatV2]().GetById(id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log2.Warnf("delete task stat error: %s", err.Error())
|
log2.Warnf("delete task stat error: %s", err.Error())
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
err = service.NewModelServiceV2[models2.TaskStatV2]().DeleteById(id)
|
err = service.NewModelServiceV2[models.TaskStatV2]().DeleteById(id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log2.Warnf("delete task stat error: %s", err.Error())
|
log2.Warnf("delete task stat error: %s", err.Error())
|
||||||
return nil
|
return nil
|
||||||
@@ -217,7 +217,7 @@ func DeleteList(c *gin.Context) {
|
|||||||
|
|
||||||
if err := mongo.RunTransaction(func(context mongo2.SessionContext) error {
|
if err := mongo.RunTransaction(func(context mongo2.SessionContext) error {
|
||||||
// delete tasks
|
// delete tasks
|
||||||
if err := service.NewModelServiceV2[models2.TaskV2]().DeleteMany(bson.M{
|
if err := service.NewModelServiceV2[models.TaskV2]().DeleteMany(bson.M{
|
||||||
"_id": bson.M{
|
"_id": bson.M{
|
||||||
"$in": payload.Ids,
|
"$in": payload.Ids,
|
||||||
},
|
},
|
||||||
@@ -226,7 +226,7 @@ func DeleteList(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// delete task stats
|
// delete task stats
|
||||||
if err := service.NewModelServiceV2[models2.TaskV2]().DeleteMany(bson.M{
|
if err := service.NewModelServiceV2[models.TaskV2]().DeleteMany(bson.M{
|
||||||
"_id": bson.M{
|
"_id": bson.M{
|
||||||
"$in": payload.Ids,
|
"$in": payload.Ids,
|
||||||
},
|
},
|
||||||
@@ -261,7 +261,7 @@ func DeleteList(c *gin.Context) {
|
|||||||
|
|
||||||
func PostTaskRun(c *gin.Context) {
|
func PostTaskRun(c *gin.Context) {
|
||||||
// task
|
// task
|
||||||
var t models2.TaskV2
|
var t models.TaskV2
|
||||||
if err := c.ShouldBindJSON(&t); err != nil {
|
if err := c.ShouldBindJSON(&t); err != nil {
|
||||||
HandleErrorBadRequest(c, err)
|
HandleErrorBadRequest(c, err)
|
||||||
return
|
return
|
||||||
@@ -274,7 +274,7 @@ func PostTaskRun(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// spider
|
// spider
|
||||||
s, err := service.NewModelServiceV2[models2.SpiderV2]().GetById(t.SpiderId)
|
s, err := service.NewModelServiceV2[models.SpiderV2]().GetById(t.SpiderId)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
HandleErrorInternalServerError(c, err)
|
HandleErrorInternalServerError(c, err)
|
||||||
return
|
return
|
||||||
@@ -319,7 +319,7 @@ func PostTaskRestart(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// task
|
// task
|
||||||
t, err := service.NewModelServiceV2[models2.TaskV2]().GetById(id)
|
t, err := service.NewModelServiceV2[models.TaskV2]().GetById(id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
HandleErrorInternalServerError(c, err)
|
HandleErrorInternalServerError(c, err)
|
||||||
return
|
return
|
||||||
@@ -363,7 +363,7 @@ func PostTaskCancel(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// task
|
// task
|
||||||
t, err := service.NewModelServiceV2[models2.TaskV2]().GetById(id)
|
t, err := service.NewModelServiceV2[models.TaskV2]().GetById(id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
HandleErrorInternalServerError(c, err)
|
HandleErrorInternalServerError(c, err)
|
||||||
return
|
return
|
||||||
@@ -446,7 +446,7 @@ func GetTaskData(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// task
|
// task
|
||||||
t, err := service.NewModelServiceV2[models2.TaskV2]().GetById(id)
|
t, err := service.NewModelServiceV2[models.TaskV2]().GetById(id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
HandleErrorInternalServerError(c, err)
|
HandleErrorInternalServerError(c, err)
|
||||||
return
|
return
|
||||||
|
|||||||
11
core/database/interfaces/database_registry_service.go
Normal file
11
core/database/interfaces/database_registry_service.go
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
package interfaces
|
||||||
|
|
||||||
|
import (
|
||||||
|
"go.mongodb.org/mongo-driver/bson/primitive"
|
||||||
|
)
|
||||||
|
|
||||||
|
type DatabaseRegistryService interface {
|
||||||
|
Start()
|
||||||
|
CheckStatus()
|
||||||
|
GetDatabaseService(id primitive.ObjectID) (res DatabaseService, err error)
|
||||||
|
}
|
||||||
27
core/database/interfaces/database_service.go
Normal file
27
core/database/interfaces/database_service.go
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
package interfaces
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/crawlab-team/crawlab/core/database/entity"
|
||||||
|
"github.com/crawlab-team/crawlab/core/models/models/v2"
|
||||||
|
"go.mongodb.org/mongo-driver/bson/primitive"
|
||||||
|
)
|
||||||
|
|
||||||
|
type DatabaseService interface {
|
||||||
|
TestConnection(id primitive.ObjectID) (err error)
|
||||||
|
GetMetadata(id primitive.ObjectID) (m *entity.DatabaseMetadata, err error)
|
||||||
|
GetMetadataAllDb(id primitive.ObjectID) (m *entity.DatabaseMetadata, err error)
|
||||||
|
CreateDatabase(id primitive.ObjectID, databaseName string) (err error)
|
||||||
|
DropDatabase(id primitive.ObjectID, databaseName string) (err error)
|
||||||
|
GetTableMetadata(id primitive.ObjectID, databaseName, tableName string) (table *entity.DatabaseTable, err error)
|
||||||
|
CreateTable(id primitive.ObjectID, databaseName string, table *entity.DatabaseTable) (err error)
|
||||||
|
ModifyTable(id primitive.ObjectID, databaseName string, table *entity.DatabaseTable) (err error)
|
||||||
|
DropTable(id primitive.ObjectID, databaseName, tableName string) (err error)
|
||||||
|
RenameTable(id primitive.ObjectID, databaseName, oldTableName, newTableName string) (err error)
|
||||||
|
GetColumnTypes(query string) (types []string)
|
||||||
|
ReadRows(id primitive.ObjectID, databaseName, tableName string, filter map[string]interface{}, skip, limit int) ([]map[string]interface{}, int64, error)
|
||||||
|
CreateRow(id primitive.ObjectID, databaseName, tableName string, row map[string]interface{}) error
|
||||||
|
UpdateRow(id primitive.ObjectID, databaseName, tableName string, filter map[string]interface{}, update map[string]interface{}) error
|
||||||
|
DeleteRow(id primitive.ObjectID, databaseName, tableName string, filter map[string]interface{}) error
|
||||||
|
Query(id primitive.ObjectID, databaseName, query string) (results *entity.DatabaseQueryResults, err error)
|
||||||
|
GetCurrentMetric(id primitive.ObjectID) (m *models.DatabaseMetricV2, err error)
|
||||||
|
}
|
||||||
15
core/database/registry_service.go
Normal file
15
core/database/registry_service.go
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
package database
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/crawlab-team/crawlab/core/database/interfaces"
|
||||||
|
)
|
||||||
|
|
||||||
|
var serviceInstance interfaces.DatabaseRegistryService
|
||||||
|
|
||||||
|
func SetDatabaseRegistryService(svc interfaces.DatabaseRegistryService) {
|
||||||
|
serviceInstance = svc
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetDatabaseRegistryService() interfaces.DatabaseRegistryService {
|
||||||
|
return serviceInstance
|
||||||
|
}
|
||||||
@@ -214,7 +214,7 @@ func (svr TaskServerV2) handleInsertData(msg *grpc.StreamMessage) (err error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
var records []interface{}
|
var records []map[string]interface{}
|
||||||
for _, d := range data.Records {
|
for _, d := range data.Records {
|
||||||
res, ok := d[constants.TaskKey]
|
res, ok := d[constants.TaskKey]
|
||||||
if ok {
|
if ok {
|
||||||
|
|||||||
@@ -8,8 +8,8 @@ type SpiderV2 struct {
|
|||||||
any `collection:"spiders"`
|
any `collection:"spiders"`
|
||||||
BaseModelV2[SpiderV2] `bson:",inline"`
|
BaseModelV2[SpiderV2] `bson:",inline"`
|
||||||
Name string `json:"name" bson:"name"` // spider name
|
Name string `json:"name" bson:"name"` // spider name
|
||||||
ColId primitive.ObjectID `json:"col_id" bson:"col_id"` // data collection id
|
ColId primitive.ObjectID `json:"col_id" bson:"col_id"` // data collection id (deprecated) # TODO: remove this field in the future
|
||||||
ColName string `json:"col_name,omitempty" bson:"-"` // data collection name
|
ColName string `json:"col_name,omitempty" bson:"col_name"` // data collection name
|
||||||
DataSourceId primitive.ObjectID `json:"data_source_id" bson:"data_source_id"` // data source id
|
DataSourceId primitive.ObjectID `json:"data_source_id" bson:"data_source_id"` // data source id
|
||||||
DataSource *DatabaseV2 `json:"data_source,omitempty" bson:"-"` // data source
|
DataSource *DatabaseV2 `json:"data_source,omitempty" bson:"-"` // data source
|
||||||
Description string `json:"description" bson:"description"` // description
|
Description string `json:"description" bson:"description"` // description
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ func NewResultService(registryKey string, s *models.Spider) (svc2 interfaces.Res
|
|||||||
|
|
||||||
var store = sync.Map{}
|
var store = sync.Map{}
|
||||||
|
|
||||||
func GetResultService(spiderId primitive.ObjectID, opts ...Option) (svc2 interfaces.ResultService, err error) {
|
func GetResultService(spiderId primitive.ObjectID) (svc2 interfaces.ResultService, err error) {
|
||||||
// model service
|
// model service
|
||||||
modelSvc, err := service.GetService()
|
modelSvc, err := service.GetService()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -51,12 +51,6 @@ func GetResultService(spiderId primitive.ObjectID, opts ...Option) (svc2 interfa
|
|||||||
return nil, trace.TraceError(err)
|
return nil, trace.TraceError(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// apply options
|
|
||||||
_opts := &Options{}
|
|
||||||
for _, opt := range opts {
|
|
||||||
opt(_opts)
|
|
||||||
}
|
|
||||||
|
|
||||||
// store key
|
// store key
|
||||||
storeKey := s.ColId.Hex() + ":" + s.DataSourceId.Hex()
|
storeKey := s.ColId.Hex() + ":" + s.DataSourceId.Hex()
|
||||||
|
|
||||||
|
|||||||
@@ -1,76 +0,0 @@
|
|||||||
package test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"github.com/crawlab-team/crawlab/core/interfaces"
|
|
||||||
"github.com/crawlab-team/crawlab/core/models/delegate"
|
|
||||||
"github.com/crawlab-team/crawlab/core/models/models"
|
|
||||||
"github.com/crawlab-team/crawlab/core/models/service"
|
|
||||||
"github.com/crawlab-team/crawlab/core/result"
|
|
||||||
"github.com/crawlab-team/crawlab/db/mongo"
|
|
||||||
"go.uber.org/dig"
|
|
||||||
"testing"
|
|
||||||
)
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
T = NewTest()
|
|
||||||
}
|
|
||||||
|
|
||||||
var T *Test
|
|
||||||
|
|
||||||
type Test struct {
|
|
||||||
// dependencies
|
|
||||||
modelSvc service.ModelService
|
|
||||||
resultSvc interfaces.ResultService
|
|
||||||
|
|
||||||
// test data
|
|
||||||
TestColName string
|
|
||||||
TestCol *mongo.Col
|
|
||||||
TestDc *models.DataCollection
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *Test) Setup(t2 *testing.T) {
|
|
||||||
t2.Cleanup(t.Cleanup)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (t *Test) Cleanup() {
|
|
||||||
_ = t.modelSvc.DropAll()
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewTest() *Test {
|
|
||||||
var err error
|
|
||||||
|
|
||||||
// test
|
|
||||||
t := &Test{
|
|
||||||
TestColName: "test_results",
|
|
||||||
}
|
|
||||||
|
|
||||||
// dependency injection
|
|
||||||
c := dig.New()
|
|
||||||
if err := c.Provide(service.NewService); err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
if err := c.Invoke(func(
|
|
||||||
modelSvc service.ModelService,
|
|
||||||
) {
|
|
||||||
t.modelSvc = modelSvc
|
|
||||||
}); err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// data collection
|
|
||||||
t.TestDc = &models.DataCollection{
|
|
||||||
Name: t.TestColName,
|
|
||||||
}
|
|
||||||
if err := delegate.NewModelDelegate(t.TestDc).Add(); err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
t.TestCol = mongo.GetMongoCol(t.TestColName)
|
|
||||||
|
|
||||||
// result service
|
|
||||||
t.resultSvc, err = result.GetResultService(t.TestDc.GetId())
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return t
|
|
||||||
}
|
|
||||||
@@ -1,67 +0,0 @@
|
|||||||
package test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"github.com/crawlab-team/crawlab/core/models/models"
|
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
"testing"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestResultService_GetList(t *testing.T) {
|
|
||||||
var err error
|
|
||||||
T.Setup(t)
|
|
||||||
|
|
||||||
n := 1000
|
|
||||||
var docs []interface{}
|
|
||||||
for i := 0; i < n; i++ {
|
|
||||||
d := &models.Result{
|
|
||||||
"i": i,
|
|
||||||
}
|
|
||||||
docs = append(docs, d)
|
|
||||||
}
|
|
||||||
_, err = T.TestCol.InsertMany(docs)
|
|
||||||
require.Nil(t, err)
|
|
||||||
|
|
||||||
// get all
|
|
||||||
results, err := T.resultSvc.List(nil, nil)
|
|
||||||
require.Nil(t, err)
|
|
||||||
require.Equal(t, n, len(results))
|
|
||||||
|
|
||||||
//query := bson.M{
|
|
||||||
// "i": bson.M{
|
|
||||||
// "$lt": n / 2,
|
|
||||||
// },
|
|
||||||
//}
|
|
||||||
//results, err = T.resultSvc.List(query, nil)
|
|
||||||
//require.Nil(t, err)
|
|
||||||
//require.Equal(t, n/2, len(results))
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestResultService_Count(t *testing.T) {
|
|
||||||
var err error
|
|
||||||
T.Setup(t)
|
|
||||||
|
|
||||||
n := 1000
|
|
||||||
var docs []interface{}
|
|
||||||
for i := 0; i < n; i++ {
|
|
||||||
d := &models.Result{
|
|
||||||
"i": i,
|
|
||||||
}
|
|
||||||
docs = append(docs, d)
|
|
||||||
}
|
|
||||||
_, err = T.TestCol.InsertMany(docs)
|
|
||||||
require.Nil(t, err)
|
|
||||||
|
|
||||||
// get all
|
|
||||||
total, err := T.resultSvc.Count(nil)
|
|
||||||
require.Nil(t, err)
|
|
||||||
require.Equal(t, n, total)
|
|
||||||
|
|
||||||
//query := bson.M{
|
|
||||||
// "i": bson.M{
|
|
||||||
// "$lt": n / 2,
|
|
||||||
// },
|
|
||||||
//}
|
|
||||||
//total, err = T.resultSvc.Count(query)
|
|
||||||
//require.Nil(t, err)
|
|
||||||
//require.Equal(t, n/2, total)
|
|
||||||
}
|
|
||||||
@@ -1,12 +1,16 @@
|
|||||||
package stats
|
package stats
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
log2 "github.com/apex/log"
|
||||||
|
"github.com/crawlab-team/crawlab/core/database"
|
||||||
|
interfaces2 "github.com/crawlab-team/crawlab/core/database/interfaces"
|
||||||
"github.com/crawlab-team/crawlab/core/interfaces"
|
"github.com/crawlab-team/crawlab/core/interfaces"
|
||||||
models2 "github.com/crawlab-team/crawlab/core/models/models/v2"
|
models2 "github.com/crawlab-team/crawlab/core/models/models/v2"
|
||||||
"github.com/crawlab-team/crawlab/core/models/service"
|
"github.com/crawlab-team/crawlab/core/models/service"
|
||||||
nodeconfig "github.com/crawlab-team/crawlab/core/node/config"
|
nodeconfig "github.com/crawlab-team/crawlab/core/node/config"
|
||||||
"github.com/crawlab-team/crawlab/core/result"
|
|
||||||
"github.com/crawlab-team/crawlab/core/task/log"
|
"github.com/crawlab-team/crawlab/core/task/log"
|
||||||
|
"github.com/crawlab-team/crawlab/core/utils"
|
||||||
|
"github.com/crawlab-team/crawlab/db/mongo"
|
||||||
"github.com/crawlab-team/crawlab/trace"
|
"github.com/crawlab-team/crawlab/trace"
|
||||||
"go.mongodb.org/mongo-driver/bson"
|
"go.mongodb.org/mongo-driver/bson"
|
||||||
"go.mongodb.org/mongo-driver/bson/primitive"
|
"go.mongodb.org/mongo-driver/bson/primitive"
|
||||||
@@ -14,15 +18,23 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type databaseServiceItem struct {
|
||||||
|
taskId primitive.ObjectID
|
||||||
|
dbId primitive.ObjectID
|
||||||
|
dbSvc interfaces2.DatabaseService
|
||||||
|
tableName string
|
||||||
|
time time.Time
|
||||||
|
}
|
||||||
|
|
||||||
type ServiceV2 struct {
|
type ServiceV2 struct {
|
||||||
// dependencies
|
// dependencies
|
||||||
nodeCfgSvc interfaces.NodeConfigService
|
nodeCfgSvc interfaces.NodeConfigService
|
||||||
|
|
||||||
// internals
|
// internals
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
resultServices sync.Map
|
databaseServiceItems map[string]*databaseServiceItem
|
||||||
rsTtl time.Duration
|
databaseServiceTll time.Duration
|
||||||
logDriver log.Driver
|
logDriver log.Driver
|
||||||
}
|
}
|
||||||
|
|
||||||
func (svc *ServiceV2) Init() (err error) {
|
func (svc *ServiceV2) Init() (err error) {
|
||||||
@@ -30,15 +42,39 @@ func (svc *ServiceV2) Init() (err error) {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (svc *ServiceV2) InsertData(id primitive.ObjectID, records ...interface{}) (err error) {
|
func (svc *ServiceV2) InsertData(taskId primitive.ObjectID, records ...map[string]interface{}) (err error) {
|
||||||
resultSvc, err := svc.getResultService(id)
|
count := 0
|
||||||
|
|
||||||
|
item, err := svc.getDatabaseServiceItem(taskId)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if err := resultSvc.Insert(records...); err != nil {
|
dbId := item.dbId
|
||||||
return err
|
dbSvc := item.dbSvc
|
||||||
|
tableName := item.tableName
|
||||||
|
if utils.IsPro() && dbSvc != nil {
|
||||||
|
for _, record := range records {
|
||||||
|
if err := dbSvc.CreateRow(dbId, "", tableName, record); err != nil {
|
||||||
|
log2.Errorf("failed to insert data: %v", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
var records2 []interface{}
|
||||||
|
for _, record := range records {
|
||||||
|
records2 = append(records2, record)
|
||||||
|
}
|
||||||
|
_, err = mongo.GetMongoCol(tableName).InsertMany(records2)
|
||||||
|
if err != nil {
|
||||||
|
log2.Errorf("failed to insert data: %v", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
count = len(records)
|
||||||
}
|
}
|
||||||
go svc.updateTaskStats(id, len(records))
|
|
||||||
|
go svc.updateTaskStats(taskId, count)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -46,38 +82,52 @@ func (svc *ServiceV2) InsertLogs(id primitive.ObjectID, logs ...string) (err err
|
|||||||
return svc.logDriver.WriteLines(id.Hex(), logs)
|
return svc.logDriver.WriteLines(id.Hex(), logs)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (svc *ServiceV2) getResultService(id primitive.ObjectID) (resultSvc interfaces.ResultService, err error) {
|
func (svc *ServiceV2) getDatabaseServiceItem(taskId primitive.ObjectID) (item *databaseServiceItem, err error) {
|
||||||
// atomic operation
|
// atomic operation
|
||||||
svc.mu.Lock()
|
svc.mu.Lock()
|
||||||
defer svc.mu.Unlock()
|
defer svc.mu.Unlock()
|
||||||
|
|
||||||
// attempt to get from cache
|
// attempt to get from cache
|
||||||
res, _ := svc.resultServices.Load(id.Hex())
|
item, ok := svc.databaseServiceItems[taskId.Hex()]
|
||||||
if res != nil {
|
if ok {
|
||||||
// hit in cache
|
// hit in cache
|
||||||
resultSvc, ok := res.(interfaces.ResultService)
|
item.time = time.Now()
|
||||||
resultSvc.SetTime(time.Now())
|
return item, nil
|
||||||
if ok {
|
|
||||||
return resultSvc, nil
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// task
|
// task
|
||||||
t, err := service.NewModelServiceV2[models2.TaskV2]().GetById(id)
|
t, err := service.NewModelServiceV2[models2.TaskV2]().GetById(taskId)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// result service
|
// spider
|
||||||
resultSvc, err = result.GetResultService(t.SpiderId)
|
s, err := service.NewModelServiceV2[models2.SpiderV2]().GetById(t.SpiderId)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// database service
|
||||||
|
var dbSvc interfaces2.DatabaseService
|
||||||
|
if utils.IsPro() {
|
||||||
|
if dbRegSvc := database.GetDatabaseRegistryService(); dbRegSvc != nil {
|
||||||
|
dbSvc, err = dbRegSvc.GetDatabaseService(s.DataSourceId)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// store in cache
|
// store in cache
|
||||||
svc.resultServices.Store(id.Hex(), resultSvc)
|
svc.databaseServiceItems[taskId.Hex()] = &databaseServiceItem{
|
||||||
|
taskId: taskId,
|
||||||
|
dbId: s.DataSourceId,
|
||||||
|
dbSvc: dbSvc,
|
||||||
|
tableName: s.ColName,
|
||||||
|
time: time.Now(),
|
||||||
|
}
|
||||||
|
|
||||||
return resultSvc, nil
|
return item, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (svc *ServiceV2) updateTaskStats(id primitive.ObjectID, resultCount int) {
|
func (svc *ServiceV2) updateTaskStats(id primitive.ObjectID, resultCount int) {
|
||||||
@@ -96,13 +146,11 @@ func (svc *ServiceV2) cleanup() {
|
|||||||
// atomic operation
|
// atomic operation
|
||||||
svc.mu.Lock()
|
svc.mu.Lock()
|
||||||
|
|
||||||
svc.resultServices.Range(func(key, value interface{}) bool {
|
for k, v := range svc.databaseServiceItems {
|
||||||
rs := value.(interfaces.ResultService)
|
if time.Now().After(v.time.Add(svc.databaseServiceTll)) {
|
||||||
if time.Now().After(rs.GetTime().Add(svc.rsTtl)) {
|
delete(svc.databaseServiceItems, k)
|
||||||
svc.resultServices.Delete(key)
|
|
||||||
}
|
}
|
||||||
return true
|
}
|
||||||
})
|
|
||||||
|
|
||||||
svc.mu.Unlock()
|
svc.mu.Unlock()
|
||||||
|
|
||||||
@@ -113,8 +161,9 @@ func (svc *ServiceV2) cleanup() {
|
|||||||
func NewTaskStatsServiceV2() (svc2 *ServiceV2, err error) {
|
func NewTaskStatsServiceV2() (svc2 *ServiceV2, err error) {
|
||||||
// service
|
// service
|
||||||
svc := &ServiceV2{
|
svc := &ServiceV2{
|
||||||
mu: sync.Mutex{},
|
mu: sync.Mutex{},
|
||||||
resultServices: sync.Map{},
|
databaseServiceItems: map[string]*databaseServiceItem{},
|
||||||
|
databaseServiceTll: 10 * time.Minute,
|
||||||
}
|
}
|
||||||
|
|
||||||
svc.nodeCfgSvc = nodeconfig.GetNodeConfigService()
|
svc.nodeCfgSvc = nodeconfig.GetNodeConfigService()
|
||||||
|
|||||||
Reference in New Issue
Block a user