feat: optimized dependency logic

This commit is contained in:
Marvin Zhang
2024-11-05 18:21:52 +08:00
parent 6af282058d
commit a0989d36db
13 changed files with 375 additions and 133 deletions

View File

@@ -1,11 +0,0 @@
package entity
import "go.mongodb.org/mongo-driver/bson/primitive"
type DependencyRepo struct {
Name string `json:"name,omitempty" bson:"name,omitempty"`
NodeIds []primitive.ObjectID `json:"node_ids,omitempty" bson:"node_ids,omitempty"`
Versions []string `json:"versions,omitempty" bson:"versions,omitempty"`
LatestVersion string `json:"latest_version" bson:"latest_version"`
Type string `json:"type" bson:"type"`
}

View File

@@ -3,18 +3,20 @@ package server
import (
"context"
"errors"
"io"
"sync"
"time"
"github.com/crawlab-team/crawlab-pro/core/dependency/constants"
"github.com/apex/log"
"github.com/crawlab-team/crawlab/core/models/models"
"github.com/crawlab-team/crawlab/core/models/service"
mongo2 "github.com/crawlab-team/crawlab/db/mongo"
"github.com/crawlab-team/crawlab/grpc"
"github.com/crawlab-team/crawlab/trace"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/bson/primitive"
"go.mongodb.org/mongo-driver/mongo"
"io"
"sync"
"time"
)
type DependencyServiceServer struct {
@@ -36,12 +38,15 @@ func (svr DependencyServiceServer) Connect(req *grpc.DependencyServiceConnectReq
return nil
}
func (svr DependencyServiceServer) Sync(ctx context.Context, request *grpc.DependencyServiceSyncRequest) (response *grpc.Response, err error) {
// Sync handles synchronization of dependencies between nodes and the database
func (svr DependencyServiceServer) Sync(_ context.Context, request *grpc.DependencyServiceSyncRequest) (response *grpc.Response, err error) {
// Get node by node key
n, err := service.NewModelService[models.Node]().GetOne(bson.M{"key": request.NodeKey}, nil)
if err != nil {
return nil, err
}
// Get existing dependencies from database for this node and language
depsDb, err := service.NewModelService[models.Dependency]().GetMany(bson.M{
"node_id": n.Id,
"type": request.Lang,
@@ -53,30 +58,45 @@ func (svr DependencyServiceServer) Sync(ctx context.Context, request *grpc.Depen
}
}
// Create map of existing dependencies for faster lookup
depsDbMap := make(map[string]*models.Dependency)
for _, d := range depsDb {
depsDbMap[d.Name] = &d
}
// Process new dependencies from request
var depsToInsert []models.Dependency
var depsToUpdate []models.Dependency
depsMap := make(map[string]*models.Dependency)
for _, dep := range request.Dependencies {
// Create dependency model
d := models.Dependency{
Name: dep.Name,
NodeId: n.Id,
Type: request.Lang,
Version: dep.Version,
Status: constants.DependencyStatusInstalled,
}
d.SetCreatedAt(time.Now())
d.SetUpdatedAt(time.Now())
// Add to map
depsMap[d.Name] = &d
_, ok := depsDbMap[d.Name]
// Check if dependency exists in DB
existingDep, ok := depsDbMap[d.Name]
if !ok {
// If dependency doesn't exist, add to insert list
depsToInsert = append(depsToInsert, d)
} else if existingDep.Version != d.Version || existingDep.Status != constants.DependencyStatusInstalled {
// If dependency exists but version is different or status is not installed, add to update list
d.Id = existingDep.Id
d.SetUpdatedAt(time.Now())
depsToUpdate = append(depsToUpdate, d)
}
}
// Find dependencies to delete (exist in DB but not in request)
var depIdsToDelete []primitive.ObjectID
for _, d := range depsDb {
_, ok := depsMap[d.Name]
@@ -85,23 +105,33 @@ func (svr DependencyServiceServer) Sync(ctx context.Context, request *grpc.Depen
}
}
// Run database operations in a transaction
err = mongo2.RunTransaction(func(ctx mongo.SessionContext) (err error) {
// Delete old dependencies if any
if len(depIdsToDelete) > 0 {
err = service.NewModelService[models.Dependency]().DeleteMany(bson.M{
"_id": bson.M{"$in": depIdsToDelete},
})
if err != nil {
log.Errorf("[DependencyServiceServer] delete dependencies in db error: %v", err)
trace.PrintError(err)
return err
}
}
// Insert new dependencies if any
if len(depsToInsert) > 0 {
_, err = service.NewModelService[models.Dependency]().InsertMany(depsToInsert)
if err != nil {
log.Errorf("[DependencyServiceServer] insert dependencies in db error: %v", err)
trace.PrintError(err)
return err
}
}
// Update dependencies with different versions
for _, d := range depsToUpdate {
err = service.NewModelService[models.Dependency]().ReplaceById(d.Id, d)
if err != nil {
log.Errorf("[DependencyServiceServer] update dependency in db error: %v", err)
return err
}
}
@@ -137,32 +167,32 @@ func (svr DependencyServiceServer) UpdateLogs(stream grpc.DependencyService_Upda
// if dependency is nil, get dependency
if dep == nil {
dep, err = service.NewModelService[models.Dependency]().GetOne(bson.M{
"node_id": n.Id,
"name": req.Name,
"type": req.Lang,
}, nil)
id, err := primitive.ObjectIDFromHex(req.DependencyId)
if err != nil {
if !errors.Is(err, mongo.ErrNoDocuments) {
log.Errorf("[DependencyServiceServer] get dependency error: %v", err)
return err
}
// create dependency if not found
dep = &models.Dependency{
NodeId: n.Id,
Name: req.Name,
Type: req.Lang,
}
dep.SetCreatedAt(time.Now())
dep.SetUpdatedAt(time.Now())
dep.Id, err = service.NewModelService[models.Dependency]().InsertOne(*dep)
if err != nil {
log.Errorf("[DependencyServiceServer] insert dependency error: %v", err)
return err
}
log.Errorf("[DependencyServiceServer] convert dependency id error: %v", err)
return err
}
dep, err = service.NewModelService[models.Dependency]().GetById(id)
if err != nil {
log.Errorf("[DependencyServiceServer] get dependency error: %v", err)
return err
}
}
// insert dependency logs
var depLogs []models.DependencyLog
for _, line := range req.Logs {
depLog := models.DependencyLog{
DependencyId: dep.Id,
Content: line,
}
depLogs = append(depLogs, depLog)
}
_, err = service.NewModelService[models.DependencyLog]().InsertMany(depLogs)
if err != nil {
log.Errorf("[DependencyServiceServer] insert dependency logs error: %v", err)
return err
}
}
}

View File

@@ -3,6 +3,7 @@ package server
import (
"context"
"encoding/json"
"go.mongodb.org/mongo-driver/mongo/options"
"reflect"
"sync"
@@ -269,6 +270,30 @@ func (svr ModelBaseServiceServer) InsertMany(_ context.Context, req *grpc.ModelS
return HandleSuccessWithData(r.InsertedIDs)
}
func (svr ModelBaseServiceServer) UpsertOne(_ context.Context, req *grpc.ModelServiceUpsertOneRequest) (res *grpc.Response, err error) {
model := GetOneInstanceModel(req.ModelType)
modelType := reflect.TypeOf(model)
modelValuePtr := reflect.New(modelType).Interface()
err = json.Unmarshal(req.Model, modelValuePtr)
if err != nil {
return HandleError(err)
}
var query bson.M
err = json.Unmarshal(req.Query, &query)
if err != nil {
return HandleError(err)
}
modelSvc := GetModelService[bson.M](req.ModelType)
opts := &options.ReplaceOptions{}
opts.SetUpsert(true)
id, err := modelSvc.GetCol().GetCollection().ReplaceOne(modelSvc.GetCol().GetContext(), query, modelValuePtr, opts)
if err != nil {
return HandleError(err)
}
return HandleSuccessWithData(id)
}
func (svr ModelBaseServiceServer) Count(_ context.Context, req *grpc.ModelServiceCountRequest) (res *grpc.Response, err error) {
var query bson.M
err = json.Unmarshal(req.Query, &query)

View File

@@ -2,6 +2,9 @@ package client
import (
"encoding/json"
"reflect"
"sync"
"github.com/crawlab-team/crawlab/core/grpc/client"
"github.com/crawlab-team/crawlab/core/interfaces"
nodeconfig "github.com/crawlab-team/crawlab/core/node/config"
@@ -9,8 +12,6 @@ import (
"github.com/crawlab-team/crawlab/grpc"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/bson/primitive"
"reflect"
"sync"
)
var (
@@ -255,11 +256,6 @@ func (svc *ModelService[T]) InsertOne(model T) (id primitive.ObjectID, err error
return primitive.NilObjectID, err
}
return deserialize[primitive.ObjectID](res)
//idStr, err := deserialize[string](res)
//if err != nil {
// return primitive.NilObjectID, err
//}
//return primitive.ObjectIDFromHex(idStr)
}
func (svc *ModelService[T]) InsertMany(models []T) (ids []primitive.ObjectID, err error) {
@@ -280,6 +276,30 @@ func (svc *ModelService[T]) InsertMany(models []T) (ids []primitive.ObjectID, er
return deserialize[[]primitive.ObjectID](res)
}
func (svc *ModelService[T]) UpsertOne(query bson.M, model T) (id primitive.ObjectID, err error) {
ctx, cancel := svc.c.Context()
defer cancel()
queryData, err := json.Marshal(query)
if err != nil {
return primitive.NilObjectID, err
}
modelData, err := json.Marshal(model)
if err != nil {
return primitive.NilObjectID, err
}
res, err := svc.c.ModelBaseServiceClient.UpsertOne(ctx, &grpc.ModelServiceUpsertOneRequest{
NodeKey: svc.cfg.GetNodeKey(),
ModelType: svc.modelType,
Query: queryData,
Model: modelData,
})
if err != nil {
return primitive.NilObjectID, err
}
return deserialize[primitive.ObjectID](res)
}
func (svc *ModelService[T]) Count(query bson.M) (total int, err error) {
ctx, cancel := svc.c.Context()
defer cancel()

View File

@@ -1,7 +1,10 @@
package models
import "go.mongodb.org/mongo-driver/bson/primitive"
type DependencyLog struct {
any `collection:"dependency_logs"`
BaseModel[DependencyLog] `bson:",inline"`
Content string `json:"content" bson:"content"`
DependencyId primitive.ObjectID `json:"dependency_id" bson:"dependency_id"`
Content string `json:"content" bson:"content"`
}

View File

@@ -3,10 +3,11 @@ package service
import (
"context"
"fmt"
"go.mongodb.org/mongo-driver/mongo/options"
"reflect"
"sync"
"go.mongodb.org/mongo-driver/mongo/options"
"github.com/crawlab-team/crawlab/core/interfaces"
"github.com/crawlab-team/crawlab/db/mongo"
"go.mongodb.org/mongo-driver/bson"
@@ -249,6 +250,44 @@ func (svc *ModelService[T]) InsertManyContext(ctx context.Context, models []T) (
}
return ids, nil
}
func (svc *ModelService[T]) UpsertOne(query bson.M, model T) (id primitive.ObjectID, err error) {
opts := options.ReplaceOptions{}
opts.SetUpsert(true)
result, err := svc.col.GetCollection().ReplaceOne(svc.col.GetContext(), query, model, &opts)
if err != nil {
return primitive.NilObjectID, err
}
if result.UpsertedID != nil {
// If document was inserted
return result.UpsertedID.(primitive.ObjectID), nil
}
// If document was updated, get its ID from the model
m := any(&model).(interfaces.Model)
return m.GetId(), nil
}
func (svc *ModelService[T]) UpsertOneContext(ctx context.Context, query bson.M, model T) (id primitive.ObjectID, err error) {
opts := options.ReplaceOptions{}
opts.SetUpsert(true)
result, err := svc.col.GetCollection().ReplaceOne(ctx, query, model, &opts)
if err != nil {
return primitive.NilObjectID, err
}
if result.UpsertedID != nil {
// If document was inserted
return result.UpsertedID.(primitive.ObjectID), nil
}
// If document was updated, get its ID from the query or model
if id, ok := query["_id"].(primitive.ObjectID); ok {
return id, nil
}
m := any(&model).(interfaces.Model)
return m.GetId(), nil
}
func (svc *ModelService[T]) Count(query bson.M) (total int, err error) {
return svc.col.Count(query)