mirror of
https://github.com/crawlab-team/crawlab.git
synced 2026-01-21 17:21:09 +01:00
feat: added modules
This commit is contained in:
16
core/result/options.go
Normal file
16
core/result/options.go
Normal file
@@ -0,0 +1,16 @@
|
||||
package result
|
||||
|
||||
import "go.mongodb.org/mongo-driver/bson/primitive"
|
||||
|
||||
// Option is a functional option that mutates an Options value in place.
type Option func(o *Options)
|
||||
|
||||
type Options struct {
|
||||
registryKey string // registry key
|
||||
SpiderId primitive.ObjectID // data source id
|
||||
}
|
||||
|
||||
func WithRegistryKey(key string) Option {
|
||||
return func(opts *Options) {
|
||||
opts.registryKey = key
|
||||
}
|
||||
}
|
||||
89
core/result/service.go
Normal file
89
core/result/service.go
Normal file
@@ -0,0 +1,89 @@
|
||||
package result
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/crawlab-team/crawlab/core/errors"
|
||||
"github.com/crawlab-team/crawlab/core/interfaces"
|
||||
"github.com/crawlab-team/crawlab/core/models/models"
|
||||
"github.com/crawlab-team/crawlab/core/models/service"
|
||||
"github.com/crawlab-team/go-trace"
|
||||
"go.mongodb.org/mongo-driver/bson/primitive"
|
||||
"sync"
|
||||
)
|
||||
|
||||
func NewResultService(registryKey string, s *models.Spider) (svc2 interfaces.ResultService, err error) {
|
||||
// result service function
|
||||
var fn interfaces.ResultServiceRegistryFn
|
||||
|
||||
if registryKey == "" {
|
||||
// default
|
||||
fn = NewResultServiceMongo
|
||||
} else {
|
||||
// from registry
|
||||
reg := GetResultServiceRegistry()
|
||||
fn = reg.Get(registryKey)
|
||||
if fn == nil {
|
||||
return nil, errors.NewResultError(fmt.Sprintf("%s is not implemented", registryKey))
|
||||
}
|
||||
}
|
||||
|
||||
// generate result service
|
||||
svc, err := fn(s.ColId, s.DataSourceId)
|
||||
if err != nil {
|
||||
return nil, trace.TraceError(err)
|
||||
}
|
||||
|
||||
return svc, nil
|
||||
}
|
||||
|
||||
var store = sync.Map{}
|
||||
|
||||
func GetResultService(spiderId primitive.ObjectID, opts ...Option) (svc2 interfaces.ResultService, err error) {
|
||||
// model service
|
||||
modelSvc, err := service.GetService()
|
||||
if err != nil {
|
||||
return nil, trace.TraceError(err)
|
||||
}
|
||||
|
||||
// spider
|
||||
s, err := modelSvc.GetSpiderById(spiderId)
|
||||
if err != nil {
|
||||
return nil, trace.TraceError(err)
|
||||
}
|
||||
|
||||
// apply options
|
||||
_opts := &Options{}
|
||||
for _, opt := range opts {
|
||||
opt(_opts)
|
||||
}
|
||||
|
||||
// store key
|
||||
storeKey := s.ColId.Hex() + ":" + s.DataSourceId.Hex()
|
||||
|
||||
// attempt to load result service from store
|
||||
res, _ := store.Load(storeKey)
|
||||
if res != nil {
|
||||
svc, ok := res.(interfaces.ResultService)
|
||||
if ok {
|
||||
return svc, nil
|
||||
}
|
||||
}
|
||||
|
||||
// registry key
|
||||
var registryKey string
|
||||
ds, _ := modelSvc.GetDataSourceById(s.DataSourceId)
|
||||
if ds != nil {
|
||||
registryKey = ds.Type
|
||||
}
|
||||
|
||||
// create a new result service if not exists
|
||||
svc, err := NewResultService(registryKey, s)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// save into store
|
||||
store.Store(storeKey, svc)
|
||||
|
||||
return svc, nil
|
||||
}
|
||||
146
core/result/service_mongo.go
Normal file
146
core/result/service_mongo.go
Normal file
@@ -0,0 +1,146 @@
|
||||
package result
|
||||
|
||||
import (
	"fmt"
	"time"

	"github.com/crawlab-team/crawlab-db/generic"
	"github.com/crawlab-team/crawlab-db/mongo"
	"github.com/crawlab-team/crawlab/core/constants"
	"github.com/crawlab-team/crawlab/core/interfaces"
	"github.com/crawlab-team/crawlab/core/models/models"
	"github.com/crawlab-team/crawlab/core/models/service"
	"github.com/crawlab-team/crawlab/core/utils"
	"github.com/crawlab-team/go-trace"
	"go.mongodb.org/mongo-driver/bson"
	"go.mongodb.org/mongo-driver/bson/primitive"
	mongo2 "go.mongodb.org/mongo-driver/mongo"
	"go.mongodb.org/mongo-driver/mongo/options"
)
|
||||
|
||||
type ServiceMongo struct {
|
||||
// dependencies
|
||||
modelSvc service.ModelService
|
||||
modelColSvc interfaces.ModelBaseService
|
||||
|
||||
// internals
|
||||
colId primitive.ObjectID // _id of models.DataCollection
|
||||
dc *models.DataCollection // models.DataCollection
|
||||
t time.Time
|
||||
}
|
||||
|
||||
func (svc *ServiceMongo) List(query generic.ListQuery, opts *generic.ListOptions) (results []interface{}, err error) {
|
||||
_query := svc.getQuery(query)
|
||||
_opts := svc.getOpts(opts)
|
||||
return svc.getList(_query, _opts)
|
||||
}
|
||||
|
||||
func (svc *ServiceMongo) Count(query generic.ListQuery) (n int, err error) {
|
||||
_query := svc.getQuery(query)
|
||||
return svc.modelColSvc.Count(_query)
|
||||
}
|
||||
|
||||
func (svc *ServiceMongo) Insert(docs ...interface{}) (err error) {
|
||||
if svc.dc.Dedup.Enabled && len(svc.dc.Dedup.Keys) > 0 {
|
||||
for _, doc := range docs {
|
||||
hash, err := utils.GetResultHash(doc, svc.dc.Dedup.Keys)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
doc.(interfaces.Result).SetValue(constants.HashKey, hash)
|
||||
query := bson.M{constants.HashKey: hash}
|
||||
switch svc.dc.Dedup.Type {
|
||||
case constants.DedupTypeOverwrite:
|
||||
err = mongo.GetMongoCol(svc.dc.Name).ReplaceWithOptions(query, doc, &options.ReplaceOptions{Upsert: &[]bool{true}[0]})
|
||||
if err != nil {
|
||||
return trace.TraceError(err)
|
||||
}
|
||||
default:
|
||||
var o bson.M
|
||||
err := mongo.GetMongoCol(svc.dc.Name).Find(query, &mongo.FindOptions{Limit: 1}).One(&o)
|
||||
if err == nil {
|
||||
// exists, ignore
|
||||
continue
|
||||
}
|
||||
if err != mongo2.ErrNoDocuments {
|
||||
// error
|
||||
return trace.TraceError(err)
|
||||
}
|
||||
// not exists, insert
|
||||
_, err = mongo.GetMongoCol(svc.dc.Name).Insert(doc)
|
||||
if err != nil {
|
||||
return trace.TraceError(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
_, err = mongo.GetMongoCol(svc.dc.Name).InsertMany(docs)
|
||||
if err != nil {
|
||||
return trace.TraceError(err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (svc *ServiceMongo) Index(fields []string) {
|
||||
for _, field := range fields {
|
||||
_ = mongo.GetMongoCol(svc.dc.Name).CreateIndex(mongo2.IndexModel{Keys: bson.M{field: 1}})
|
||||
}
|
||||
}
|
||||
|
||||
func (svc *ServiceMongo) SetTime(t time.Time) {
|
||||
svc.t = t
|
||||
}
|
||||
|
||||
func (svc *ServiceMongo) GetTime() (t time.Time) {
|
||||
return svc.t
|
||||
}
|
||||
|
||||
func (svc *ServiceMongo) getList(query bson.M, opts *mongo.FindOptions) (results []interface{}, err error) {
|
||||
list, err := svc.modelColSvc.GetList(query, opts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, d := range list.GetModels() {
|
||||
r, ok := d.(interfaces.Result)
|
||||
if ok {
|
||||
results = append(results, r)
|
||||
}
|
||||
}
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func (svc *ServiceMongo) getQuery(query generic.ListQuery) (res bson.M) {
|
||||
return utils.GetMongoQuery(query)
|
||||
}
|
||||
|
||||
func (svc *ServiceMongo) getOpts(opts *generic.ListOptions) (res *mongo.FindOptions) {
|
||||
return utils.GetMongoOpts(opts)
|
||||
}
|
||||
|
||||
func NewResultServiceMongo(colId primitive.ObjectID, _ primitive.ObjectID) (svc2 interfaces.ResultService, err error) {
|
||||
// service
|
||||
svc := &ServiceMongo{
|
||||
colId: colId,
|
||||
t: time.Now(),
|
||||
}
|
||||
|
||||
// dependency injection
|
||||
svc.modelSvc, err = service.GetService()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// data collection
|
||||
svc.dc, _ = svc.modelSvc.GetDataCollectionById(colId)
|
||||
go func() {
|
||||
for {
|
||||
time.Sleep(1 * time.Second)
|
||||
svc.dc, _ = svc.modelSvc.GetDataCollectionById(colId)
|
||||
}
|
||||
}()
|
||||
|
||||
// data collection model service
|
||||
svc.modelColSvc = service.GetBaseServiceByColName(interfaces.ModelIdResult, svc.dc.Name)
|
||||
|
||||
return svc, nil
|
||||
}
|
||||
48
core/result/service_registry.go
Normal file
48
core/result/service_registry.go
Normal file
@@ -0,0 +1,48 @@
|
||||
package result
|
||||
|
||||
import (
|
||||
"github.com/crawlab-team/crawlab/core/interfaces"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// ServiceRegistry maps string keys to result service constructors.
type ServiceRegistry struct {
	// services holds the registered entries, keyed by registry key.
	services sync.Map
}
|
||||
|
||||
func (r *ServiceRegistry) Register(key string, fn interfaces.ResultServiceRegistryFn) {
|
||||
r.services.Store(key, fn)
|
||||
}
|
||||
|
||||
func (r *ServiceRegistry) Unregister(key string) {
|
||||
r.services.Delete(key)
|
||||
}
|
||||
|
||||
func (r *ServiceRegistry) Get(key string) (fn interfaces.ResultServiceRegistryFn) {
|
||||
res, ok := r.services.Load(key)
|
||||
if ok {
|
||||
fn, ok = res.(interfaces.ResultServiceRegistryFn)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
return fn
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func NewResultServiceRegistry() (r interfaces.ResultServiceRegistry) {
|
||||
r = &ServiceRegistry{
|
||||
services: sync.Map{},
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
var _svc interfaces.ResultServiceRegistry
|
||||
|
||||
func GetResultServiceRegistry() (r interfaces.ResultServiceRegistry) {
|
||||
if _svc != nil {
|
||||
return _svc
|
||||
}
|
||||
_svc = NewResultServiceRegistry()
|
||||
return _svc
|
||||
}
|
||||
76
core/result/test/base.go
Normal file
76
core/result/test/base.go
Normal file
@@ -0,0 +1,76 @@
|
||||
package test
|
||||
|
||||
import (
|
||||
"github.com/crawlab-team/crawlab-db/mongo"
|
||||
"github.com/crawlab-team/crawlab/core/interfaces"
|
||||
"github.com/crawlab-team/crawlab/core/models/delegate"
|
||||
"github.com/crawlab-team/crawlab/core/models/models"
|
||||
"github.com/crawlab-team/crawlab/core/models/service"
|
||||
"github.com/crawlab-team/crawlab/core/result"
|
||||
"go.uber.org/dig"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func init() {
|
||||
T = NewTest()
|
||||
}
|
||||
|
||||
var T *Test
|
||||
|
||||
type Test struct {
|
||||
// dependencies
|
||||
modelSvc service.ModelService
|
||||
resultSvc interfaces.ResultService
|
||||
|
||||
// test data
|
||||
TestColName string
|
||||
TestCol *mongo.Col
|
||||
TestDc *models.DataCollection
|
||||
}
|
||||
|
||||
func (t *Test) Setup(t2 *testing.T) {
|
||||
t2.Cleanup(t.Cleanup)
|
||||
}
|
||||
|
||||
func (t *Test) Cleanup() {
|
||||
_ = t.modelSvc.DropAll()
|
||||
}
|
||||
|
||||
func NewTest() *Test {
|
||||
var err error
|
||||
|
||||
// test
|
||||
t := &Test{
|
||||
TestColName: "test_results",
|
||||
}
|
||||
|
||||
// dependency injection
|
||||
c := dig.New()
|
||||
if err := c.Provide(service.NewService); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if err := c.Invoke(func(
|
||||
modelSvc service.ModelService,
|
||||
) {
|
||||
t.modelSvc = modelSvc
|
||||
}); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// data collection
|
||||
t.TestDc = &models.DataCollection{
|
||||
Name: t.TestColName,
|
||||
}
|
||||
if err := delegate.NewModelDelegate(t.TestDc).Add(); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
t.TestCol = mongo.GetMongoCol(t.TestColName)
|
||||
|
||||
// result service
|
||||
t.resultSvc, err = result.GetResultService(t.TestDc.GetId())
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return t
|
||||
}
|
||||
67
core/result/test/service_test.go
Normal file
67
core/result/test/service_test.go
Normal file
@@ -0,0 +1,67 @@
|
||||
package test
|
||||
|
||||
import (
|
||||
"github.com/crawlab-team/crawlab/core/models/models"
|
||||
"github.com/stretchr/testify/require"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestResultService_GetList(t *testing.T) {
|
||||
var err error
|
||||
T.Setup(t)
|
||||
|
||||
n := 1000
|
||||
var docs []interface{}
|
||||
for i := 0; i < n; i++ {
|
||||
d := &models.Result{
|
||||
"i": i,
|
||||
}
|
||||
docs = append(docs, d)
|
||||
}
|
||||
_, err = T.TestCol.InsertMany(docs)
|
||||
require.Nil(t, err)
|
||||
|
||||
// get all
|
||||
results, err := T.resultSvc.List(nil, nil)
|
||||
require.Nil(t, err)
|
||||
require.Equal(t, n, len(results))
|
||||
|
||||
//query := bson.M{
|
||||
// "i": bson.M{
|
||||
// "$lt": n / 2,
|
||||
// },
|
||||
//}
|
||||
//results, err = T.resultSvc.List(query, nil)
|
||||
//require.Nil(t, err)
|
||||
//require.Equal(t, n/2, len(results))
|
||||
}
|
||||
|
||||
func TestResultService_Count(t *testing.T) {
|
||||
var err error
|
||||
T.Setup(t)
|
||||
|
||||
n := 1000
|
||||
var docs []interface{}
|
||||
for i := 0; i < n; i++ {
|
||||
d := &models.Result{
|
||||
"i": i,
|
||||
}
|
||||
docs = append(docs, d)
|
||||
}
|
||||
_, err = T.TestCol.InsertMany(docs)
|
||||
require.Nil(t, err)
|
||||
|
||||
// get all
|
||||
total, err := T.resultSvc.Count(nil)
|
||||
require.Nil(t, err)
|
||||
require.Equal(t, n, total)
|
||||
|
||||
//query := bson.M{
|
||||
// "i": bson.M{
|
||||
// "$lt": n / 2,
|
||||
// },
|
||||
//}
|
||||
//total, err = T.resultSvc.Count(query)
|
||||
//require.Nil(t, err)
|
||||
//require.Equal(t, n/2, total)
|
||||
}
|
||||
Reference in New Issue
Block a user