-
Notifications
You must be signed in to change notification settings - Fork 170
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
518 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
package actuator | ||
|
||
// Copyright (c) Microsoft Corporation. | ||
// Licensed under the Apache License 2.0. | ||
|
||
import ( | ||
"context" | ||
|
||
"github.com/Azure/ARO-RP/pkg/api" | ||
) | ||
|
||
// controller updates the monitor document with the list of buckets balanced between | ||
// registered workers | ||
func (s *service) controller(ctx context.Context) error { | ||
var doc *api.BucketServiceDocument | ||
var err error | ||
|
||
// if we know we're not the controller, attempt to gain the lease on the monitor | ||
// document | ||
if !s.isController { | ||
doc, err = s.dbBucketServices.TryLease(ctx, s.serviceName) | ||
if err != nil || doc == nil { | ||
return err | ||
} | ||
s.isController = true | ||
} | ||
|
||
// we know we're not the controller; give up | ||
if !s.isController { | ||
return nil | ||
} | ||
|
||
// we think we're the controller. Gather up all the registered workers | ||
// including ourself, balance buckets between them and write the bucket | ||
// allocations to the database. If it turns out that we're not the controller, | ||
// the patch will fail | ||
_, err = s.dbBucketServices.PatchWithLease(ctx, doc.ID, func(doc *api.BucketServiceDocument) error { | ||
docs, err := s.dbBucketServices.ListBucketServices(ctx, s.serviceName) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
var workers []string | ||
if docs != nil { | ||
workers = make([]string, 0, len(docs.BucketServiceDocuments)) | ||
for _, doc := range docs.BucketServiceDocuments { | ||
workers = append(workers, doc.ID) | ||
} | ||
} | ||
|
||
doc.Buckets = s.b.Balance(workers, doc.Buckets) | ||
|
||
return nil | ||
}) | ||
if err != nil && err.Error() == "lost lease" { | ||
s.isController = false | ||
} | ||
return err | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
package actuator | ||
|
||
// Copyright (c) Microsoft Corporation. | ||
// Licensed under the Apache License 2.0. | ||
|
||
import ( | ||
"context" | ||
"net/http" | ||
"sync" | ||
"sync/atomic" | ||
"time" | ||
|
||
"github.com/sirupsen/logrus" | ||
|
||
"github.com/Azure/ARO-RP/pkg/api" | ||
"github.com/Azure/ARO-RP/pkg/database" | ||
"github.com/Azure/ARO-RP/pkg/database/cosmosdb" | ||
"github.com/Azure/ARO-RP/pkg/env" | ||
"github.com/Azure/ARO-RP/pkg/metrics" | ||
"github.com/Azure/ARO-RP/pkg/proxy" | ||
"github.com/Azure/ARO-RP/pkg/util/buckets" | ||
"github.com/Azure/ARO-RP/pkg/util/heartbeat" | ||
"github.com/Azure/ARO-RP/pkg/util/recover" | ||
) | ||
|
||
// Runnable is a long-running service that can be started and asked to stop.
type Runnable interface {
	// Run runs the service using ctx. stop is the channel on which the caller
	// requests shutdown and done is the channel on which the implementation
	// reports that it has finished.
	Run(ctx context.Context, stop <-chan struct{}, done chan<- struct{}) error
}
|
||
type service struct { | ||
dialer proxy.Dialer | ||
baseLog *logrus.Entry | ||
env env.Interface | ||
|
||
dbBucketServices database.BucketServices | ||
dbOpenShiftClusters database.OpenShiftClusters | ||
dbMaintenanceManifests database.MaintenanceManifests | ||
serviceName string | ||
|
||
m metrics.Emitter | ||
mu sync.RWMutex | ||
b buckets.BucketWorker | ||
isController bool | ||
stopping *atomic.Bool | ||
|
||
lastChangefeed atomic.Value //time.Time | ||
lastBucketlist atomic.Value //time.Time | ||
startTime time.Time | ||
} | ||
|
||
func NewService(log *logrus.Entry, dialer proxy.Dialer, dbBucketServices database.BucketServices, dbOpenShiftClusters database.OpenShiftClusters, dbMaintenanceManifests database.MaintenanceManifests, m metrics.Emitter) Runnable { | ||
s := &service{ | ||
baseLog: log, | ||
dialer: dialer, | ||
|
||
dbBucketServices: dbBucketServices, | ||
dbOpenShiftClusters: dbOpenShiftClusters, | ||
|
||
m: m, | ||
serviceName: "actuator", | ||
stopping: &atomic.Bool{}, | ||
|
||
startTime: time.Now(), | ||
} | ||
|
||
s.b = buckets.NewBucketWorker(log, s.worker, &s.mu) | ||
return s | ||
} | ||
|
||
func (s *service) Run(ctx context.Context, stop <-chan struct{}, done chan<- struct{}) error { | ||
defer recover.Panic(s.baseLog) | ||
|
||
_, err := s.dbBucketServices.Create(ctx, &api.BucketServiceDocument{ | ||
ServiceName: s.serviceName, | ||
ServiceRole: "controller", | ||
}) | ||
if err != nil && !cosmosdb.IsErrorStatusCode(err, http.StatusPreconditionFailed) { | ||
return err | ||
} | ||
|
||
// fill the cache from the database change feed | ||
go s.changefeed(ctx, s.baseLog.WithField("component", "changefeed"), nil) | ||
|
||
t := time.NewTicker(10 * time.Second) | ||
defer t.Stop() | ||
|
||
if stop != nil { | ||
go func() { | ||
defer recover.Panic(s.baseLog) | ||
|
||
<-stop | ||
s.baseLog.Print("stopping") | ||
s.stopping.Store(true) | ||
}() | ||
} | ||
|
||
go heartbeat.EmitHeartbeat(s.baseLog, s.m, s.serviceName+".heartbeat", nil, s.checkReady) | ||
|
||
for { | ||
if s.stopping.Load() { | ||
break | ||
} | ||
|
||
// register ourself as a worker | ||
err = s.dbBucketServices.BucketServiceHeartbeat(ctx, s.serviceName) | ||
if err != nil { | ||
s.baseLog.Error(err) | ||
} | ||
|
||
// try to become controller and share buckets across registered monitors | ||
err = s.controller(ctx) | ||
if err != nil { | ||
s.baseLog.Error(err) | ||
} | ||
|
||
// read our bucket allocation from the controller | ||
buckets, err := s.dbBucketServices.ListBuckets(ctx, s.serviceName) | ||
s.b.LoadBuckets(buckets) | ||
if err != nil { | ||
s.baseLog.Error(err) | ||
} else { | ||
s.lastBucketlist.Store(time.Now()) | ||
} | ||
|
||
<-t.C | ||
} | ||
|
||
s.baseLog.Print("exiting") | ||
close(done) | ||
return nil | ||
} | ||
|
||
// checkReady checks the ready status of the monitor to make it consistent | ||
// across the /healthz/ready endpoint and emitted metrics. We wait for 2 | ||
// minutes before indicating health. This ensures that there will be a gap in | ||
// our health metric if we crash or restart. | ||
func (s *service) checkReady() bool { | ||
lastBucketTime, ok := s.lastBucketlist.Load().(time.Time) | ||
if !ok { | ||
return false | ||
} | ||
lastChangefeedTime, ok := s.lastChangefeed.Load().(time.Time) | ||
if !ok { | ||
return false | ||
} | ||
return (time.Since(lastBucketTime) < time.Minute) && // did we list buckets successfully recently? | ||
(time.Since(lastChangefeedTime) < time.Minute) && // did we process the change feed recently? | ||
(time.Since(s.startTime) > 2*time.Minute) // are we running for at least 2 minutes? | ||
} |
Oops, something went wrong.