Skip to content

Commit

Permalink
Use token based approach for system-agent
Browse files Browse the repository at this point in the history
Reduce the footprint of the system-agent RBAC
For each cluster, the following will be created:
- 1 system-agent ServiceAccount

For each plan, the following will be temporarily created:
- 1 Role with access to all plan secrets for each machine
- 1 RoleBinding for the role and the cluster system-agent ServiceAccount
On plan completion/failure the role and rolebinding will be revoked

For each machine, the following will be created:
- 1 Secret for the system-agent authentication, with unique JWT bound to
  the secret existence in the API server, and a namespace/name pointer
  to the plan secret
- 1 Secret for the plan execution

Signed-off-by: Danil-Grigorev <danil.grigorev@suse.com>
  • Loading branch information
Danil-Grigorev committed Oct 24, 2024
1 parent f2d9579 commit d96b0f5
Show file tree
Hide file tree
Showing 11 changed files with 306 additions and 167 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,12 @@ rules:
- patch
- update
- watch
- apiGroups:
- ""
resources:
- serviceaccounts/token
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
Expand Down
2 changes: 1 addition & 1 deletion exp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ export CLUSTER_NAMESPACE=default
export CLUSTER_NAME=rke2
export ETCD_MACHINE_SNAPSHOT_NAME="<snapshot_name_from_the_output>"

envsubst < etcdrestore/examples/etcd-restore.yaml | kubectl apply -f -
envsubst < exp/etcdrestore/examples/etcd-restore.yaml | kubectl apply -f -
```

## Cleanup
Expand Down
6 changes: 6 additions & 0 deletions exp/etcdrestore/config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ rules:
- patch
- update
- watch
- apiGroups:
- ""
resources:
- serviceaccounts/token
verbs:
- create
- apiGroups:
- authorization.k8s.io
resources:
Expand Down
45 changes: 31 additions & 14 deletions exp/etcdrestore/controllers/etcdsnapshotrestore_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ import (
snapshotrestorev1 "github.com/rancher/turtles/exp/etcdrestore/api/v1alpha1"
)

// InitMachine is a filter matching on init machine of the ETCD snapshot
func InitMachine(etcdMachineSnapshot *snapshotrestorev1.ETCDMachineSnapshot) collections.Func {
// initMachine is a filter matching on init machine of the ETCD snapshot
func initMachine(etcdMachineSnapshot *snapshotrestorev1.ETCDMachineSnapshot) collections.Func {
return func(machine *clusterv1.Machine) bool {
return machine.Name == etcdMachineSnapshot.Spec.MachineName
}
Expand Down Expand Up @@ -104,6 +104,7 @@ type scope struct {
//+kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters/status,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups="",resources=secrets;events;configmaps;serviceaccounts,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups="",resources=serviceaccounts/token,verbs=create
//+kubebuilder:rbac:groups="rbac.authorization.k8s.io",resources=roles;rolebindings,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups="management.cattle.io",resources=*,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=bootstrap.cluster.x-k8s.io,resources=rke2configs;rke2configs/status;rke2configs/finalizers,verbs=get;list;watch;create;update;patch;delete
Expand Down Expand Up @@ -159,7 +160,7 @@ func (r *ETCDSnapshotRestoreReconciler) reconcileNormal(ctx context.Context, etc
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
}

if scope.machines.Filter(InitMachine(scope.etcdMachineSnapshot)).Len() != 1 {
if scope.machines.Filter(initMachine(scope.etcdMachineSnapshot)).Len() != 1 {
return ctrl.Result{}, fmt.Errorf(
"init machine %s for snapshot %s is not found",
scope.etcdMachineSnapshot.Spec.MachineName,
Expand Down Expand Up @@ -191,15 +192,13 @@ func (r *ETCDSnapshotRestoreReconciler) reconcileNormal(ctx context.Context, etc

return ctrl.Result{}, nil
case snapshotrestorev1.ETCDSnapshotRestorePhaseStarted:
etcdSnapshotRestore.Status.Phase = snapshotrestorev1.ETCDSnapshotRestorePhaseShutdown

return ctrl.Result{}, nil
return r.preparePlanPermissions(ctx, scope, etcdSnapshotRestore)
case snapshotrestorev1.ETCDSnapshotRestorePhaseShutdown:
// Stop RKE2 on all the machines.
return r.stopRKE2OnAllMachines(ctx, scope, etcdSnapshotRestore)
case snapshotrestorev1.ETCDSnapshotRestorePhaseRunning:
// Restore the etcd snapshot on the init machine.
return r.restoreSnaphotOnInitMachine(ctx, scope, etcdSnapshotRestore)
return r.restoreSnapshotOnInitMachine(ctx, scope, etcdSnapshotRestore)
case snapshotrestorev1.ETCDSnapshotRestorePhaseAgentRestart:
// Start RKE2 on all the machines.
return r.startRKE2OnAllMachines(ctx, scope, etcdSnapshotRestore)
Expand All @@ -212,7 +211,7 @@ func (r *ETCDSnapshotRestoreReconciler) reconcileNormal(ctx context.Context, etc
case snapshotrestorev1.ETCDSnapshotRestorePhaseJoinAgents:
return r.waitForMachinesToJoin(ctx, scope, etcdSnapshotRestore)
case snapshotrestorev1.ETCDSnapshotRestorePhaseFinished, snapshotrestorev1.ETCDSnapshotRestorePhaseFailed:
return ctrl.Result{}, nil
return r.revokePlanPermissions(ctx, scope, etcdSnapshotRestore)
}

return ctrl.Result{}, nil
Expand Down Expand Up @@ -251,6 +250,24 @@ func initScope(ctx context.Context, c client.Client, etcdSnapshotRestore *snapsh
}, nil
}

// preparePlanPermissions grants the permissions required by the restore plan
// and, once that succeeds, moves the restore to the Shutdown phase.
//
// The plan is named "restore" followed by the ETCDSnapshotRestore name and is
// built from the newest machine in scope together with the full machine
// collection. If Permit fails, the error is returned wrapped with context and
// the phase is left unchanged.
func (r *ETCDSnapshotRestoreReconciler) preparePlanPermissions(ctx context.Context, scope *scope, etcdSnapshotRestore *snapshotrestorev1.ETCDSnapshotRestore) (ctrl.Result, error) {
	// NOTE(review): the result of scope.machines.Newest() is passed on without
	// a nil check; presumably the machine collection is never empty at this
	// point — confirm against the caller.
	if err := Plan(ctx, r.Client, "restore"+etcdSnapshotRestore.Name, scope.machines.Newest(), scope.machines).Permit(ctx); err != nil {
		return ctrl.Result{}, fmt.Errorf("failed to prepare plan permissions: %w", err)
	}

	// Permissions are in place; the next phase stops RKE2 on the machines.
	etcdSnapshotRestore.Status.Phase = snapshotrestorev1.ETCDSnapshotRestorePhaseShutdown

	return ctrl.Result{}, nil
}

// revokePlanPermissions withdraws the permissions that were granted for the
// restore plan. It addresses the same plan as preparePlanPermissions: the name
// is "restore" followed by the ETCDSnapshotRestore name, built from the newest
// machine in scope and the full machine collection.
//
// The restore phase is not modified. If Revoke fails, the error is returned
// wrapped with context.
func (r *ETCDSnapshotRestoreReconciler) revokePlanPermissions(ctx context.Context, scope *scope, etcdSnapshotRestore *snapshotrestorev1.ETCDSnapshotRestore) (ctrl.Result, error) {
	// NOTE(review): the result of scope.machines.Newest() is passed on without
	// a nil check; presumably the machine collection is never empty at this
	// point — confirm against the caller.
	if err := Plan(ctx, r.Client, "restore"+etcdSnapshotRestore.Name, scope.machines.Newest(), scope.machines).Revoke(ctx); err != nil {
		return ctrl.Result{}, fmt.Errorf("failed to revoke plan permissions: %w", err)
	}

	return ctrl.Result{}, nil
}

func (r *ETCDSnapshotRestoreReconciler) stopRKE2OnAllMachines(ctx context.Context, scope *scope, etcdSnapshotRestore *snapshotrestorev1.ETCDSnapshotRestore) (ctrl.Result, error) {
log := log.FromContext(ctx)

Expand All @@ -259,7 +276,7 @@ func (r *ETCDSnapshotRestoreReconciler) stopRKE2OnAllMachines(ctx context.Contex
log.Info("Stopping RKE2 on machine", "machine", machine.Name)

// Get the plan secret for the machine.
applied, err := Plan(ctx, r.Client, machine).Apply(ctx, RKE2KillAll())
applied, err := Plan(ctx, r.Client, "restore"+etcdSnapshotRestore.Name, machine, scope.machines).Apply(ctx, RKE2KillAll())
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to get plan secret for machine: %w", err)
}
Expand All @@ -286,15 +303,15 @@ func (r *ETCDSnapshotRestoreReconciler) stopRKE2OnAllMachines(ctx context.Contex
return ctrl.Result{}, nil
}

func (r *ETCDSnapshotRestoreReconciler) restoreSnaphotOnInitMachine(ctx context.Context, scope *scope, etcdSnapshotRestore *snapshotrestorev1.ETCDSnapshotRestore) (ctrl.Result, error) {
func (r *ETCDSnapshotRestoreReconciler) restoreSnapshotOnInitMachine(ctx context.Context, scope *scope, etcdSnapshotRestore *snapshotrestorev1.ETCDSnapshotRestore) (ctrl.Result, error) {
log := log.FromContext(ctx)

initMachine := scope.machines.Filter(InitMachine(scope.etcdMachineSnapshot)).UnsortedList()[0]
initMachine := scope.machines.Filter(initMachine(scope.etcdMachineSnapshot)).UnsortedList()[0]

log.Info("Filling plan secret with etcd restore instructions", "machine", initMachine.Name)

// Get the plan secret for the machine.
applied, err := Plan(ctx, r.Client, initMachine).Apply(
applied, err := Plan(ctx, r.Client, "restore"+etcdSnapshotRestore.Name, initMachine, scope.machines).Apply(
ctx,
RemoveServerURL(),
ManifestRemoval(),
Expand All @@ -318,7 +335,7 @@ func (r *ETCDSnapshotRestoreReconciler) restoreSnaphotOnInitMachine(ctx context.
func (r *ETCDSnapshotRestoreReconciler) startRKE2OnAllMachines(ctx context.Context, scope *scope, etcdSnapshotRestore *snapshotrestorev1.ETCDSnapshotRestore) (ctrl.Result, error) {
log := log.FromContext(ctx)

initMachine := scope.machines.Filter(InitMachine(scope.etcdMachineSnapshot)).UnsortedList()[0]
initMachine := scope.machines.Filter(initMachine(scope.etcdMachineSnapshot)).UnsortedList()[0]

// TODO: other registration methods
initMachineIP := getInternalMachineIP(initMachine)
Expand Down Expand Up @@ -350,7 +367,7 @@ func (r *ETCDSnapshotRestoreReconciler) startRKE2OnAllMachines(ctx context.Conte
StartRKE2())
}

applied, err := Plan(ctx, r.Client, machine).Apply(ctx, instructions...)
applied, err := Plan(ctx, r.Client, "restore"+etcdSnapshotRestore.Name, machine, scope.machines).Apply(ctx, instructions...)
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to patch plan secret: %w", err)
} else if !applied.Finished {
Expand Down
101 changes: 101 additions & 0 deletions exp/etcdrestore/controllers/etcdsnapshotrestore_controller_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
Copyright © 2023 - 2024 SUSE LLC
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controllers

import (
. "github.com/onsi/ginkgo/v2"
// . "github.com/onsi/gomega"
bootstrapv1 "github.com/rancher/cluster-api-provider-rke2/bootstrap/api/v1beta1"
// corev1 "k8s.io/api/core/v1"
)

// Package-level fixtures for the specs in this file.
// NOTE(review): in the visible code these are only referenced from
// commented-out specs (rke2Config, planSecretName) or not at all; confirm
// whether other test files in the package still use them before removing any.
var (
rke2Config *bootstrapv1.RKE2Config
serviceAccountName string
serviceAccountNamespace string
planSecretName string
serverUrl string
pem string
systemAgentVersion string
token []byte
)

// Placeholder suite: every spec in the body is commented out, so this
// container currently registers no runnable specs.
// NOTE(review): the suite title refers to RKE2ConfigWebhook although this file
// is the ETCDSnapshotRestore controller test — confirm the intended title.
// NOTE(review): the "role" and "role binding" specs below appear twice with
// identical bodies; presumably one copy can be dropped when they are re-enabled.
var _ = Describe("RKE2ConfigWebhook tests", func() {
// It("Should create a role with the correct properties", func() {
// role := r.createRole(planSecretName, rke2Config)

// Expect(role.ObjectMeta.Name).To(Equal(planSecretName))
// Expect(role.ObjectMeta.Namespace).To(Equal(rke2Config.Namespace))
// Expect(role.Rules[0].Verbs).To(Equal([]string{"watch", "get", "update", "list"}))
// Expect(role.Rules[0].APIGroups).To(Equal([]string{""}))
// Expect(role.Rules[0].Resources).To(Equal([]string{"secrets"}))
// Expect(role.Rules[0].ResourceNames).To(Equal([]string{planSecretName}))
// })

// It("Should create a role binding with the correct properties", func() {
// roleBinding := r.createRoleBinding(planSecretName, rke2Config)

// Expect(roleBinding.ObjectMeta.Name).To(Equal(planSecretName))
// Expect(roleBinding.ObjectMeta.Namespace).To(Equal(rke2Config.Namespace))
// Expect(roleBinding.Subjects[0].Kind).To(Equal("ServiceAccount"))
// Expect(roleBinding.Subjects[0].Name).To(Equal(planSecretName))
// Expect(roleBinding.Subjects[0].Namespace).To(Equal(rke2Config.Namespace))
// Expect(roleBinding.RoleRef.APIGroup).To(Equal(rbacv1.GroupName))
// Expect(roleBinding.RoleRef.Kind).To(Equal("Role"))
// Expect(roleBinding.RoleRef.Name).To(Equal(planSecretName))
// })

// It("Should create a role with the correct properties", func() {
// role := r.createRole(planSecretName, rke2Config)

// Expect(role.ObjectMeta.Name).To(Equal(planSecretName))
// Expect(role.ObjectMeta.Namespace).To(Equal(rke2Config.Namespace))
// Expect(role.Rules[0].Verbs).To(Equal([]string{"watch", "get", "update", "list"}))
// Expect(role.Rules[0].APIGroups).To(Equal([]string{""}))
// Expect(role.Rules[0].Resources).To(Equal([]string{"secrets"}))
// Expect(role.Rules[0].ResourceNames).To(Equal([]string{planSecretName}))
// })

// It("Should create a role binding with the correct properties", func() {
// roleBinding := r.createRoleBinding(planSecretName, rke2Config)

// Expect(roleBinding.ObjectMeta.Name).To(Equal(planSecretName))
// Expect(roleBinding.ObjectMeta.Namespace).To(Equal(rke2Config.Namespace))
// Expect(roleBinding.Subjects[0].Kind).To(Equal("ServiceAccount"))
// Expect(roleBinding.Subjects[0].Name).To(Equal(planSecretName))
// Expect(roleBinding.Subjects[0].Namespace).To(Equal(rke2Config.Namespace))
// Expect(roleBinding.RoleRef.APIGroup).To(Equal(rbacv1.GroupName))
// Expect(roleBinding.RoleRef.Kind).To(Equal("Role"))
// Expect(roleBinding.RoleRef.Name).To(Equal(planSecretName))
// })

// It("Should create a service account secret with the correct properties", func() {
// secret := r.createServiceAccountSecret(planSecretName, rke2Config)

// Expect(secret.ObjectMeta.Name).To(Equal(fmt.Sprintf("%s-token", planSecretName)))
// Expect(secret.ObjectMeta.Namespace).To(Equal(rke2Config.Namespace))
// Expect(secret.ObjectMeta.Annotations["kubernetes.io/service-account.name"]).To(Equal(planSecretName))
// Expect(secret.ObjectMeta.Labels[serviceAccountSecretLabel]).To(Equal(planSecretName))
// Expect(secret.Type).To(Equal(corev1.SecretTypeServiceAccountToken))
// })

// It("Should return service account token when secret is present and populated", func() {
// token, err := r.issueBootstrapToken(ctx, planSecretName)
// Expect(err).ToNot(HaveOccurred())
// Expect(token).To(Equal([]byte("test-token")))
// })
})
Loading

0 comments on commit d96b0f5

Please sign in to comment.