-
Notifications
You must be signed in to change notification settings - Fork 164
/
sequencer_failover_test.go
174 lines (148 loc) · 6.77 KB
/
sequencer_failover_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
package op_e2e
import (
"context"
"sort"
"testing"
"github.com/stretchr/testify/require"
"github.com/ethereum-optimism/optimism/op-conductor/consensus"
"github.com/ethereum-optimism/optimism/op-service/retry"
)
// [Category: Initial Setup]
// In this test, we test that we can successfully setup a working cluster.
func TestSequencerFailover_SetupCluster(t *testing.T) {
	_, conductors, teardown := setupSequencerFailoverTest(t)
	defer teardown()

	// The failover setup is expected to bring up one conductor per sequencer.
	require.Len(t, conductors, 3, "Expected 3 conductors")
	for _, cond := range conductors {
		require.NotNil(t, cond, "Expected conductor to be non-nil")
	}
}
// [Category: conductor rpc]
// In this test, we test all rpcs exposed by conductor:
// ClusterMembership, Active/Pause/Resume, LeaderWithID, TransferLeader,
// TransferLeaderToServer, AddServerAsNonvoter and RemoveServer.
// SequencerHealthy, Leader and AddServerAsVoter are already exercised by setup.
func TestSequencerFailover_ConductorRPC(t *testing.T) {
	ctx := context.Background()
	sys, conductors, cleanup := setupSequencerFailoverTest(t)
	defer cleanup()

	// Test ClusterMembership: all three sequencers must be present as voters.
	t.Log("Testing ClusterMembership")
	c1 := conductors[Sequencer1Name]
	c2 := conductors[Sequencer2Name]
	c3 := conductors[Sequencer3Name]
	membership, err := c1.client.ClusterMembership(ctx)
	require.NoError(t, err)
	require.Equal(t, 3, len(membership), "Expected 3 members in cluster")
	ids := make([]string, 0)
	for _, member := range membership {
		ids = append(ids, member.ID)
		require.Equal(t, consensus.Voter, member.Suffrage, "Expected all members to be voters")
	}
	// Membership order is not guaranteed; sort before comparing.
	sort.Strings(ids)
	require.Equal(t, []string{Sequencer1Name, Sequencer2Name, Sequencer3Name}, ids, "Expected all sequencers to be in cluster")

	// Test Active & Pause & Resume: pausing flips Active to false, resuming restores it.
	t.Log("Testing Active & Pause & Resume")
	active, err := c1.client.Active(ctx)
	require.NoError(t, err)
	require.True(t, active, "Expected conductor to be active")
	err = c1.client.Pause(ctx)
	require.NoError(t, err)
	active, err = c1.client.Active(ctx)
	require.NoError(t, err)
	require.False(t, active, "Expected conductor to be paused")
	err = c1.client.Resume(ctx)
	require.NoError(t, err)
	active, err = c1.client.Active(ctx)
	require.NoError(t, err)
	require.True(t, active, "Expected conductor to be active")

	// Test LeaderWithID: every node must agree on who the leader is.
	t.Log("Testing LeaderWithID")
	leader1, err := c1.client.LeaderWithID(ctx)
	require.NoError(t, err)
	leader2, err := c2.client.LeaderWithID(ctx)
	require.NoError(t, err)
	leader3, err := c3.client.LeaderWithID(ctx)
	require.NoError(t, err)
	require.Equal(t, leader1.ID, leader2.ID, "Expected leader ID to be the same")
	require.Equal(t, leader1.ID, leader3.ID, "Expected leader ID to be the same")

	// Test TransferLeader: the current leader voluntarily hands off to any other node.
	t.Log("Testing TransferLeader")
	lid, leader := findLeader(t, conductors)
	err = leader.client.TransferLeader(ctx)
	require.NoError(t, err, "Expected leader to transfer leadership to another node")
	_ = waitForLeadershipChange(t, leader, lid, conductors, sys)

	// old leader now became follower, we're trying to transfer leadership directly back to it.
	t.Log("Testing TransferLeaderToServer")
	fid, follower := lid, leader
	lid, leader = findLeader(t, conductors)
	err = leader.client.TransferLeaderToServer(ctx, fid, follower.ConsensusEndpoint())
	require.NoError(t, err, "Expected leader to transfer leadership to follower")
	newID := waitForLeadershipChange(t, leader, lid, conductors, sys)
	require.Equal(t, fid, newID, "Expected leader to transfer to %s", fid)
	leader = follower

	// Test AddServerAsNonvoter, do not start a new sequencer just for this purpose, use Sequencer3's rpc to start conductor.
	// This is fine as this mainly tests conductor's ability to add itself into the raft consensus cluster as a nonvoter.
	t.Log("Testing AddServerAsNonvoter")
	nonvoter, err := retry.Do[*conductor](ctx, maxSetupRetries, retryStrategy, func() (*conductor, error) {
		return setupConductor(
			t, VerifierName, t.TempDir(),
			sys.RollupEndpoint(Sequencer3Name),
			sys.NodeEndpoint(Sequencer3Name),
			findAvailablePort(t),
			false,
			*sys.RollupConfig,
		)
	})
	require.NoError(t, err)
	defer func() {
		err = nonvoter.service.Stop(ctx)
		require.NoError(t, err)
	}()
	err = leader.client.AddServerAsNonvoter(ctx, VerifierName, nonvoter.ConsensusEndpoint())
	require.NoError(t, err, "Expected leader to add non-voter")
	membership, err = leader.client.ClusterMembership(ctx)
	require.NoError(t, err)
	require.Equal(t, 4, len(membership), "Expected 4 members in cluster")
	require.Equal(t, consensus.Nonvoter, membership[3].Suffrage, "Expected last member to be non-voter")

	// RemoveServer must be rejected when issued against a follower.
	t.Log("Testing RemoveServer, call remove on follower, expected to fail")
	lid, leader = findLeader(t, conductors)
	fid, follower = findFollower(t, conductors)
	err = follower.client.RemoveServer(ctx, lid)
	require.ErrorContains(t, err, "node is not the leader", "Expected follower to fail to remove leader")
	membership, err = c1.client.ClusterMembership(ctx)
	require.NoError(t, err)
	require.Equal(t, 4, len(membership), "Expected 4 members in cluster")

	t.Log("Testing RemoveServer, call remove on leader, expect non-voter to be removed")
	err = leader.client.RemoveServer(ctx, VerifierName)
	require.NoError(t, err, "Expected leader to remove non-voter")
	membership, err = c1.client.ClusterMembership(ctx)
	require.NoError(t, err)
	require.Equal(t, 3, len(membership), "Expected 3 members in cluster after removal")
	// membership is a slice of member structs, so NotContains against a raw
	// string would be vacuously true; compare against the member IDs instead.
	ids = ids[:0]
	for _, member := range membership {
		ids = append(ids, member.ID)
	}
	require.NotContains(t, ids, VerifierName, "Expected follower to be removed from cluster")

	t.Log("Testing RemoveServer, call remove on leader, expect voter to be removed")
	err = leader.client.RemoveServer(ctx, fid)
	require.NoError(t, err, "Expected leader to remove follower")
	membership, err = c1.client.ClusterMembership(ctx)
	require.NoError(t, err)
	require.Equal(t, 2, len(membership), "Expected 2 members in cluster after removal")
	ids = ids[:0]
	for _, member := range membership {
		ids = append(ids, member.ID)
	}
	require.NotContains(t, ids, fid, "Expected follower to be removed from cluster")
}
// [Category: Sequencer Failover]
// Test that the sequencer can successfully failover to a new sequencer once the active sequencer goes down.
func TestSequencerFailover_ActiveSequencerDown(t *testing.T) {
	sys, conductors, cleanup := setupSequencerFailoverTest(t)
	defer cleanup()

	ctx := context.Background()
	oldLeaderID, oldLeader := findLeader(t, conductors)

	// Kill the rollup node that currently holds leadership.
	require.NoError(t, sys.RollupNodes[oldLeaderID].Stop(ctx)) // Stop the current leader sequencer

	// The leadership change should occur with no errors
	newID := waitForLeadershipChange(t, oldLeader, oldLeaderID, conductors, sys)
	require.NotEqual(t, oldLeaderID, newID, "Expected leader to change")

	// Confirm the new leader is different from the old leader
	newLeaderID, _ := findLeader(t, conductors)
	require.NotEqual(t, oldLeaderID, newLeaderID, "Expected leader to change")

	// Check that the sequencer is healthy
	require.True(t, healthy(t, ctx, conductors[newLeaderID]))

	// Check if the new leader is sequencing
	sequencing, err := sys.RollupClient(newLeaderID).SequencerActive(ctx)
	require.NoError(t, err)
	require.True(t, sequencing, "Expected new leader to be sequencing")
}