
Commit caeafab

Authored and committed by mircea-cosbuc and fealebenpae
CLOUDP-230405: Add appDB multi-cluster member config (#3725)
# Summary

Based on 10gen/ops-manager-kubernetes#3447, this patch adds multi-cluster support for AppDB member configuration. This also requires a patch in the community operator: mongodb/mongodb-kubernetes-operator#1610.

## Documentation changes

* [ ] Add an entry to [release notes](.../RELEASE_NOTES.md).
* [ ] When needed, make sure you create a new [DOCSP ticket](https://jira.mongodb.org/projects/DOCSP) that documents your change.

## Changes to CRDs

* [ ] Add `slaskawi` (Sebastian) and `@giohan` (George) as reviewers.
* [ ] Make sure any changes are reflected in the `/public/samples` directory.
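To illustrate the shape of the feature this commit enables, here is a minimal, hypothetical sketch of a `MongoDBOpsManager` resource with per-cluster AppDB member options. The cluster names, versions, and values are assumptions for illustration only; the `clusterSpecList`/`memberConfig` structure mirrors the `MemberConfig` handling added in this patch, and the files under `/public/samples` remain the authoritative reference.

```yaml
# Hypothetical example only: cluster names and values are illustrative.
apiVersion: mongodb.com/v1
kind: MongoDBOpsManager
metadata:
  name: ops-manager
spec:
  version: 7.0.0
  applicationDatabase:
    topology: MultiCluster
    version: 6.0.5-ent
    clusterSpecList:
      - clusterName: cluster-1
        members: 2
        memberConfig:            # one entry per member, in member order
          - votes: 1
            priority: "1.5"
            tags:
              dc: east
          - votes: 1
            priority: "1.0"
      - clusterName: cluster-2
        members: 1
        memberConfig:
          - votes: 1
            priority: "0.5"
```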
1 parent 30c839c commit caeafab

10 files changed (+488 -221 lines)


.evergreen.yml

+5-5
@@ -1534,7 +1534,7 @@ tasks:
     - func: "e2e_test"

 # E2E tests for Ops Manager (sorted alphabetically):
-- name: e2e_om_appdb_agent_flags
+- name: e2e_om_appdb_flags_and_config
   tags: ["patch-run"]
   commands:
     - func: "e2e_test"
@@ -2293,7 +2293,7 @@ task_groups:
   - e2e_multi_cluster_appdb_disaster_recovery_force_reconfigure
   - e2e_multi_cluster_om_networking_clusterwide
   # Reused OM tests with AppDB Multi-Cluster topology
-  - e2e_om_appdb_agent_flags
+  - e2e_om_appdb_flags_and_config
   - e2e_om_appdb_upgrade
   - e2e_om_appdb_monitoring_tls
   - e2e_om_ops_manager_backup
@@ -2343,7 +2343,7 @@
   - e2e_multi_cluster_appdb_disaster_recovery_force_reconfigure
   - e2e_multi_cluster_om_networking_clusterwide
   # Reused OM tests with AppDB Multi-Cluster topology
-  - e2e_om_appdb_agent_flags
+  - e2e_om_appdb_flags_and_config
   - e2e_om_appdb_upgrade
   - e2e_om_appdb_monitoring_tls
   - e2e_om_ops_manager_backup
@@ -2395,7 +2395,7 @@
   <<: *setup_group
   <<: *setup_and_teardown_task_cloudqa
   tasks:
-    - e2e_om_appdb_agent_flags
+    - e2e_om_appdb_flags_and_config
     - e2e_om_appdb_monitoring_tls
     - e2e_om_appdb_multi_change
     - e2e_om_appdb_scale_up_down
@@ -2439,7 +2439,7 @@
   <<: *setup_group
   <<: *setup_and_teardown_task_cloudqa
   tasks:
-    - e2e_om_appdb_agent_flags
+    - e2e_om_appdb_flags_and_config
     - e2e_om_appdb_monitoring_tls
     - e2e_om_appdb_multi_change
     - e2e_om_appdb_scale_up_down

api/v1/om/appdb_types.go

+3-2
@@ -530,8 +530,9 @@ func (m *AppDBSpec) GetClusterSpecList() []mdbv1.ClusterSpecItem {
     } else {
         return []mdbv1.ClusterSpecItem{
             {
-                ClusterName: multicluster.LegacyCentralClusterName,
-                Members:     m.Members,
+                ClusterName:  multicluster.LegacyCentralClusterName,
+                Members:      m.Members,
+                MemberConfig: m.GetMemberOptions(),
             },
         }
     }
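For the single-cluster path above, the effect is that member options defined on the AppDB spec are now attached to the implicit legacy central cluster entry as well. A minimal sketch follows, assuming the spec field backing `GetMemberOptions()` is an AppDB-level `memberConfig` list; the field name and values here are illustrative assumptions, not taken from this diff.

```yaml
# Hypothetical single-cluster AppDB: with this change, these options are
# carried onto the implicit LegacyCentralClusterName ClusterSpecItem
# via MemberConfig: m.GetMemberOptions().
spec:
  applicationDatabase:
    members: 3
    memberConfig:
      - votes: 1
        priority: "2.0"
      - votes: 1
        priority: "1.0"
      - votes: 0
        priority: "0"
```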

controllers/operator/appdbreplicaset_controller.go

+70-24
@@ -46,6 +46,7 @@ import (
     corev1 "k8s.io/api/core/v1"
     apiErrors "k8s.io/apimachinery/pkg/api/errors"
     "k8s.io/apimachinery/pkg/types"
+
     "sigs.k8s.io/controller-runtime/pkg/client"
     "sigs.k8s.io/controller-runtime/pkg/reconcile"

@@ -73,6 +74,7 @@ import (
     "github.com/mongodb/mongodb-kubernetes-operator/pkg/kube/secret"
     "github.com/mongodb/mongodb-kubernetes-operator/pkg/kube/statefulset"
     "go.uber.org/zap"
+    "k8s.io/utils/ptr"
 )

 type agentType string
@@ -647,7 +649,7 @@ func (r *ReconcileAppDbReplicaSet) ReconcileAppDB(ctx context.Context, opsManage
     // this doesn't requeue the reconciliation immediately, the calling OM controller
     // requeues after Ops Manager has been fully configured.
     log.Infof("Requeuing reconciliation to configure Monitoring in Ops Manager.")
-    // FIXME: use correct MembersOption for scaler
+
     return r.updateStatus(ctx, opsManager, workflow.Pending("Enabling monitoring").Requeue(), log, appDbStatusOption, status.AppDBMemberOptions(appDBScalers...))
 }

@@ -990,12 +992,9 @@ func (r *ReconcileAppDbReplicaSet) buildAppDbAutomationConfig(ctx context.Contex

     }

-    // get member options from appDB spec
-    appDBSpec := opsManager.Spec.AppDB
-    memberOptions := appDBSpec.GetMemberOptions()
-
     processList := r.generateProcessList(opsManager)
-    existingAutomationMemberIds, nextId := getExistingAutomationMemberIds(existingAutomationConfig)
+    existingAutomationMembers, nextId := getExistingAutomationReplicaSetMembers(existingAutomationConfig)
+    memberOptions := r.generateMemberOptions(opsManager, existingAutomationMembers)
     replicasThisReconciliation := 0
     // we want to use all member clusters to maintain the same process list despite having some clusters down
     for _, memberCluster := range r.getAllMemberClusters() {
@@ -1075,8 +1074,8 @@
     AddModifications(func(automationConfig *automationconfig.AutomationConfig) {
         if len(automationConfig.ReplicaSets) == 1 {
             for idx, member := range automationConfig.ReplicaSets[0].Members {
-                if existingId, ok := existingAutomationMemberIds[member.Host]; ok {
-                    automationConfig.ReplicaSets[0].Members[idx].Id = existingId
+                if existingMember, ok := existingAutomationMembers[member.Host]; ok {
+                    automationConfig.ReplicaSets[0].Members[idx].Id = existingMember.Id
                 } else {
                     automationConfig.ReplicaSets[0].Members[idx].Id = nextId
                     nextId = nextId + 1
@@ -1145,48 +1144,95 @@ func shouldPerformForcedReconfigure(annotations map[string]string) bool {
     return false
 }

-func getExistingAutomationMemberIds(automationConfig automationconfig.AutomationConfig) (map[string]int, int) {
+func getExistingAutomationReplicaSetMembers(automationConfig automationconfig.AutomationConfig) (map[string]automationconfig.ReplicaSetMember, int) {
     nextId := 0
-    existingIds := map[string]int{}
+    existingMembers := map[string]automationconfig.ReplicaSetMember{}
     if len(automationConfig.ReplicaSets) != 1 {
-        return existingIds, nextId
+        return existingMembers, nextId
     }
     for _, member := range automationConfig.ReplicaSets[0].Members {
-        existingIds[member.Host] = member.Id
+        existingMembers[member.Host] = member
         if member.Id >= nextId {
             nextId = member.Id + 1
         }
     }
-    return existingIds, nextId
+    return existingMembers, nextId
+}
+
+func (r *ReconcileAppDbReplicaSet) generateProcessHostnames(opsManager *omv1.MongoDBOpsManager, memberCluster multicluster.MemberCluster) []string {
+    members := scale.ReplicasThisReconciliation(scalers.GetAppDBScaler(opsManager, memberCluster.Name, r.getMemberClusterIndex(memberCluster.Name), r.memberClusters))
+    var hostnames []string
+    if opsManager.Spec.AppDB.IsMultiCluster() {
+        hostnames = dns.GetMultiClusterProcessHostnames(opsManager.Spec.AppDB.GetName(), opsManager.GetNamespace(), memberCluster.Index, members, opsManager.Spec.GetClusterDomain(), nil)
+    } else {
+        hostnames, _ = dns.GetDNSNames(opsManager.Spec.AppDB.GetName(), opsManager.Spec.AppDB.ServiceName(), opsManager.GetNamespace(), opsManager.Spec.GetClusterDomain(), members, nil)
+    }
+    return hostnames
 }

 func (r *ReconcileAppDbReplicaSet) generateProcessList(opsManager *omv1.MongoDBOpsManager) []automationconfig.Process {
     var processList []automationconfig.Process
     // We want all clusters to generate stable process list in case of some clusters being down. Process list cannot change regardless of the cluster health.
     for _, memberCluster := range r.getAllMemberClusters() {
-        members := scale.ReplicasThisReconciliation(scalers.GetAppDBScaler(opsManager, memberCluster.Name, r.getMemberClusterIndex(memberCluster.Name), r.memberClusters))
-        var hostnames []string
-        if opsManager.Spec.AppDB.IsMultiCluster() {
-            hostnames = dns.GetMultiClusterProcessHostnames(opsManager.Spec.AppDB.GetName(), opsManager.GetNamespace(), memberCluster.Index, members, opsManager.Spec.GetClusterDomain(), nil)
-        } else {
-            hostnames, _ = dns.GetDNSNames(opsManager.Spec.AppDB.GetName(), opsManager.Spec.AppDB.ServiceName(), opsManager.GetNamespace(), opsManager.Spec.GetClusterDomain(), members, nil)
-        }
-
+        hostnames := r.generateProcessHostnames(opsManager, memberCluster)
         for idx, hostname := range hostnames {
-            processList = append(processList, automationconfig.Process{
+            process := automationconfig.Process{
                 Name:     fmt.Sprintf("%s-%d", opsManager.Spec.AppDB.NameForCluster(memberCluster.Index), idx),
                 HostName: hostname,
-            })
+            }
+            processList = append(processList, process)
         }
     }
     return processList
 }

+func (r *ReconcileAppDbReplicaSet) generateMemberOptions(opsManager *omv1.MongoDBOpsManager, previousMembers map[string]automationconfig.ReplicaSetMember) []automationconfig.MemberOptions {
+    var memberOptionsList []automationconfig.MemberOptions
+    for _, memberCluster := range r.getAllMemberClusters() {
+        hostnames := r.generateProcessHostnames(opsManager, memberCluster)
+        memberConfig := make([]automationconfig.MemberOptions, 0)
+        if memberCluster.Active {
+            memberConfigForCluster := opsManager.Spec.AppDB.GetMemberClusterSpecByName(memberCluster.Name).MemberConfig
+            if memberConfigForCluster != nil {
+                memberConfig = append(memberConfig, memberConfigForCluster...)
+            }
+        }
+        for idx, hostname := range hostnames {
+            memberOptions := automationconfig.MemberOptions{}
+            if idx < len(memberConfig) { // There are member options configured in the spec
+                memberOptions.Votes = memberConfig[idx].Votes
+                memberOptions.Priority = memberConfig[idx].Priority
+                memberOptions.Tags = memberConfig[idx].Tags
+            } else {
+                // There are three cases we might not have memberOptions in spec:
+                // 1. user never specified member config in the spec
+                // 2. user scaled down members e.g. from 5 to 2 removing memberConfig elements at the same time
+                // 3. user removed whole clusterSpecItem from the list (removing cluster entirely)
+                // For 2. and 3. we should have those members in existing AC
+                if replicaSetMember, ok := previousMembers[hostname]; ok {
+                    memberOptions.Votes = replicaSetMember.Votes
+                    if replicaSetMember.Priority != nil {
+                        memberOptions.Priority = ptr.To(fmt.Sprintf("%f", *replicaSetMember.Priority))
+                    }
+                    memberOptions.Tags = replicaSetMember.Tags
+
+                } else {
+                    // If the member does not exist in the previous automation config, we populate the member options with defaults
+                    memberOptions.Votes = ptr.To(1)
+                    memberOptions.Priority = ptr.To("1.0")
+                }
+            }
+            memberOptionsList = append(memberOptionsList, memberOptions)
+        }
+
+    }
+    return memberOptionsList
+}
+
 func (r *ReconcileAppDbReplicaSet) generateHeadlessHostnamesForMonitoring(opsManager *omv1.MongoDBOpsManager) []string {
     var hostnames []string
     // We want all clusters to generate stable process list in case of some clusters being down. Process list cannot change regardless of the cluster health.
     for _, memberCluster := range r.getAllMemberClusters() {
-        // TODO for now scaling is disabled - we create all desired processes
         members := scale.ReplicasThisReconciliation(scalers.GetAppDBScaler(opsManager, memberCluster.Name, r.getMemberClusterIndex(memberCluster.Name), r.memberClusters))
         if opsManager.Spec.AppDB.IsMultiCluster() {
            hostnames = append(hostnames, dns.GetMultiClusterHostnamesForMonitoring(opsManager.Spec.AppDB.GetName(), opsManager.GetNamespace(), memberCluster.Index, members)...)
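To make the precedence in `generateMemberOptions` explicit: for each member, options come from the cluster's `memberConfig` entry at the same index when one exists, otherwise from that member's entry in the previous automation config (so scale-downs and removed clusters keep their settings), otherwise from the defaults `votes: 1`, `priority: "1.0"`. Below is a hedged sketch of the resulting per-member options for a hypothetical two-cluster AppDB where cluster-1 runs 3 members with only 2 `memberConfig` entries and cluster-2 runs 1 member with none.

```yaml
# Illustrative outcome only; ordering follows the stable per-cluster process list.
- votes: 1        # cluster-1, member 0: from spec memberConfig[0]
  priority: "1.5"
- votes: 1        # cluster-1, member 1: from spec memberConfig[1]
  priority: "0.5"
- votes: 1        # cluster-1, member 2: no spec entry; previous automation config
  priority: "1.0" # values for this host are reused if present, else these defaults
- votes: 1        # cluster-2, member 0: no memberConfig for this cluster; same fallback
  priority: "1.0"
```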

docker/mongodb-enterprise-tests/kubetester/automation_config_tester.py

+1-1
@@ -26,7 +26,7 @@ def get_replica_set_processes(self, rs_name: str) -> List[Dict]:

     def get_replica_set_members(self, rs_name: str) -> List[Dict]:
         replica_set = ([rs for rs in self.automation_config["replicaSets"] if rs["_id"] == rs_name])[0]
-        return replica_set["members"]
+        return sorted(replica_set["members"], key=lambda member: member["_id"])

     def get_mongos_processes(self):
         """ " Returns all mongos processes in deployment. We don't need to filter by sharded cluster name as

docker/mongodb-enterprise-tests/tests/multicluster/conftest.py

+1-1
@@ -256,7 +256,7 @@ def create_service_entries_objects(
 def cluster_spec_list(
     member_cluster_names: List[str],
     members: List[int],
-    member_configs: Optional[List[Dict]] = None,
+    member_configs: Optional[List[List[Dict]]] = None,
     backup_configs: Optional[List[Dict]] = None,
 ):
     if member_configs is None and backup_configs is None:

docker/mongodb-enterprise-tests/tests/opsmanager/withMonitoredAppDB/conftest.py

+2-1
@@ -34,6 +34,7 @@ def enable_multi_cluster_deployment(
     resource: MongoDBOpsManager,
     om_cluster_spec_list: Optional[list[int]] = None,
     appdb_cluster_spec_list: Optional[list[int]] = None,
+    appdb_member_configs: Optional[list[list[dict]]] = None,
 ):
     resource["spec"]["topology"] = "MultiCluster"
     backup_configs = None
@@ -75,6 +76,6 @@
     )
     resource["spec"]["applicationDatabase"]["topology"] = "MultiCluster"
     resource["spec"]["applicationDatabase"]["clusterSpecList"] = cluster_spec_list(
-        get_appdb_member_cluster_names(), appdb_cluster_spec_list
+        get_appdb_member_cluster_names(), appdb_cluster_spec_list, appdb_member_configs
     )
     resource.api = kubernetes.client.CustomObjectsApi(api_client=get_central_cluster_client())
