-
Notifications
You must be signed in to change notification settings - Fork 418
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[FLINK-33634] Add Conditions to Flink CRD's Status field #749
base: main
Are you sure you want to change the base?
Changes from all commits
4a89a0a
976394f
e752a4b
739a159
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -68,6 +68,9 @@ spec: | |
... | ||
status: | ||
clusterInfo: | ||
... | ||
conditions: | ||
|
||
... | ||
jobManagerDeploymentStatus: READY | ||
jobStatus: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,15 +22,20 @@ | |
import org.apache.flink.kubernetes.operator.api.lifecycle.ResourceLifecycleState; | ||
import org.apache.flink.kubernetes.operator.api.spec.AbstractFlinkSpec; | ||
import org.apache.flink.kubernetes.operator.api.spec.JobState; | ||
import org.apache.flink.kubernetes.operator.api.utils.ConditionUtils; | ||
|
||
import com.fasterxml.jackson.annotation.JsonIgnore; | ||
import io.fabric8.kubernetes.api.model.Condition; | ||
import io.fabric8.kubernetes.model.annotation.PrinterColumn; | ||
import lombok.AllArgsConstructor; | ||
import lombok.Data; | ||
import lombok.NoArgsConstructor; | ||
import lombok.experimental.SuperBuilder; | ||
import org.apache.commons.lang3.StringUtils; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
/** Last observed common status of the Flink deployment/Flink SessionJob. */ | ||
@Experimental | ||
@Data | ||
|
@@ -51,6 +56,8 @@ public abstract class CommonStatus<SPEC extends AbstractFlinkSpec> { | |
// column. | ||
private ResourceLifecycleState lifecycleState; | ||
|
||
private List<Condition> conditions = new ArrayList<>(); | ||
|
||
/** | ||
* Current reconciliation status of this resource. | ||
* | ||
|
@@ -101,4 +108,67 @@ public ResourceLifecycleState getLifecycleState() { | |
* loop immediately. For example autoscaler overrides have changed and we need to apply them. | ||
*/ | ||
@JsonIgnore @Internal private boolean immediateReconciliationNeeded = false; | ||
|
||
public List<Condition> getConditions() { | ||
switch (getLifecycleState()) { | ||
case CREATED: | ||
updateConditionIfNotExist( | ||
conditions, | ||
ConditionUtils.notReady( | ||
"The resource was created in Kubernetes but not yet handled by the operator")); | ||
break; | ||
case SUSPENDED: | ||
updateConditionIfNotExist( | ||
conditions, | ||
ConditionUtils.notReady("The resource (job) has been suspended")); | ||
break; | ||
case UPGRADING: | ||
updateConditionIfNotExist( | ||
conditions, ConditionUtils.notReady("The resource is being upgraded")); | ||
break; | ||
case DEPLOYED: | ||
updateConditionIfNotExist( | ||
conditions, | ||
ConditionUtils.ready( | ||
"The resource is deployed, but it’s not yet considered to be stable and might be rolled back in the future")); | ||
break; | ||
case ROLLING_BACK: | ||
updateConditionIfNotExist( | ||
conditions, | ||
ConditionUtils.notReady( | ||
"The resource is being rolled back to the last stable spec")); | ||
break; | ||
case ROLLED_BACK: | ||
updateConditionIfNotExist( | ||
conditions, | ||
ConditionUtils.ready("The resource is deployed with the last stable spec")); | ||
break; | ||
case FAILED: | ||
updateConditionIfNotExist(conditions, ConditionUtils.error("failed")); | ||
break; | ||
case STABLE: | ||
updateConditionIfNotExist( | ||
conditions, | ||
ConditionUtils.ready( | ||
"The resource deployment is considered to be stable and won’t be rolled back")); | ||
break; | ||
} | ||
|
||
return conditions; | ||
} | ||
|
||
private void updateConditionIfNotExist(List<Condition> conditions, Condition newCondition) { | ||
if (conditions.isEmpty()) { | ||
conditions.add(newCondition); | ||
} | ||
if (conditions.stream() | ||
.noneMatch(condition -> condition.getType().equals(newCondition.getType()))) { | ||
conditions.add(newCondition); | ||
} else if (conditions.removeIf( | ||
condition -> | ||
!(condition.getReason().equals(newCondition.getReason()) | ||
&& condition.getMessage().equals(newCondition.getMessage())))) { | ||
conditions.add(newCondition); | ||
} | ||
Comment on lines
+164
to
+172
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you please explain what this is supposed to do exactly (and also add some docs to the code)? I am a bit confused by the logic There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, will add the docs to code. above logic is to make sure that rather to blindly replace any existing conditions with new one , check for existing condition with same type and replace only if the same condition type has different message. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As I wrote in the last comment, I think we need a FLIP for this instead of fixing up this PR |
||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.flink.kubernetes.operator.api.utils; | ||
|
||
import io.fabric8.kubernetes.api.model.Condition; | ||
import io.fabric8.kubernetes.api.model.ConditionBuilder; | ||
|
||
import java.text.SimpleDateFormat; | ||
import java.util.Date; | ||
|
||
/** Status of CR. */ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Incorrect javadoc |
||
public class ConditionUtils { | ||
|
||
public static Condition ready(final String message) { | ||
return crCondition("Ready", "True", message, "Ready"); | ||
} | ||
|
||
public static Condition notReady(final String message) { | ||
return crCondition("Ready", "False", message, "Progressing"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a reason why we arbitrarily chose to have Ready and Error conditions? Why not simply use the ResourceLifecycleState name as the type and description as the message? |
||
} | ||
|
||
public static Condition error(final String message) { | ||
return crCondition("Error", "True", message, "The job terminally failed"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think error means that the job terminally failed in all cases. There are also operator side errors like reconciliation problems etc. |
||
} | ||
|
||
public static Condition crCondition( | ||
final String type, final String status, final String message, final String reason) { | ||
return new ConditionBuilder() | ||
.withType(type) | ||
.withStatus(status) | ||
.withMessage(message) | ||
.withReason(reason) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't need a reason, I suggest let's just remove it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As per the doc , https://maelvls.dev/kubernetes-conditions/ , looks like we can keep reason as simple category of cause of the current status.
I was thinking probably we can keep that for any not ready and error conditions, and not required for ready condition?. |
||
.withLastTransitionTime( | ||
new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'").format(new Date())) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wouldn't |
||
.build(); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks like a lot of duplicated code and copy-pasted strings from `ResourceLifecycleState`. I think we should add this conversion to the enum (or to a utility class that uses the enum directly).