Skip to content

Commit 07e44b9

Browse files
Merge pull request #46 from heliapb/feat/add_prom_agent
2 parents a68c961 + 1f2037a commit 07e44b9

File tree

6 files changed

+784
-144
lines changed

6 files changed

+784
-144
lines changed

Documentation/commands/analyze/index.md

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ The Prometheus server requires proper RBAC (Role-Based Access Control) rules to
7070

7171
Since Prometheus only reads objects from the Kubernetes API, it requires the get, list, and watch actions. As Prometheus can also be used to scrape metrics from the Kubernetes apiserver, it also requires access to the apiserver's /metrics endpoint. In addition to the rules for Prometheus itself, Prometheus needs to be able to get configmaps in order to pull in rule files from configmap objects.
7272

73-
### Prometheus Namespace Selectors and Service Selectors
73+
### Prometheus Namespace Selectors and Monitors Selectors
7474

7575
The Prometheus server relies on proper service discovery to function correctly. To achieve this, we must ensure that any defined Namespace Selector corresponds to an existing namespace. Similarly, the monitor and rule selectors must match existing resources. Whether using ServiceMonitor, PodMonitor, ScrapeConfig, Probe, or PrometheusRule, the respective Custom Resource (CR) must exist and be matched by the corresponding selector.
7676

@@ -91,3 +91,19 @@ Alertmanager configuration must be provided in one of the following ways:
9191
* As a Kubernetes secret provided by the user, that needs to ensure the data is stored in a file called alertmanager.yaml
9292
* The Operator will provide a default generated Kubernetes secret to use
9393
* Via the AlertmanagerConfig CRDs (Custom Resource Definitions), that should be matched by a Namespace selector in a given namespace, a ConfigSelector or the ConfigSelector Name
94+
95+
## Analyze Prometheus Agent
96+
97+
### Prometheus Agent Existence
98+
99+
The Prometheus Agent object must exist in the Kubernetes cluster, which can be confirmed by checking for the presence of the Prometheus Agent CR (Custom Resource) in the specified namespace and under the given name.
100+
101+
### Prometheus Agent RBAC Rules
102+
103+
The Prometheus Agent server requires proper RBAC (Role-Based Access Control) rules to function correctly. This means the service account associated with the Prometheus Agent must have permissions aligned with the Prometheus Agent CRDs (Custom Resource Definitions) present in the cluster.
104+
105+
Since Prometheus Agent only reads objects from the Kubernetes API, it requires the get, list, and watch actions. As Prometheus Agent can also be used to scrape metrics from the Kubernetes apiserver, it also requires access to the apiserver's /metrics endpoint. In addition to the rules for Prometheus Agent itself, Prometheus Agent needs to be able to get configmaps in order to pull in rule files from configmap objects.
106+
107+
### Prometheus Agent Namespace Selectors and Monitors Selectors
108+
109+
The Prometheus Agent server relies on proper service discovery to function correctly. To achieve this, we must ensure that any defined Namespace Selector corresponds to an existing namespace. Similarly, the monitor selectors must match existing resources. Whether using ServiceMonitor, PodMonitor, ScrapeConfig or Probe, the respective Custom Resource (CR) must exist and be matched by the corresponding selector.

cmd/analyze.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,11 @@ import (
2828
type AnalyzeKind string
2929

3030
const (
31-
ServiceMonitor AnalyzeKind = "servicemonitor"
32-
Operator AnalyzeKind = "operator"
33-
Prometheus AnalyzeKind = "prometheus"
34-
Alertmanager AnalyzeKind = "alertmanager"
31+
ServiceMonitor AnalyzeKind = "servicemonitor"
32+
Operator AnalyzeKind = "operator"
33+
Prometheus AnalyzeKind = "prometheus"
34+
Alertmanager AnalyzeKind = "alertmanager"
35+
PrometheusAgent AnalyzeKind = "prometheusagent"
3536
)
3637

3738
type AnalyzeFlags struct {
@@ -84,6 +85,8 @@ func run(cmd *cobra.Command, _ []string) error {
8485
return analyzers.RunPrometheusAnalyzer(cmd.Context(), clientSets, analyzerFlags.Name, analyzerFlags.Namespace)
8586
case Alertmanager:
8687
return analyzers.RunAlertmanagerAnalyzer(cmd.Context(), clientSets, analyzerFlags.Name, analyzerFlags.Namespace)
88+
case PrometheusAgent:
89+
return analyzers.RunPrometheusAgentAnalyzer(cmd.Context(), clientSets, analyzerFlags.Name, analyzerFlags.Namespace)
8790
default:
8891
return fmt.Errorf("kind %s not supported", analyzerFlags.Kind)
8992
}

internal/analyzers/prometheus.go

Lines changed: 6 additions & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,10 @@ import (
1818
"context"
1919
"fmt"
2020
"log/slog"
21-
"strings"
2221

2322
"github.com/prometheus-operator/poctl/internal/k8sutil"
24-
v1 "k8s.io/api/rbac/v1"
2523
"k8s.io/apimachinery/pkg/api/errors"
2624
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
27-
"k8s.io/apimachinery/pkg/labels"
28-
)
29-
30-
const (
31-
ServiceMonitor = "ServiceMonitor"
32-
PodMonitor = "PodMonitor"
33-
Probe = "Probe"
34-
ScrapeConfig = "ScrapeConfig"
35-
PrometheusRule = "PrometheusRule"
3625
)
3726

3827
func RunPrometheusAnalyzer(ctx context.Context, clientSets *k8sutil.ClientSets, name, namespace string) error {
@@ -61,7 +50,7 @@ func RunPrometheusAnalyzer(ctx context.Context, clientSets *k8sutil.ClientSets,
6150
return fmt.Errorf("failed to get ClusterRole %s", crb.RoleRef.Name)
6251
}
6352

64-
err = checkClusterRoleRules(crb, cr)
53+
err = k8sutil.CheckPrometheusClusterRoleRules(crb, cr)
6554
if err != nil {
6655
return err
6756
}
@@ -87,148 +76,26 @@ func RunPrometheusAnalyzer(ctx context.Context, clientSets *k8sutil.ClientSets,
8776
return fmt.Errorf("ruleNamespaceSelector is not properly defined: %s", err)
8877
}
8978

90-
if err := checkResourceLabelSelectors(ctx, clientSets, prometheus.Spec.ServiceMonitorSelector, ServiceMonitor, namespace); err != nil {
79+
if err := k8sutil.CheckResourceLabelSelectors(ctx, *clientSets, prometheus.Spec.ServiceMonitorSelector, k8sutil.ServiceMonitor, namespace); err != nil {
9180
return fmt.Errorf("serviceMonitorSelector is not properly defined: %s", err)
9281
}
9382

94-
if err := checkResourceLabelSelectors(ctx, clientSets, prometheus.Spec.PodMonitorSelector, PodMonitor, namespace); err != nil {
83+
if err := k8sutil.CheckResourceLabelSelectors(ctx, *clientSets, prometheus.Spec.PodMonitorSelector, k8sutil.PodMonitor, namespace); err != nil {
9584
return fmt.Errorf("podMonitorSelector is not properly defined: %s", err)
9685
}
9786

98-
if err := checkResourceLabelSelectors(ctx, clientSets, prometheus.Spec.ProbeSelector, Probe, namespace); err != nil {
87+
if err := k8sutil.CheckResourceLabelSelectors(ctx, *clientSets, prometheus.Spec.ProbeSelector, k8sutil.Probe, namespace); err != nil {
9988
return fmt.Errorf("probeSelector is not properly defined: %s", err)
10089
}
10190

102-
if err := checkResourceLabelSelectors(ctx, clientSets, prometheus.Spec.ScrapeConfigSelector, ScrapeConfig, namespace); err != nil {
91+
if err := k8sutil.CheckResourceLabelSelectors(ctx, *clientSets, prometheus.Spec.ScrapeConfigSelector, k8sutil.ScrapeConfig, namespace); err != nil {
10392
return fmt.Errorf("scrapeConfigSelector is not properly defined: %s", err)
10493
}
10594

106-
if err := checkResourceLabelSelectors(ctx, clientSets, prometheus.Spec.RuleSelector, PrometheusRule, namespace); err != nil {
95+
if err := k8sutil.CheckResourceLabelSelectors(ctx, *clientSets, prometheus.Spec.RuleSelector, k8sutil.PrometheusRule, namespace); err != nil {
10796
return fmt.Errorf("ruleSelector is not properly defined: %s", err)
10897
}
10998

11099
slog.Info("Prometheus is compliant, no issues found", "name", name, "namespace", namespace)
111100
return nil
112101
}
113-
114-
func checkClusterRoleRules(crb v1.ClusterRoleBinding, cr *v1.ClusterRole) error {
115-
var errs []string
116-
verbsToCheck := []string{"get", "list", "watch"}
117-
missingVerbs := []string{}
118-
119-
for _, rule := range cr.Rules {
120-
for _, resource := range rule.Resources {
121-
found := false
122-
if resource == "configmaps" {
123-
for _, verb := range rule.Verbs {
124-
if verb == "get" {
125-
found = true
126-
break
127-
}
128-
}
129-
if !found {
130-
errs = append(errs, fmt.Sprintf("ClusterRole %s does not include 'configmaps' with 'get' in its verbs", crb.RoleRef.Name))
131-
}
132-
continue
133-
}
134-
for range rule.APIGroups {
135-
for _, requiredVerb := range verbsToCheck {
136-
found := false
137-
for _, verb := range rule.Verbs {
138-
if verb == requiredVerb {
139-
found = true
140-
break
141-
}
142-
}
143-
if !found {
144-
missingVerbs = append(missingVerbs, requiredVerb)
145-
}
146-
}
147-
if len(missingVerbs) > 0 {
148-
errs = append(errs, fmt.Sprintf("ClusterRole %s is missing necessary verbs for APIGroups: %v", crb.RoleRef.Name, missingVerbs))
149-
}
150-
}
151-
}
152-
for _, nonResource := range rule.NonResourceURLs {
153-
if nonResource == "/metrics" {
154-
hasGet := false
155-
for _, verb := range rule.Verbs {
156-
if verb == "get" {
157-
hasGet = true
158-
break
159-
}
160-
}
161-
if !hasGet {
162-
errs = append(errs, fmt.Sprintf("ClusterRole %s does not include 'get' verb for NonResourceURL '/metrics'", crb.RoleRef.Name))
163-
}
164-
}
165-
}
166-
}
167-
168-
if len(errs) > 0 {
169-
return fmt.Errorf("multiple errors found:\n%s", strings.Join(errs, "\n"))
170-
}
171-
return nil
172-
}
173-
174-
func checkResourceLabelSelectors(ctx context.Context, clientSets *k8sutil.ClientSets, labelSelector *metav1.LabelSelector, resourceName, namespace string) error {
175-
if labelSelector == nil {
176-
return fmt.Errorf("%s selector is not defined", resourceName)
177-
}
178-
179-
if len(labelSelector.MatchLabels) == 0 && len(labelSelector.MatchExpressions) == 0 {
180-
return nil
181-
}
182-
183-
labelMap, err := metav1.LabelSelectorAsMap(labelSelector)
184-
if err != nil {
185-
return fmt.Errorf("invalid label selector format in %s: %v", resourceName, err)
186-
}
187-
188-
switch resourceName {
189-
case ServiceMonitor:
190-
serviceMonitors, err := clientSets.MClient.MonitoringV1().ServiceMonitors(namespace).List(ctx, metav1.ListOptions{LabelSelector: labels.SelectorFromSet(labelMap).String()})
191-
if err != nil {
192-
return fmt.Errorf("failed to list ServiceMonitors in %s: %v", namespace, err)
193-
}
194-
if len(serviceMonitors.Items) == 0 {
195-
return fmt.Errorf("no ServiceMonitors match the provided selector in Prometheus %s", namespace)
196-
}
197-
case PodMonitor:
198-
podMonitors, err := clientSets.MClient.MonitoringV1().PodMonitors(namespace).List(ctx, metav1.ListOptions{LabelSelector: labels.SelectorFromSet(labelMap).String()})
199-
if err != nil {
200-
return fmt.Errorf("failed to list PodMonitor in %s: %v", namespace, err)
201-
}
202-
if len(podMonitors.Items) == 0 {
203-
return fmt.Errorf("no PodMonitors match the provided selector in Prometheus %s", namespace)
204-
}
205-
case Probe:
206-
probes, err := clientSets.MClient.MonitoringV1().Probes(namespace).List(ctx, metav1.ListOptions{LabelSelector: labels.SelectorFromSet(labelMap).String()})
207-
if err != nil {
208-
return fmt.Errorf("failed to list Probes in %s: %v", namespace, err)
209-
}
210-
if len(probes.Items) == 0 {
211-
return fmt.Errorf("no Probes match the provided selector in Prometheus %s", namespace)
212-
}
213-
case ScrapeConfig:
214-
scrapeConfigs, err := clientSets.MClient.MonitoringV1alpha1().ScrapeConfigs(namespace).List(ctx, metav1.ListOptions{LabelSelector: labels.SelectorFromSet(labelMap).String()})
215-
if err != nil {
216-
return fmt.Errorf("failed to list ScrapeConfigs in %s: %v", namespace, err)
217-
}
218-
if len(scrapeConfigs.Items) == 0 {
219-
return fmt.Errorf("no ScrapeConfigs match the provided selector in Prometheus %s", namespace)
220-
}
221-
case PrometheusRule:
222-
promRules, err := clientSets.MClient.MonitoringV1().PrometheusRules(namespace).List(ctx, metav1.ListOptions{LabelSelector: labels.SelectorFromSet(labelMap).String()})
223-
if err != nil {
224-
return fmt.Errorf("failed to list Probes in %s: %v", namespace, err)
225-
}
226-
if len(promRules.Items) == 0 {
227-
return fmt.Errorf("no PrometheusRules match the provided selector in Prometheus %s", namespace)
228-
}
229-
default:
230-
return fmt.Errorf("unknown selector type: %s", resourceName)
231-
}
232-
233-
return nil
234-
}
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Copyright 2024 The prometheus-operator Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package analyzers
16+
17+
import (
18+
"context"
19+
"fmt"
20+
"log/slog"
21+
22+
"github.com/prometheus-operator/poctl/internal/k8sutil"
23+
"k8s.io/apimachinery/pkg/api/errors"
24+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
25+
)
26+
27+
func RunPrometheusAgentAnalyzer(ctx context.Context, clientSets *k8sutil.ClientSets, name, namespace string) error {
28+
prometheusagent, err := clientSets.MClient.MonitoringV1alpha1().PrometheusAgents(namespace).Get(ctx, name, metav1.GetOptions{})
29+
if err != nil {
30+
if errors.IsNotFound(err) {
31+
return fmt.Errorf("prometheus %s not found in namespace %s", name, namespace)
32+
}
33+
return fmt.Errorf("error while getting Prometheus: %v", err)
34+
}
35+
36+
cRb, err := clientSets.KClient.RbacV1().ClusterRoleBindings().List(ctx, metav1.ListOptions{
37+
LabelSelector: "name=prometheus-agent",
38+
})
39+
if err != nil {
40+
return fmt.Errorf("failed to list RoleBindings: %w", err)
41+
}
42+
43+
if !k8sutil.IsServiceAccountBoundToRoleBindingList(cRb, prometheusagent.Spec.ServiceAccountName) {
44+
return fmt.Errorf("serviceAccount %s is not bound to any RoleBindings", prometheusagent.Spec.ServiceAccountName)
45+
}
46+
47+
for _, crb := range cRb.Items {
48+
cr, err := clientSets.KClient.RbacV1().ClusterRoles().Get(ctx, crb.RoleRef.Name, metav1.GetOptions{})
49+
if err != nil {
50+
return fmt.Errorf("failed to get ClusterRole %s", crb.RoleRef.Name)
51+
}
52+
53+
err = k8sutil.CheckPrometheusClusterRoleRules(crb, cr)
54+
if err != nil {
55+
return err
56+
}
57+
}
58+
59+
if err := k8sutil.CheckResourceNamespaceSelectors(ctx, *clientSets, prometheusagent.Spec.PodMonitorNamespaceSelector); err != nil {
60+
return fmt.Errorf("podMonitorNamespaceSelector is not properly defined: %s", err)
61+
}
62+
63+
if err := k8sutil.CheckResourceNamespaceSelectors(ctx, *clientSets, prometheusagent.Spec.ProbeNamespaceSelector); err != nil {
64+
return fmt.Errorf("probeNamespaceSelector is not properly defined: %s", err)
65+
}
66+
67+
if err := k8sutil.CheckResourceNamespaceSelectors(ctx, *clientSets, prometheusagent.Spec.ServiceMonitorNamespaceSelector); err != nil {
68+
return fmt.Errorf("serviceMonitorNamespaceSelector is not properly defined: %s", err)
69+
}
70+
71+
if err := k8sutil.CheckResourceNamespaceSelectors(ctx, *clientSets, prometheusagent.Spec.ScrapeConfigNamespaceSelector); err != nil {
72+
return fmt.Errorf("scrapeConfigNamespaceSelector is not properly defined: %s", err)
73+
}
74+
75+
if err := k8sutil.CheckResourceLabelSelectors(ctx, *clientSets, prometheusagent.Spec.ServiceMonitorSelector, k8sutil.ServiceMonitor, namespace); err != nil {
76+
return fmt.Errorf("serviceMonitorSelector is not properly defined: %s", err)
77+
}
78+
79+
if err := k8sutil.CheckResourceLabelSelectors(ctx, *clientSets, prometheusagent.Spec.PodMonitorSelector, k8sutil.PodMonitor, namespace); err != nil {
80+
return fmt.Errorf("podMonitorSelector is not properly defined: %s", err)
81+
}
82+
83+
if err := k8sutil.CheckResourceLabelSelectors(ctx, *clientSets, prometheusagent.Spec.ProbeSelector, k8sutil.Probe, namespace); err != nil {
84+
return fmt.Errorf("probeSelector is not properly defined: %s", err)
85+
}
86+
87+
if err := k8sutil.CheckResourceLabelSelectors(ctx, *clientSets, prometheusagent.Spec.ScrapeConfigSelector, k8sutil.ScrapeConfig, namespace); err != nil {
88+
return fmt.Errorf("scrapeConfigSelector is not properly defined: %s", err)
89+
}
90+
91+
slog.Info("prometheusagent Agent is compliant, no issues found", "name", name, "namespace", namespace)
92+
return nil
93+
}

0 commit comments

Comments
 (0)