@@ -1343,6 +1343,60 @@ def k8s_resource(set, kind)
13431343 } )
13441344 end
13451345
# With page_on_critical switched on for the app service, the generated
# PrometheusRule must contain a StatusCritical alert labelled for PagerDuty.
it 'creates a paging alert rule for critical status components' do
  factory = eval_stacks do
    stack "mystack" do
      app_service "x", :kubernetes => true do
        self.maintainers = [person('Testers')]
        self.description = 'Testing'
        self.alerts_channel = 'test'
        self.page_on_critical = true

        self.application = 'MyApplication'
        self.startup_alert_threshold = '1h'
      end
    end
    env "e1", :primary_site => 'space' do
      instantiate_stack "mystack"
    end
  end

  # Walk the inventory down to the kubernetes machine set declared above.
  environment = factory.inventory.find_environment('e1')
  machine_set = environment.definitions['mystack'].k8s_machinesets['x']
  rule_resource = k8s_resource(machine_set, 'PrometheusRule')

  expect(rule_resource['apiVersion']).to eql('monitoring.coreos.com/v1')

  expected_labels = {
    'prometheus' => 'main',
    'role' => 'alert-rules',
    'app.kubernetes.io/managed-by' => 'stacks',
    'stack' => 'mystack',
    'machineset' => 'x',
    'group' => 'blue',
    'app.kubernetes.io/instance' => 'blue',
    'app.kubernetes.io/part-of' => 'x',
    'app.kubernetes.io/component' => 'app_service'
  }
  expect(rule_resource['metadata']).to eql('labels' => expected_labels,
                                           'name' => 'x-blue-app',
                                           'namespace' => 'e1')

  alert_group = rule_resource['spec']['groups'].first
  expect(alert_group['name']).to eql('stacks-alerts')

  paging_rule = alert_group['rules'].find { |rule| rule['alert'] == 'StatusCritical' }

  # The {{ ... }} placeholders are part of the literal string; Ruby does not
  # interpolate them.
  status_page_url = "https://go.timgroup.com/insight/space/proxy/{{ $labels.namespace }}/{{ $labels.pod }}/info/status"
  expected_rule = {
    'alert' => 'StatusCritical',
    'expr' => 'sum(tucker_component_status{job="x-blue-app",status="critical"}) by (pod, namespace) > 0',
    'labels' => {
      'severity' => 'critical',
      'alertname' => 'x-blue-app CRITICAL',
      'alert_owner_channel' => 'test',
      # This label is what distinguishes the paging variant of the rule.
      'pagerduty' => 'true'
    },
    'annotations' => {
      'message' => '{{ $value }} components are critical on {{ $labels.namespace }}/{{ $labels.pod }}',
      'status_page_url' => status_page_url
    }
  }
  expect(paging_rule).to eql(expected_rule)
end
1399+
13461400 it 'creates an alert rule for pods stuck in a crash loop' do
13471401 factory = eval_stacks do
13481402 stack "mystack" do
0 commit comments