Commit eac03d5

Merge pull request #116 from bozemanpass/dboreham/catch-k8s-exceptions
Address k8s api exceptions in CI tests
2 parents: 6a7c012 + bf88e41

3 files changed: 117 additions & 98 deletions
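In short: `up()` and `down()` in deploy_k8s.py are each wrapped in a broad try/except so that unexpected Kubernetes API errors (for example the ApiException (500) seen from create_namespaced_ingress in CI) end the command with a clean error_exit message instead of a raw traceback; the smoke-test workflow's branch filters are normalized to list form; and helpers.py gains a short settle delay after the kind ingress controller reports ready, working around issue #110.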

.github/workflows/test.yml (4 additions & 2 deletions)
@@ -2,9 +2,11 @@ name: Smoke Test
 
 on:
   pull_request:
-    branches: '*'
+    branches:
+      - '*'
   push:
-    branches: '*'
+    branches:
+      - '*'
 
 jobs:
   test:
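The workflow change is cosmetic in intent: the scalar `branches: '*'` is rewritten as a YAML sequence, which is the shape the GitHub Actions workflow-syntax documentation shows for branch filters and the one schema-aware linters expect; the set of branches matched (all of them) should be unchanged.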

src/stack/deploy/k8s/deploy_k8s.py (107 additions & 96 deletions)
@@ -216,116 +216,127 @@ def _find_certificate_for_host_name(self, host_name):
         return None
 
     def up(self, detach, skip_cluster_management, services):
-        self.skip_cluster_management = skip_cluster_management
-        if not opts.o.dry_run:
-            if self.is_kind() and not self.skip_cluster_management:
-                # Create the kind cluster
-                create_cluster(
-                    self.kind_cluster_name,
-                    self.deployment_dir.joinpath(constants.kind_config_filename),
-                )
-                # Ensure the referenced containers are copied into kind
-                load_images_into_kind(self.kind_cluster_name, self.cluster_info.image_set)
-            self.connect_api()
-            if self.is_kind() and not self.skip_cluster_management:
-                # Now configure an ingress controller (not installed by default in kind)
-                install_ingress_for_kind()
-                # Wait for ingress to start (deployment provisioning will fail unless this is done)
-                wait_for_ingress_in_kind()
-
-        else:
-            log_info("Dry run mode enabled, skipping k8s API connect")
-
-        self._create_volume_data()
-        self._create_deployments()
-
-        http_proxy_info = self.cluster_info.spec.get_http_proxy()
-        # Note: at present we don't support tls for kind (and enabling tls causes errors)
-        use_tls = http_proxy_info and not self.is_kind()
-        certificate = self._find_certificate_for_host_name(http_proxy_info[0]["host-name"]) if use_tls else None
-        if certificate:
-            log_debug(f"Using existing certificate: {certificate}")
-
-        ingress: client.V1Ingress = self.cluster_info.get_ingress(use_tls=use_tls, certificate=certificate)
-        if ingress:
-            log_debug(f"Sending this ingress: {ingress}")
+        try:
+            self.skip_cluster_management = skip_cluster_management
             if not opts.o.dry_run:
-                ingress_resp = self.networking_api.create_namespaced_ingress(namespace=self.k8s_namespace, body=ingress)
-                log_debug("Ingress created:")
-                log_debug(f"{ingress_resp}")
-        else:
-            log_debug("No ingress configured")
+                if self.is_kind() and not self.skip_cluster_management:
+                    # Create the kind cluster
+                    create_cluster(
+                        self.kind_cluster_name,
+                        self.deployment_dir.joinpath(constants.kind_config_filename),
+                    )
+                    # Ensure the referenced containers are copied into kind
+                    load_images_into_kind(self.kind_cluster_name, self.cluster_info.image_set)
+                self.connect_api()
+                if self.is_kind() and not self.skip_cluster_management:
+                    # Now configure an ingress controller (not installed by default in kind)
+                    install_ingress_for_kind()
+                    # Wait for ingress to start (deployment provisioning will fail unless this is done)
+                    wait_for_ingress_in_kind()
+
+            else:
+                log_info("Dry run mode enabled, skipping k8s API connect")
+
+            self._create_volume_data()
+            self._create_deployments()
+
+            http_proxy_info = self.cluster_info.spec.get_http_proxy()
+            # Note: at present we don't support tls for kind (and enabling tls causes errors)
+            use_tls = http_proxy_info and not self.is_kind()
+            certificate = self._find_certificate_for_host_name(http_proxy_info[0]["host-name"]) if use_tls else None
+            if certificate:
+                log_debug(f"Using existing certificate: {certificate}")
+
+            ingress: client.V1Ingress = self.cluster_info.get_ingress(use_tls=use_tls, certificate=certificate)
+            if ingress:
+                log_debug(f"Sending this ingress: {ingress}")
+                if not opts.o.dry_run:
+                    # We've seen this exception thrown here: kubernetes.client.exceptions.ApiException: (500)
+                    ingress_resp = self.networking_api.create_namespaced_ingress(namespace=self.k8s_namespace, body=ingress)
+                    log_debug("Ingress created:")
+                    log_debug(f"{ingress_resp}")
+            else:
+                log_debug("No ingress configured")
+        except Exception as e:
+            error_exit(f"Exception thrown bringing stack up: {e}")
 
     def down(self, timeout, volumes, skip_cluster_management):  # noqa: C901
-        self.skip_cluster_management = skip_cluster_management
-        self.connect_api()
-        # Delete the k8s objects
+        try:
+            self.skip_cluster_management = skip_cluster_management
+            self.connect_api()
+            # Delete the k8s objects
+
+            if volumes:
+                # Delete the host-path-mounted PVs for this deployment
+                pvs = self.cluster_info.get_pvs()
+                for pv in pvs:
+                    log_debug(f"Deleting this pv: {pv}")
+                    try:
+                        pv_resp = self.core_api.delete_persistent_volume(name=pv.metadata.name)
+                        log_debug("PV deleted:")
+                        log_debug(f"{pv_resp}")
+                    except client.exceptions.ApiException as e:
+                        _check_delete_exception(e)
+
+                # Figure out the PVCs for this deployment
+                pvcs = self.cluster_info.get_pvcs()
+                for pvc in pvcs:
+                    log_debug(f"Deleting this pvc: {pvc}")
+                    try:
+                        pvc_resp = self.core_api.delete_namespaced_persistent_volume_claim(
+                            name=pvc.metadata.name, namespace=self.k8s_namespace
+                        )
+                        log_debug("PVCs deleted:")
+                        log_debug(f"{pvc_resp}")
+                    except client.exceptions.ApiException as e:
+                        _check_delete_exception(e)
 
-        if volumes:
-            # Create the host-path-mounted PVs for this deployment
-            pvs = self.cluster_info.get_pvs()
-            for pv in pvs:
-                log_debug(f"Deleting this pv: {pv}")
+            # Figure out the ConfigMaps for this deployment
+            cfg_maps = self.cluster_info.get_configmaps()
+            for cfg_map in cfg_maps:
+                log_debug(f"Deleting this ConfigMap: {cfg_map}")
                 try:
-                    pv_resp = self.core_api.delete_persistent_volume(name=pv.metadata.name)
-                    log_debug("PV deleted:")
-                    log_debug(f"{pv_resp}")
+                    cfg_map_resp = self.core_api.delete_namespaced_config_map(
+                        name=cfg_map.metadata.name,
+                        namespace=self.k8s_namespace
+                    )
+                    log_debug("ConfigMap deleted:")
+                    log_debug(f"{cfg_map_resp}")
                 except client.exceptions.ApiException as e:
                     _check_delete_exception(e)
 
-            # Figure out the PVCs for this deployment
-            pvcs = self.cluster_info.get_pvcs()
-            for pvc in pvcs:
-                log_debug(f"Deleting this pvc: {pvc}")
+            deployments = self.cluster_info.get_deployments()
+            for deployment in deployments:
+                log_debug(f"Deleting this deployment: {deployment}")
                 try:
-                    pvc_resp = self.core_api.delete_namespaced_persistent_volume_claim(
-                        name=pvc.metadata.name, namespace=self.k8s_namespace
-                    )
-                    log_debug("PVCs deleted:")
-                    log_debug(f"{pvc_resp}")
+                    self.apps_api.delete_namespaced_deployment(name=deployment.metadata.name, namespace=self.k8s_namespace)
                 except client.exceptions.ApiException as e:
                     _check_delete_exception(e)
 
-            # Figure out the ConfigMaps for this deployment
-            cfg_maps = self.cluster_info.get_configmaps()
-            for cfg_map in cfg_maps:
-                log_debug(f"Deleting this ConfigMap: {cfg_map}")
-                try:
-                    cfg_map_resp = self.core_api.delete_namespaced_config_map(name=cfg_map.metadata.name, namespace=self.k8s_namespace)
-                    log_debug("ConfigMap deleted:")
-                    log_debug(f"{cfg_map_resp}")
-                except client.exceptions.ApiException as e:
-                    _check_delete_exception(e)
-
-        deployments = self.cluster_info.get_deployments()
-        for deployment in deployments:
-            log_debug(f"Deleting this deployment: {deployment}")
-            try:
-                self.apps_api.delete_namespaced_deployment(name=deployment.metadata.name, namespace=self.k8s_namespace)
-            except client.exceptions.ApiException as e:
-                _check_delete_exception(e)
+            services: client.V1Service = self.cluster_info.get_services()
+            for svc in services:
+                log_debug(f"Deleting service: {svc}")
+                try:
+                    self.core_api.delete_namespaced_service(namespace=self.k8s_namespace, name=svc.metadata.name)
+                except client.exceptions.ApiException as e:
+                    _check_delete_exception(e)
 
-        services: client.V1Service = self.cluster_info.get_services()
-        for svc in services:
-            log_debug(f"Deleting service: {svc}")
-            try:
-                self.core_api.delete_namespaced_service(namespace=self.k8s_namespace, name=svc.metadata.name)
-            except client.exceptions.ApiException as e:
-                _check_delete_exception(e)
-
-        ingress: client.V1Ingress = self.cluster_info.get_ingress(use_tls=not self.is_kind())
-        if ingress:
-            log_debug(f"Deleting this ingress: {ingress}")
-            try:
-                self.networking_api.delete_namespaced_ingress(name=ingress.metadata.name, namespace=self.k8s_namespace)
-            except client.exceptions.ApiException as e:
-                _check_delete_exception(e)
-        else:
-            log_debug("No ingress to delete")
+            ingress: client.V1Ingress = self.cluster_info.get_ingress(use_tls=not self.is_kind())
+            if ingress:
+                log_debug(f"Deleting this ingress: {ingress}")
+                try:
+                    self.networking_api.delete_namespaced_ingress(name=ingress.metadata.name, namespace=self.k8s_namespace)
+                except client.exceptions.ApiException as e:
+                    _check_delete_exception(e)
+            else:
+                log_debug("No ingress to delete")
+
+            if self.is_kind() and not self.skip_cluster_management:
+                # Destroy the kind cluster
+                destroy_cluster(self.kind_cluster_name)
 
-        if self.is_kind() and not self.skip_cluster_management:
-            # Destroy the kind cluster
-            destroy_cluster(self.kind_cluster_name)
+        except Exception as e:
+            error_exit(f"Exception thrown taking stack down: {e}")
 
     def status(self):
         self.connect_api()
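The pattern is easier to see stripped of the surrounding detail. Here is a minimal sketch, not the stack codebase itself: `error_exit` and `_check_delete_exception` are stand-ins for the real helpers (treating 404 as benign in `_check_delete_exception` is an assumption for illustration), and the resource list is cut down to PVCs.

```python
# Minimal sketch of the error-handling pattern this commit applies.
from kubernetes import client


def error_exit(message: str):
    # Stand-in for the CLI helper: report the failure and exit non-zero.
    raise SystemExit(message)


def _check_delete_exception(e: client.exceptions.ApiException):
    # Assumed behavior: deleting an object that is already gone (404) is benign;
    # anything else is re-raised and caught by the outer handler.
    if e.status != 404:
        raise e


def down(core_api: client.CoreV1Api, namespace: str, pvc_names):
    try:
        for name in pvc_names:
            try:
                # Per-object deletes tolerate "already deleted"...
                core_api.delete_namespaced_persistent_volume_claim(name=name, namespace=namespace)
            except client.exceptions.ApiException as e:
                _check_delete_exception(e)
    except Exception as e:
        # ...while anything unexpected becomes a clean CLI exit rather than a traceback.
        error_exit(f"Exception thrown taking stack down: {e}")
```

The inner per-object ApiException handling already existed before this commit; what the commit adds is the outer try/except on both `up()` and `down()`, so a CI failure surfaces as a single readable message.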

src/stack/deploy/k8s/helpers.py (6 additions & 0 deletions)

@@ -21,6 +21,7 @@
 from kubernetes import client, utils, watch
 from pathlib import Path
 from ruamel.yaml.comments import CommentedSeq
+from time import sleep
 from typing import Set, Mapping, List
 
 from stack.build.build_util import container_exists_locally

@@ -58,6 +59,11 @@ def wait_for_ingress_in_kind():
         if event["object"].status.container_statuses[0].ready is True:
             if warned_waiting:
                 log_info("Ingress controller is ready")
+            # Hack to work around https://github.com/bozemanpass/stack/issues/110
+            # Theory is that depending on when in the 30 second polling cycle we hit ready,
+            # the controller may not actually be ready to serve ingress requests yet.
+            # So we wait a bit longer here.
+            sleep(10)
             return
         log_info("Waiting for ingress controller to become ready...")
         warned_waiting = True
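The `sleep(10)` is flagged in the comments as a hack. A more deterministic variant, sketched here as a hypothetical follow-up rather than anything this commit does, would poll until the ingress controller actually accepts connections; the host and port would depend on how kind exposes the controller, so both are assumptions:

```python
# Hypothetical alternative to the fixed sleep: retry a TCP connect to the
# ingress controller until it succeeds or the retry budget is exhausted.
import socket
from time import sleep


def wait_for_port(host: str, port: int, attempts: int = 30, delay: float = 1.0) -> bool:
    """Return True once a TCP connect to host:port succeeds, else False after all retries."""
    for _ in range(attempts):
        try:
            with socket.create_connection((host, port), timeout=2):
                return True
        except OSError:
            sleep(delay)
    return False
```

Even this only proves the socket is open, not that the controller's admission webhook is serving, which may be why a plain delay was chosen for now.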
