|
| 1 | +""" |
| 2 | +Tests for ECS service deletion race condition fix |
| 3 | +
|
| 4 | +When deleting an ECS service and recreating it with the same name, |
| 5 | +AWS requires the old service to be fully deleted (not just DRAINING) |
| 6 | +before creating a new one. Otherwise, AWS returns: |
| 7 | +"Creation of service was not idempotent" |
| 8 | +
|
| 9 | +These tests verify that create_environment waits for service deletion |
| 10 | +to complete before attempting to create a new service. |
| 11 | +""" |
| 12 | + |
| 13 | +from typing import Any, List |
| 14 | +from unittest.mock import MagicMock, Mock, patch |
| 15 | + |
| 16 | + |
| 17 | +class TestECSServiceDeletionRaceCondition: |
| 18 | + """Test that service deletion waits for completion before creating new service""" |
| 19 | + |
| 20 | + def _create_mock_aws_interface(self) -> MagicMock: |
| 21 | + """Create a mock AWSInterface with mocked boto3 clients""" |
| 22 | + with patch("showtime.core.aws.boto3") as mock_boto3: |
| 23 | + mock_ecs = MagicMock() |
| 24 | + mock_ecr = MagicMock() |
| 25 | + mock_ec2 = MagicMock() |
| 26 | + |
| 27 | + mock_boto3.client.side_effect = lambda service, **kwargs: { |
| 28 | + "ecs": mock_ecs, |
| 29 | + "ecr": mock_ecr, |
| 30 | + "ec2": mock_ec2, |
| 31 | + }[service] |
| 32 | + |
| 33 | + from showtime.core.aws import AWSInterface |
| 34 | + |
| 35 | + aws = AWSInterface() |
| 36 | + aws.ecs_client = mock_ecs |
| 37 | + aws.ecr_client = mock_ecr |
| 38 | + aws.ec2_client = mock_ec2 |
| 39 | + |
| 40 | + return aws |
| 41 | + |
| 42 | + def test_create_environment_waits_for_deletion_when_service_exists(self) -> None: |
| 43 | + """ |
| 44 | + When an existing service with the same name exists, create_environment |
| 45 | + should wait for deletion to complete before creating a new service. |
| 46 | +
|
| 47 | + This test will FAIL until the fix is implemented because the current |
| 48 | + code deletes the service but doesn't wait for deletion to complete. |
| 49 | + """ |
| 50 | + aws = self._create_mock_aws_interface() |
| 51 | + |
| 52 | + # Mock _find_pr_services to return an existing service with same name |
| 53 | + existing_service = { |
| 54 | + "service_name": "pr-1234-abc123f-service", |
| 55 | + "service_arn": "arn:aws:ecs:us-west-2:123456789:service/superset-ci/pr-1234-abc123f-service", |
| 56 | + "sha": "abc123f", |
| 57 | + "status": "ACTIVE", |
| 58 | + "running_count": 1, |
| 59 | + "desired_count": 1, |
| 60 | + "created_at": Mock(), |
| 61 | + } |
| 62 | + |
| 63 | + # Track method calls in order |
| 64 | + call_order: List[str] = [] |
| 65 | + |
| 66 | + def track_delete(*args: Any, **kwargs: Any) -> bool: |
| 67 | + call_order.append("delete_ecs_service") |
| 68 | + return True |
| 69 | + |
| 70 | + def track_wait(*args: Any, **kwargs: Any) -> bool: |
| 71 | + call_order.append("wait_for_service_deletion") |
| 72 | + return True |
| 73 | + |
| 74 | + def track_create(*args: Any, **kwargs: Any) -> bool: |
| 75 | + call_order.append("create_ecs_service") |
| 76 | + return True |
| 77 | + |
| 78 | + # Mock the methods we want to track |
| 79 | + with patch.object(aws, "_find_pr_services", return_value=[existing_service]): |
| 80 | + with patch.object( |
| 81 | + aws, "_create_task_definition_with_image_and_flags", return_value="arn:task-def" |
| 82 | + ): |
| 83 | + with patch.object(aws, "_delete_ecs_service", side_effect=track_delete): |
| 84 | + with patch.object(aws, "_wait_for_service_deletion", side_effect=track_wait): |
| 85 | + with patch.object(aws, "_create_ecs_service", side_effect=track_create): |
| 86 | + with patch.object(aws, "_deploy_task_definition", return_value=True): |
| 87 | + with patch.object( |
| 88 | + aws, "_wait_for_service_stability", return_value=True |
| 89 | + ): |
| 90 | + with patch.object( |
| 91 | + aws, "_health_check_service", return_value=True |
| 92 | + ): |
| 93 | + with patch.object( |
| 94 | + aws, "get_environment_ip", return_value="1.2.3.4" |
| 95 | + ): |
| 96 | + aws.create_environment( |
| 97 | + pr_number=1234, |
| 98 | + sha="abc123f", |
| 99 | + github_user="testuser", |
| 100 | + ) |
| 101 | + |
| 102 | + # Verify the call order: delete -> wait -> create |
| 103 | + assert "delete_ecs_service" in call_order, "Should have called _delete_ecs_service" |
| 104 | + assert "create_ecs_service" in call_order, "Should have called _create_ecs_service" |
| 105 | + |
| 106 | + # The critical assertion: wait_for_service_deletion must be called |
| 107 | + # between delete and create |
| 108 | + assert ( |
| 109 | + "wait_for_service_deletion" in call_order |
| 110 | + ), "Should have called _wait_for_service_deletion after deleting existing service" |
| 111 | + |
| 112 | + delete_idx = call_order.index("delete_ecs_service") |
| 113 | + wait_idx = call_order.index("wait_for_service_deletion") |
| 114 | + create_idx = call_order.index("create_ecs_service") |
| 115 | + |
| 116 | + assert ( |
| 117 | + delete_idx < wait_idx < create_idx |
| 118 | + ), f"Expected delete -> wait -> create, but got: {call_order}" |
| 119 | + |
| 120 | + def test_create_environment_no_wait_when_no_existing_service(self) -> None: |
| 121 | + """ |
| 122 | + When no existing service exists, should not call wait_for_service_deletion. |
| 123 | + """ |
| 124 | + aws = self._create_mock_aws_interface() |
| 125 | + |
| 126 | + wait_called = False |
| 127 | + |
| 128 | + def track_wait(*args: Any, **kwargs: Any) -> bool: |
| 129 | + nonlocal wait_called |
| 130 | + wait_called = True |
| 131 | + return True |
| 132 | + |
| 133 | + # Mock _find_pr_services to return empty list (no existing service) |
| 134 | + with patch.object(aws, "_find_pr_services", return_value=[]): |
| 135 | + with patch.object( |
| 136 | + aws, "_create_task_definition_with_image_and_flags", return_value="arn:task-def" |
| 137 | + ): |
| 138 | + with patch.object(aws, "_wait_for_service_deletion", side_effect=track_wait): |
| 139 | + with patch.object(aws, "_create_ecs_service", return_value=True): |
| 140 | + with patch.object(aws, "_deploy_task_definition", return_value=True): |
| 141 | + with patch.object(aws, "_wait_for_service_stability", return_value=True): |
| 142 | + with patch.object(aws, "_health_check_service", return_value=True): |
| 143 | + with patch.object( |
| 144 | + aws, "get_environment_ip", return_value="1.2.3.4" |
| 145 | + ): |
| 146 | + aws.create_environment( |
| 147 | + pr_number=1234, |
| 148 | + sha="abc123f", |
| 149 | + github_user="testuser", |
| 150 | + ) |
| 151 | + |
| 152 | + # Should NOT call wait_for_service_deletion when no service to delete |
| 153 | + assert not wait_called, "Should not wait for deletion when no existing service" |
| 154 | + |
| 155 | + def test_create_environment_with_force_flag_waits_for_deletion(self) -> None: |
| 156 | + """ |
| 157 | + When force=True and service exists, should wait for deletion. |
| 158 | + This tests the existing force flag path which already has the wait. |
| 159 | + """ |
| 160 | + aws = self._create_mock_aws_interface() |
| 161 | + |
| 162 | + call_order: List[str] = [] |
| 163 | + |
| 164 | + def track_delete(*args: Any, **kwargs: Any) -> bool: |
| 165 | + call_order.append("delete_ecs_service") |
| 166 | + return True |
| 167 | + |
| 168 | + def track_wait(*args: Any, **kwargs: Any) -> bool: |
| 169 | + call_order.append("wait_for_service_deletion") |
| 170 | + return True |
| 171 | + |
| 172 | + def track_create(*args: Any, **kwargs: Any) -> bool: |
| 173 | + call_order.append("create_ecs_service") |
| 174 | + return True |
| 175 | + |
| 176 | + # Mock _service_exists to return True (force flag checks this) |
| 177 | + with patch.object(aws, "_service_exists", return_value=True): |
| 178 | + with patch.object(aws, "_find_pr_services", return_value=[]): |
| 179 | + with patch.object( |
| 180 | + aws, "_create_task_definition_with_image_and_flags", return_value="arn:task-def" |
| 181 | + ): |
| 182 | + with patch.object(aws, "_delete_ecs_service", side_effect=track_delete): |
| 183 | + with patch.object(aws, "_wait_for_service_deletion", side_effect=track_wait): |
| 184 | + with patch.object(aws, "_create_ecs_service", side_effect=track_create): |
| 185 | + with patch.object(aws, "_deploy_task_definition", return_value=True): |
| 186 | + with patch.object( |
| 187 | + aws, "_wait_for_service_stability", return_value=True |
| 188 | + ): |
| 189 | + with patch.object( |
| 190 | + aws, "_health_check_service", return_value=True |
| 191 | + ): |
| 192 | + with patch.object( |
| 193 | + aws, "get_environment_ip", return_value="1.2.3.4" |
| 194 | + ): |
| 195 | + aws.create_environment( |
| 196 | + pr_number=1234, |
| 197 | + sha="abc123f", |
| 198 | + github_user="testuser", |
| 199 | + force=True, |
| 200 | + ) |
| 201 | + |
| 202 | + # Force flag path should wait for deletion |
| 203 | + assert "delete_ecs_service" in call_order |
| 204 | + assert "wait_for_service_deletion" in call_order |
| 205 | + |
| 206 | + delete_idx = call_order.index("delete_ecs_service") |
| 207 | + wait_idx = call_order.index("wait_for_service_deletion") |
| 208 | + |
| 209 | + assert delete_idx < wait_idx, "Wait should come after delete" |
0 commit comments