Skip to content

Commit 87ab7dc

Browse files
committed
refactor: [#24] simplify cloud-init monitoring and improve error handling
- Simplify cloud-init monitoring to wait for completion, then check SSH keys
- Move SSH key verification back to the main execute method
- Extract the final summary into a separate _show_final_summary method
- Only print cloud-init logs on errors/timeouts for better debugging
- Clean success path with minimal output when everything works
- Improve separation of concerns between monitoring and verification
- All tests pass and linters are clean
1 parent 6655784 commit 87ab7dc

File tree

1 file changed

+93
-43
lines changed

1 file changed

+93
-43
lines changed

lib/TorrustDeploy/App/Command/provision.pm

Lines changed: 93 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ sub execute {
5858
# Verify SSH key authentication after cloud-init completes
5959
$self->_verify_ssh_key_auth($vm_ip);
6060

61+
# Show final summary
62+
$self->_show_final_summary($vm_ip);
63+
6164
say "Provisioning completed successfully!";
6265
say "VM is ready at IP: $vm_ip";
6366
}
@@ -151,84 +154,131 @@ sub _wait_for_cloud_init {
151154
say "Waiting for cloud-init to complete...";
152155
say "This may take several minutes while packages are installed and configured.";
153156

154-
# Monitor cloud-init progress using password authentication
155-
$self->_monitor_cloud_init($vm_ip);
156-
}
157-
158-
sub _monitor_cloud_init {
159-
my ($self, $vm_ip) = @_;
160-
161-
say "Monitoring cloud-init progress...";
162-
say "Connecting via SSH with password authentication to monitor setup progress...";
163-
164157
my $completion_file = "/var/lib/cloud/torrust-setup-complete";
165-
my $last_line_count = 0;
166-
my $max_attempts = 300; # 25 minutes with 5-second intervals
158+
my $max_attempts = 360; # 30 minutes with 5-second intervals
167159
my $attempt = 0;
160+
my $ssh_connected = 0;
161+
my $cloud_init_success = 0;
168162

163+
# Step 1: Wait until SSH connection is available (for password auth to check cloud-init)
164+
say "⏳ Waiting for SSH service to become available...";
165+
166+
while ($attempt < $max_attempts && !$ssh_connected) {
167+
$attempt++;
168+
169+
my $ssh_test = system("timeout 5 sshpass -p 'torrust123' ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null torrust\@$vm_ip 'echo \"SSH connected\"' >/dev/null 2>&1");
170+
if ($ssh_test == 0) {
171+
$ssh_connected = 1;
172+
say "✅ SSH password connection established to $vm_ip";
173+
} else {
174+
if ($attempt % 6 == 0) { # Every 30 seconds
175+
say " [Waiting for SSH connection... ${attempt}0s elapsed]";
176+
}
177+
sleep(5);
178+
}
179+
}
180+
181+
if (!$ssh_connected) {
182+
say "❌ Failed to establish SSH connection to $vm_ip after " . ($max_attempts * 5 / 60) . " minutes";
183+
$self->_print_cloud_init_logs($vm_ip);
184+
die "SSH connection failed";
185+
}
186+
187+
# Step 2: Wait until cloud-init completion marker is created
188+
say "⏳ Waiting for cloud-init to complete...";
189+
190+
$attempt = 0;
169191
while ($attempt < $max_attempts) {
170192
$attempt++;
171193

172-
# Check if completion file exists
173194
my $check_result = system("timeout 10 sshpass -p 'torrust123' ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null torrust\@$vm_ip 'test -f $completion_file' >/dev/null 2>&1");
174195

175196
if ($check_result == 0) {
176-
say "\n✅ Cloud-init setup completed successfully!";
197+
say "✅ Cloud-init setup completed successfully!";
177198

178-
# Show final completion message
199+
# Show completion message
179200
my $completion_content = `timeout 10 sshpass -p 'torrust123' ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null torrust\@$vm_ip 'cat $completion_file' 2>/dev/null`;
180201
if ($completion_content) {
181202
chomp $completion_content;
182-
say "Completion marker: $completion_content";
183-
}
184-
return;
185-
}
186-
187-
# Get latest cloud-init log output
188-
my $log_output = `timeout 10 sshpass -p 'torrust123' ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null torrust\@$vm_ip 'tail -n 20 /var/log/cloud-init-output.log 2>/dev/null || echo "Log not available yet"' 2>/dev/null`;
189-
190-
if ($log_output && $log_output !~ /^Log not available yet/) {
191-
# Count lines to show only new content
192-
my @lines = split /\n/, $log_output;
193-
my $current_line_count = scalar @lines;
194-
195-
if ($current_line_count > $last_line_count) {
196-
# Show new lines
197-
my @new_lines = @lines[($last_line_count)..($current_line_count-1)];
198-
for my $line (@new_lines) {
199-
say " $line" if $line =~ /\S/; # Only non-empty lines
200-
}
201-
$last_line_count = $current_line_count;
203+
say "📅 Completion marker: $completion_content";
202204
}
205+
$cloud_init_success = 1;
206+
last;
203207
}
204208

205-
# Show progress indicator
206-
if ($attempt % 12 == 0) { # Every minute
207-
say " [Still waiting for cloud-init... ${attempt}s elapsed]";
209+
# Show progress indicator every 2 minutes
210+
if ($attempt % 24 == 0) {
211+
my $elapsed_minutes = int($attempt * 5 / 60);
212+
say " [Cloud-init still running... ${elapsed_minutes} minutes elapsed]";
208213
}
209214

210215
sleep(5);
211216
}
212217

213-
die "\nTimeout waiting for cloud-init to complete on $vm_ip";
218+
if (!$cloud_init_success) {
219+
say "❌ Timeout waiting for cloud-init to complete on $vm_ip after " . ($max_attempts * 5 / 60) . " minutes";
220+
$self->_print_cloud_init_logs($vm_ip);
221+
die "Cloud-init timeout";
222+
}
223+
}
224+
225+
# Print a short post-provisioning summary of the VM: the Docker version and
# the first line of the firewall (UFW) status, both read over SSH using
# password authentication.
#
# Parameters:
#   $vm_ip - address of the VM to query.
#
# Returns nothing. Probe failures are not fatal: a probe that yields no
# output (for example because the SSH command itself failed) is simply left
# out of the summary.
#
# The closing "Provisioning completed successfully!" / "VM is ready at IP"
# lines are intentionally NOT printed here: execute() prints them right
# after calling this method, so emitting them here duplicated the output.
sub _show_final_summary {
    my ($self, $vm_ip) = @_;

    say "📦 Final system summary:";

    # Shared SSH invocation used by both probes below.
    my $ssh = "timeout 10 sshpass -p 'torrust123' ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null torrust\@$vm_ip";

    my $docker_version = `$ssh 'docker --version 2>/dev/null || echo "Docker not available"' 2>/dev/null`;
    if ($docker_version) {
        chomp $docker_version;
        say "   Docker: $docker_version";
    }

    # The fallback is grouped with ufw inside a subshell. In the ungrouped
    # form "ufw status | head -1 || echo ...", the "||" tests the exit status
    # of the pipeline, which is that of head, so the fallback never ran when
    # ufw was missing or failed.
    # NOTE(review): "ufw status" may need root on the guest; confirm whether
    # this should be "sudo ufw status".
    my $ufw_status = `$ssh '(ufw status 2>/dev/null || echo "UFW not available") | head -1' 2>/dev/null`;
    if ($ufw_status) {
        chomp $ufw_status;
        say "   Firewall: $ufw_status";
    }

    return;
}
241+
242+
# Dump the cloud-init log files from the VM to stdout for debugging.
#
# Each log is fetched over SSH (password authentication, "sudo cat") and
# printed under a "=== <path> ===" header. When the fetch returns nothing,
# or the remote fallback text "Log file not available", a one-line notice
# is printed instead of the log body.
#
# Parameters:
#   $vm_ip - address of the VM to read the logs from.
sub _print_cloud_init_logs {
    my ($self, $vm_ip) = @_;

    say "📄 Cloud-init logs (for debugging):";

    # [ remote path, label used in the "not available" notice ]
    my @log_files = (
        [ '/var/log/cloud-init-output.log', 'output' ],
        [ '/var/log/cloud-init.log',        'main'   ],
    );

    for my $entry (@log_files) {
        my ($remote_path, $label) = @{$entry};

        say "=== $remote_path ===";

        my $fetch_cmd = qq{timeout 30 sshpass -p 'torrust123' ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null torrust\@$vm_ip 'sudo cat $remote_path 2>/dev/null || echo "Log file not available"' 2>/dev/null};
        my $contents  = `$fetch_cmd`;

        # Empty output or the remote fallback text both mean "no log".
        if (!$contents || $contents =~ /^Log file not available/) {
            say "Cloud-init $label log not available";
            next;
        }

        print $contents;
    }
}
215264

216265
# Check that the VM accepts key-based SSH logins for the "torrust" user.
#
# Runs a single non-interactive ssh probe using the key at
# $HOME/.ssh/testing_rsa with password authentication disabled. On success
# a hint with the command to connect is printed. On failure the cloud-init
# logs are dumped via _print_cloud_init_logs and the method dies.
#
# Parameters:
#   $vm_ip - address of the VM to test.
#
# Dies with "SSH key authentication failed" when the probe exits non-zero.
sub _verify_ssh_key_auth {
    my ($self, $vm_ip) = @_;

    say "🔑 Checking SSH key authentication...";

    my $key_file = "$ENV{HOME}/.ssh/testing_rsa";

    # Assemble the probe piece by piece; joined with single spaces this is
    # the exact command line handed to the shell.
    my $probe_cmd = join ' ',
        'timeout 10',
        "ssh -i '$key_file'",
        '-o ConnectTimeout=10',
        '-o StrictHostKeyChecking=no',
        '-o UserKnownHostsFile=/dev/null',
        '-o PasswordAuthentication=no',
        "torrust\@$vm_ip",
        q{'echo "SSH key authentication successful"'},
        '>/dev/null 2>&1';

    my $exit_status = system($probe_cmd);

    # Failure path first: show the logs that explain it, then abort.
    if ($exit_status != 0) {
        say "❌ SSH key authentication failed";
        $self->_print_cloud_init_logs($vm_ip);
        die "SSH key authentication failed";
    }

    say "✅ SSH key authentication is working correctly!";
    say "You can now connect using: ssh -i ~/.ssh/testing_rsa torrust\@$vm_ip";
}
234284

0 commit comments

Comments
 (0)