Skip to content
78 changes: 44 additions & 34 deletions internal/provider/template_resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -1106,47 +1106,57 @@ func waitForJob(ctx context.Context, client *codersdk.Client, version *codersdk.
const maxRetries = 3
var jobLogs []codersdk.ProvisionerJobLog
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need this variable here now?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope, removed it. waitForJobOnce now manages its own jobLogs internally and the caller accumulates via append(allLogs, logs...).

for retries := 0; retries < maxRetries; retries++ {
logs, closer, err := client.TemplateVersionLogsAfter(ctx, version.ID, 0)
jobLogs, done, err := waitForJobOnce(ctx, client, version, jobLogs)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shadows variable

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch — fixed. Changed to use explicit var done bool / var err error declarations with = assignment instead of := so jobLogs is no longer shadowed.

Also added 5 unit tests covering waitForJobOnce and waitForJob using httptest + WebSocket mocking:

  • Success, failure, still-active cases for waitForJobOnce
  • Retry with separate WS connections and log accumulation across retries for waitForJob

if err != nil {
return jobLogs, fmt.Errorf("begin streaming logs: %w", err)
return jobLogs, err
}
defer func() {
if err := closer.Close(); err != nil {
tflog.Warn(ctx, "error closing template version log stream", map[string]any{
"error": err,
})
}
}()
for {
logs, ok := <-logs
if !ok {
break
}
tflog.Info(ctx, logs.Output, map[string]interface{}{
"job_id": logs.ID,
"job_stage": logs.Stage,
"log_source": logs.Source,
"level": logs.Level,
"created_at": logs.CreatedAt,
})
if logs.Output != "" {
jobLogs = append(jobLogs, logs)
}
if done {
return jobLogs, nil
}
latestResp, err := client.TemplateVersion(ctx, version.ID)
if err != nil {
return jobLogs, err
}
return jobLogs, fmt.Errorf("provisioner job did not complete after %d retries", maxRetries)
}

func waitForJobOnce(ctx context.Context, client *codersdk.Client, version *codersdk.TemplateVersion, jobLogs []codersdk.ProvisionerJobLog) ([]codersdk.ProvisionerJobLog, bool, error) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to pass in jobLogs here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good call — removed the parameter. waitForJobOnce now returns only its own logs and the caller appends them.

logs, closer, err := client.TemplateVersionLogsAfter(ctx, version.ID, 0)
if err != nil {
return jobLogs, false, fmt.Errorf("begin streaming logs: %w", err)
}
defer func() {
if err := closer.Close(); err != nil {
tflog.Warn(ctx, "error closing template version log stream", map[string]any{
"error": err,
})
}
if latestResp.Job.Status.Active() {
tflog.Warn(ctx, fmt.Sprintf("provisioner job still active, continuing to wait...: %s", latestResp.Job.Status))
continue
}()
for {
logs, ok := <-logs
if !ok {
break
}
if latestResp.Job.Status != codersdk.ProvisionerJobSucceeded {
return jobLogs, fmt.Errorf("provisioner job did not succeed: %s (%s)", latestResp.Job.Status, latestResp.Job.Error)
tflog.Info(ctx, logs.Output, map[string]interface{}{
"job_id": logs.ID,
"job_stage": logs.Stage,
"log_source": logs.Source,
"level": logs.Level,
"created_at": logs.CreatedAt,
})
if logs.Output != "" {
jobLogs = append(jobLogs, logs)
}
return jobLogs, nil
}
return jobLogs, fmt.Errorf("provisioner job did not complete after %d retries", maxRetries)
latestResp, err := client.TemplateVersion(ctx, version.ID)
if err != nil {
return jobLogs, false, err
}
if latestResp.Job.Status.Active() {
tflog.Warn(ctx, fmt.Sprintf("provisioner job still active, continuing to wait...: %s", latestResp.Job.Status))
return jobLogs, false, nil
}
if latestResp.Job.Status != codersdk.ProvisionerJobSucceeded {
return jobLogs, false, fmt.Errorf("provisioner job did not succeed: %s (%s)", latestResp.Job.Status, latestResp.Job.Error)
}
return jobLogs, true, nil
}

type newVersionRequest struct {
Expand Down