-
Notifications
You must be signed in to change notification settings - Fork 47
Refactor old special steps #245
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 62 commits
f40c0db
a71f0bd
7723cbf
a435f58
ea6a2ec
23d93d8
ef4ce90
1f7bf00
f48cccb
4239a22
4901aaa
726015c
cc0f286
8ba8b88
a350c45
3af6da0
27fdb77
c90b34a
3891a07
5699b9f
12bab3a
0102e5c
9543d91
1844aee
dadd479
c847897
7707b1c
0b7f444
2d22b04
14dac98
2ea00d7
3d47f31
80344eb
5055aa4
27d714f
f95b92e
4a2f40e
4ec0450
8a453fe
c838e60
36a8ef0
1cde6a9
7bfa02e
e727113
adb9cc0
3329d59
595b1de
9d6c307
7268e9f
15698bb
4763419
054a0d4
313f34e
a85d175
b3f300c
24e3d80
c9045cc
61670d7
2190665
6356b12
9f3876f
2177d6b
8451566
aed8664
a28ab94
46be9b5
45d19a2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1680,10 +1680,6 @@ func (s *sysDB) sleep(ctx context.Context, input sleepInput) (time.Duration, err | |
| return 0, newStepExecutionError("", functionName, fmt.Errorf("workflow state not found in context: are you running this step within a workflow?")) | ||
| } | ||
|
|
||
| if wfState.isWithinStep { | ||
| return 0, newStepExecutionError(wfState.workflowID, functionName, fmt.Errorf("cannot call Sleep within a step")) | ||
| } | ||
|
|
||
| // Determine step ID | ||
| var stepID int | ||
| if input.stepID != nil && *input.stepID >= 0 { | ||
|
|
@@ -2042,98 +2038,38 @@ type WorkflowSendInput struct { | |
| DestinationID string | ||
| Message any | ||
| Topic string | ||
| tx pgx.Tx | ||
| } | ||
|
|
||
| // Send is a special type of step that sends a message to another workflow. | ||
| // Can be called both within a workflow (as a step) or outside a workflow (directly). | ||
| // When called within a workflow: durability and the function run in the same transaction, and we forbid nested step execution | ||
| func (s *sysDB) send(ctx context.Context, input WorkflowSendInput) error { | ||
| functionName := "DBOS.send" | ||
|
|
||
| // Get workflow state from context (optional for Send as we can send from outside a workflow) | ||
| wfState, ok := ctx.Value(workflowStateKey).(*workflowState) | ||
| var stepID int | ||
| var isInWorkflow bool | ||
|
|
||
| if ok && wfState != nil { | ||
| isInWorkflow = true | ||
| if wfState.isWithinStep { | ||
| return newStepExecutionError(wfState.workflowID, functionName, fmt.Errorf("cannot call Send within a step")) | ||
| } | ||
|
Comment on lines
-2060
to
-2062
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Lifted to pre-sysdb invocation |
||
| stepID = wfState.nextStepID() | ||
| } | ||
|
|
||
| if _, ok := input.Message.(*string); !ok { | ||
| return fmt.Errorf("message must be a pointer to a string") | ||
| } | ||
|
|
||
| tx, err := s.pool.Begin(ctx) | ||
| if err != nil { | ||
| return fmt.Errorf("failed to begin transaction: %w", err) | ||
| } | ||
| defer tx.Rollback(ctx) | ||
|
|
||
| startTime := time.Now() | ||
|
|
||
| // Check if operation was already executed and do nothing if so (only if in workflow) | ||
| if isInWorkflow { | ||
| checkInput := checkOperationExecutionDBInput{ | ||
| workflowID: wfState.workflowID, | ||
| stepID: stepID, | ||
| stepName: functionName, | ||
| tx: tx, | ||
| } | ||
| recordedResult, err := s.checkOperationExecution(ctx, checkInput) | ||
| if err != nil { | ||
| return err | ||
| } | ||
| if recordedResult != nil { | ||
| // when hitting this case, recordedResult will be &{<nil> <nil>} | ||
| return nil | ||
| } | ||
| } | ||
|
|
||
| // Set default topic if not provided | ||
| topic := _DBOS_NULL_TOPIC | ||
| if len(input.Topic) > 0 { | ||
| topic = input.Topic | ||
| } | ||
|
|
||
| insertQuery := fmt.Sprintf(`INSERT INTO %s.notifications (destination_uuid, topic, message) VALUES ($1, $2, $3)`, pgx.Identifier{s.schema}.Sanitize()) | ||
| _, err = tx.Exec(ctx, insertQuery, input.DestinationID, topic, input.Message) | ||
| var err error | ||
| if input.tx != nil { | ||
| _, err = input.tx.Exec(ctx, insertQuery, input.DestinationID, topic, input.Message) | ||
| } else { | ||
| _, err = s.pool.Exec(ctx, insertQuery, input.DestinationID, topic, input.Message) | ||
| } | ||
| if err != nil { | ||
| s.logger.Error("failed to insert notification", "error", err, "query", insertQuery, "destination_id", input.DestinationID, "topic", topic, "message", input.Message) | ||
| // Check for foreign key violation (destination workflow doesn't exist) | ||
| if pgErr, ok := err.(*pgconn.PgError); ok && pgErr.Code == _PG_ERROR_FOREIGN_KEY_VIOLATION { | ||
| return newNonExistentWorkflowError(input.DestinationID) | ||
| } | ||
| return fmt.Errorf("failed to insert notification: %w", err) | ||
| } | ||
|
|
||
| // Record the operation result if this is called within a workflow | ||
| if isInWorkflow { | ||
| completedTime := time.Now() | ||
| recordInput := recordOperationResultDBInput{ | ||
| workflowID: wfState.workflowID, | ||
| stepID: stepID, | ||
| stepName: functionName, | ||
| output: nil, | ||
| err: nil, | ||
| tx: tx, | ||
| startedAt: startTime, | ||
| completedAt: completedTime, | ||
| } | ||
|
|
||
| err = s.recordOperationResult(ctx, recordInput) | ||
| if err != nil { | ||
| return err | ||
| } | ||
| } | ||
|
|
||
| // Commit transaction | ||
| if err := tx.Commit(ctx); err != nil { | ||
| return fmt.Errorf("failed to commit transaction: %w", err) | ||
| } | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
|
|
@@ -2147,10 +2083,6 @@ func (s *sysDB) recv(ctx context.Context, input recvInput) (*string, error) { | |
| return nil, newStepExecutionError("", functionName, fmt.Errorf("workflow state not found in context: are you running this step within a workflow?")) | ||
| } | ||
|
|
||
| if wfState.isWithinStep { | ||
| return nil, newStepExecutionError(wfState.workflowID, functionName, fmt.Errorf("cannot call Recv within a step")) | ||
| } | ||
|
Comment on lines
-2150
to
-2152
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. lifted to pre-sysdb invocation |
||
|
|
||
| stepID := wfState.nextStepID() | ||
| sleepStepID := wfState.nextStepID() // We will use a sleep step to implement the timeout | ||
| destinationID := wfState.workflowID | ||
|
|
@@ -2261,19 +2193,18 @@ loop: | |
| return nil, fmt.Errorf("failed to begin transaction: %w", err) | ||
| } | ||
| defer tx.Rollback(ctx) | ||
| // Use message_uuid so we delete exactly one row; created_at_epoch_ms can match multiple rows when inserts occur in the same millisecond. | ||
| query = fmt.Sprintf(` | ||
| WITH oldest_entry AS ( | ||
| SELECT destination_uuid, topic, message, created_at_epoch_ms | ||
| FROM %s.notifications | ||
| WHERE destination_uuid = $1 AND topic = $2 | ||
| ORDER BY created_at_epoch_ms ASC | ||
| LIMIT 1 | ||
| ) | ||
| DELETE FROM %s.notifications | ||
| WHERE destination_uuid = (SELECT destination_uuid FROM oldest_entry) | ||
| AND topic = (SELECT topic FROM oldest_entry) | ||
| AND created_at_epoch_ms = (SELECT created_at_epoch_ms FROM oldest_entry) | ||
| RETURNING message`, pgx.Identifier{s.schema}.Sanitize(), pgx.Identifier{s.schema}.Sanitize()) | ||
|
Comment on lines
-2265
to
-2276
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This query can delete more than 1 message if
instead of
The first line converts the return value of The second line (what we have in Python) does the multiplication on double precision, then converts to bigint, which can do a truncation. Because multiplication on floating points can (often) have errors, this meant more volatility in the truncation, which, I think could have contributed to obfuscate this bug. |
||
| WITH oldest_entry AS ( | ||
| SELECT message_uuid, message | ||
| FROM %s.notifications | ||
| WHERE destination_uuid = $1 AND topic = $2 | ||
| ORDER BY created_at_epoch_ms ASC | ||
| LIMIT 1 | ||
| ) | ||
| DELETE FROM %s.notifications | ||
| WHERE message_uuid = (SELECT message_uuid FROM oldest_entry) | ||
| RETURNING message`, pgx.Identifier{s.schema}.Sanitize(), pgx.Identifier{s.schema}.Sanitize()) | ||
|
|
||
| var messageString *string | ||
| err = tx.QueryRow(ctx, query, destinationID, topic).Scan(&messageString) | ||
|
|
@@ -2316,61 +2247,35 @@ loop: | |
| type WorkflowSetEventInput struct { | ||
| Key string | ||
| Message any | ||
| tx pgx.Tx | ||
| } | ||
|
|
||
| func (s *sysDB) setEvent(ctx context.Context, input WorkflowSetEventInput) error { | ||
| functionName := "DBOS.setEvent" | ||
|
|
||
| // Get workflow state from context | ||
| wfState, ok := ctx.Value(workflowStateKey).(*workflowState) | ||
| if !ok || wfState == nil { | ||
| return newStepExecutionError("", functionName, fmt.Errorf("workflow state not found in context: are you running this step within a workflow?")) | ||
| return newStepExecutionError("", "DBOS.setEvent", fmt.Errorf("workflow state not found in context: are you running this step within a workflow?")) | ||
| } | ||
|
|
||
| if _, ok := input.Message.(*string); !ok { | ||
| return fmt.Errorf("message must be a pointer to a string") | ||
| } | ||
|
|
||
| if wfState.isWithinStep { | ||
| return newStepExecutionError(wfState.workflowID, functionName, fmt.Errorf("cannot call SetEvent within a step")) | ||
| } | ||
|
|
||
| stepID := wfState.nextStepID() | ||
|
|
||
| startTime := time.Now() | ||
|
|
||
| tx, err := s.pool.Begin(ctx) | ||
| if err != nil { | ||
| return fmt.Errorf("failed to begin transaction: %w", err) | ||
| } | ||
| defer tx.Rollback(ctx) | ||
|
|
||
| // Check if operation was already executed and do nothing if so | ||
| checkInput := checkOperationExecutionDBInput{ | ||
| workflowID: wfState.workflowID, | ||
| stepID: stepID, | ||
| stepName: functionName, | ||
| tx: tx, | ||
| } | ||
| recordedResult, err := s.checkOperationExecution(ctx, checkInput) | ||
| if err != nil { | ||
| return err | ||
| } | ||
| if recordedResult != nil { | ||
| // when hitting this case, recordedResult will be &{<nil> <nil>} | ||
| return nil | ||
| } | ||
|
|
||
| // input.Message is already encoded *string from the typed layer | ||
| // Insert or update the event using UPSERT | ||
| insertQuery := fmt.Sprintf(`INSERT INTO %s.workflow_events (workflow_uuid, key, value) | ||
| VALUES ($1, $2, $3) | ||
| ON CONFLICT (workflow_uuid, key) | ||
| DO UPDATE SET value = EXCLUDED.value`, pgx.Identifier{s.schema}.Sanitize()) | ||
|
|
||
| _, err = tx.Exec(ctx, insertQuery, wfState.workflowID, input.Key, input.Message) | ||
| var err error | ||
| if input.tx != nil { | ||
| _, err = input.tx.Exec(ctx, insertQuery, wfState.workflowID, input.Key, input.Message) | ||
| } else { | ||
| _, err = s.pool.Exec(ctx, insertQuery, wfState.workflowID, input.Key, input.Message) | ||
| } | ||
| if err != nil { | ||
| return fmt.Errorf("failed to insert/update workflow event: %w", err) | ||
| return fmt.Errorf("failed to insert event: %w", err) | ||
| } | ||
|
|
||
| // Record event in workflow_events_history | ||
|
|
@@ -2379,35 +2284,12 @@ func (s *sysDB) setEvent(ctx context.Context, input WorkflowSetEventInput) error | |
| ON CONFLICT (workflow_uuid, function_id, key) | ||
| DO UPDATE SET value = EXCLUDED.value`, pgx.Identifier{s.schema}.Sanitize()) | ||
|
|
||
| _, err = tx.Exec(ctx, insertHistoryQuery, wfState.workflowID, stepID, input.Key, input.Message) | ||
| if err != nil { | ||
| return fmt.Errorf("failed to insert workflow event history: %w", err) | ||
| } | ||
|
|
||
| // Record the operation result | ||
| completedTime := time.Now() | ||
| recordInput := recordOperationResultDBInput{ | ||
| workflowID: wfState.workflowID, | ||
| stepID: stepID, | ||
| stepName: functionName, | ||
| output: nil, | ||
| err: nil, | ||
| tx: tx, | ||
| startedAt: startTime, | ||
| completedAt: completedTime, | ||
| } | ||
|
|
||
| err = s.recordOperationResult(ctx, recordInput) | ||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
||
| // Commit transaction | ||
| if err := tx.Commit(ctx); err != nil { | ||
| return fmt.Errorf("failed to commit transaction: %w", err) | ||
| if input.tx != nil { | ||
| _, err = input.tx.Exec(ctx, insertHistoryQuery, wfState.workflowID, wfState.stepID, input.Key, input.Message) | ||
| } else { | ||
| _, err = s.pool.Exec(ctx, insertHistoryQuery, wfState.workflowID, wfState.stepID, input.Key, input.Message) | ||
| } | ||
|
|
||
| return nil | ||
| return err | ||
| } | ||
|
|
||
| func (s *sysDB) getEvent(ctx context.Context, input getEventInput) (*string, error) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Lifted to pre-sysdb invocation