[PECOBLR-1131] Fix incorrect refetching of expired CloudFetch links when using Thrift protocol. #1066
Changes from 12 commits
First changed file:
@@ -214,13 +214,18 @@ private void triggerNextBatchDownload() {
       return;
     }
 
+    // Calculate row offset for this batch
+    final long batchStartRowOffset = getChunkStartRowOffset(batchStartIndex);
+
     LOGGER.info("Starting batch download from index {}", batchStartIndex);
     currentDownloadTask =
         CompletableFuture.runAsync(
             () -> {
               try {
                 Collection<ExternalLink> links =
-                    session.getDatabricksClient().getResultChunks(statementId, batchStartIndex);
+                    session
+                        .getDatabricksClient()
+                        .getResultChunks(statementId, batchStartIndex, batchStartRowOffset);
                 LOGGER.info(
                     "Retrieved {} links for batch starting at {} for statement id {}",
                     links.size(),
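For readers skimming the hunk above: it resolves the batch's start row offset from chunk metadata before kicking off the asynchronous link download, so that getResultChunks can be addressed by row offset as well as by chunk index. Below is a minimal, self-contained sketch of that shape; every name in it (BatchDownloader, Chunk, ResultChunkClient) is an illustrative stand-in rather than one of the driver's real classes.

```java
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;

// Illustrative sketch only; class and member names do not match the driver's real types.
public class BatchDownloader {

  /** Minimal stand-in for a result chunk's metadata. */
  record Chunk(long index, long startRowOffset, long rowCount) {}

  /** Stand-in for a client that exchanges (statement, chunk index, row offset) for fresh links. */
  interface ResultChunkClient {
    List<String> getResultChunks(String statementId, long chunkIndex, long startRowOffset);
  }

  private final Map<Long, Chunk> chunkIndexToChunksMap = new ConcurrentHashMap<>();
  private final ResultChunkClient client;
  private final String statementId;

  BatchDownloader(ResultChunkClient client, String statementId, List<Chunk> chunks) {
    this.client = client;
    this.statementId = statementId;
    chunks.forEach(chunk -> chunkIndexToChunksMap.put(chunk.index(), chunk));
  }

  /** Resolve the batch's start row offset first, then download the links asynchronously. */
  CompletableFuture<List<String>> triggerBatchDownload(long batchStartIndex) {
    final long batchStartRowOffset = getChunkStartRowOffset(batchStartIndex);
    return CompletableFuture.supplyAsync(
        () -> client.getResultChunks(statementId, batchStartIndex, batchStartRowOffset));
  }

  /** Look up the chunk's start row offset; a missing chunk indicates inconsistent state. */
  private long getChunkStartRowOffset(long chunkIndex) {
    Chunk chunk = chunkIndexToChunksMap.get(chunkIndex);
    if (chunk == null) {
      throw new IllegalStateException("Chunk not found in map for index " + chunkIndex);
    }
    return chunk.startRowOffset();
  }
}
```

One plausible reason for computing the offset eagerly, outside the lambda, is that the value is then final and can be captured by the async task, and any missing-chunk failure surfaces on the calling thread rather than inside the download task.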
@@ -413,6 +418,28 @@ private void prepareNewBatchDownload(long startIndex) {
     isDownloadChainStarted.set(false);
   }
 
+  /**
+   * Gets the start row offset for a given chunk index.
+   *
+   * @param chunkIndex the chunk index to get the row offset for
+   * @return the start row offset for the chunk
+   */
+  private long getChunkStartRowOffset(long chunkIndex) {
+    T chunk = chunkIndexToChunksMap.get(chunkIndex);
+    if (chunk == null) {
+      // Should never happen.
+      throw new IllegalStateException(
+          "Chunk not found in map for index "
+              + chunkIndex
+              + ". "
+              + "Total chunks: "
+              + totalChunks
+              + ", StatementId: "
+              + statementId);
+    }
+    return chunk.getStartRowOffset();
+  }
+
   private boolean isChunkLinkExpired(ExternalLink link) {
     if (link == null || link.getExpiration() == null) {
       LOGGER.warn("Link or expiration is null, assuming link is expired");

Review thread on the added `throw new IllegalStateException(`:

Collaborator: let's throw …

Author: …

Collaborator: We could create a new exception stating …
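The thread above suggests replacing the bare IllegalStateException with a driver-specific exception. A hypothetical sketch of that direction follows; ChunkNotFoundException and its error-code enum are invented names for illustration, and the real driver may well prefer one of its existing exception types instead.

```java
// Hypothetical sketch of the reviewers' suggestion: raise a driver-specific exception rather
// than a bare IllegalStateException when a chunk index has no entry in the chunk map.
// ChunkNotFoundException and DriverErrorCode are invented names, not part of the driver.
public class ChunkNotFoundException extends RuntimeException {

  public enum DriverErrorCode {
    CHUNK_NOT_FOUND
  }

  private final DriverErrorCode errorCode;

  public ChunkNotFoundException(String message, DriverErrorCode errorCode) {
    super(message);
    this.errorCode = errorCode;
  }

  public DriverErrorCode getErrorCode() {
    return errorCode;
  }
}
```

Under that sketch, the lookup in getChunkStartRowOffset would throw a ChunkNotFoundException carrying the same diagnostic message (chunk index, total chunks, statement id), letting callers distinguish this failure from unrelated illegal-state errors.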
Second changed file:
@@ -128,14 +128,32 @@ TBase getThriftResponse(TBase request) throws DatabricksSQLException {
     }
   }
 
-  TFetchResultsResp getResultSetResp(TOperationHandle operationHandle, String context)
+  /**
+   * Fetch the next set of results for the given operation handle with default settings.
+   *
+   * @param operationHandle the operation handle
+   * @return TFetchResultsResp containing the results
+   * @throws DatabricksHttpException if fetch fails
+   */
+  TFetchResultsResp getResultSetResp(TOperationHandle operationHandle)
       throws DatabricksHttpException {
-    return getResultSetResp(
-        new TStatus().setStatusCode(TStatusCode.SUCCESS_STATUS),
-        operationHandle,
-        context,
-        maxRowsPerBlock,
-        false);
+    TFetchResultsReq req = createFetchResultsReqWithDefaults(operationHandle);
+    return executeFetchRequest(req);
+  }
+
+  /**
+   * Fetches results starting from a specific row offset.
+   *
+   * @param operationHandle the operation handle
+   * @param startRowOffset the row offset to start fetching from
+   * @return TFetchResultsResp containing the results
+   * @throws DatabricksHttpException if fetch fails
+   */
+  TFetchResultsResp getResultSetResp(TOperationHandle operationHandle, long startRowOffset)
+      throws DatabricksHttpException {
+    TFetchResultsReq req = createFetchResultsReqWithDefaults(operationHandle);
+    req.setStartRowOffset(startRowOffset);
+    return executeFetchRequest(req);
   }
 
   TCancelOperationResp cancelOperation(TCancelOperationReq req) throws DatabricksHttpException {
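As a usage illustration of the new two-argument overload: once a CloudFetch link is found to be expired (or has no expiration at all, which the first file treats as expired), the data for that chunk can be refetched from the Thrift server starting at the chunk's start row offset. The sketch below is a hypothetical caller, not code from the PR; ResultLink, ThriftFetcher, and readChunk are invented stand-ins, and only the shape of getResultSetResp(operationHandle, startRowOffset) and the null-safe expiration check are taken from the diff.

```java
import java.time.Instant;

// Hypothetical caller built on invented stand-ins; not the driver's real classes.
public class ExpiredLinkRefetchExample {

  /** Minimal stand-in for an external CloudFetch link. */
  record ResultLink(String url, Instant expiration) {}

  /** Stand-in for the accessor overload added in this PR. */
  interface ThriftFetcher {
    byte[] getResultSetResp(String operationHandle, long startRowOffset);
  }

  /** A link with no expiration is treated as expired, mirroring the defensive check in the diff. */
  static boolean isChunkLinkExpired(ResultLink link) {
    return link == null || link.expiration() == null || link.expiration().isBefore(Instant.now());
  }

  /** Refetch the chunk from its start row offset when its link has lapsed. */
  static byte[] readChunk(
      ThriftFetcher fetcher, String operationHandle, ResultLink link, long chunkStartRowOffset) {
    if (isChunkLinkExpired(link)) {
      // Address the refetch by row offset rather than by the stale link URL.
      return fetcher.getResultSetResp(operationHandle, chunkStartRowOffset);
    }
    return downloadDirectly(link);
  }

  private static byte[] downloadDirectly(ResultLink link) {
    return new byte[0]; // placeholder for the CloudFetch HTTP download
  }
}
```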
@@ -166,16 +184,10 @@ TCloseOperationResp closeOperation(TCloseOperationReq req) throws DatabricksHttpException {
 
   TFetchResultsResp getMoreResults(IDatabricksStatementInternal parentStatement)
       throws DatabricksSQLException {
-    String context =
-        String.format(
-            "Fetching more results as it has more rows %s",
-            parentStatement.getStatementId().toSQLExecStatementId());
-    return getResultSetResp(
-        new TStatus().setStatusCode(TStatusCode.SUCCESS_STATUS),
-        getOperationHandle(parentStatement.getStatementId()),
-        context,
-        maxRowsPerBlock,
-        true);
+    TFetchResultsReq req =
+        createFetchResultsReqWithDefaults(getOperationHandle(parentStatement.getStatementId()));
+    setFetchMetadata(req);
+    return executeFetchRequest(req);
   }
 
   DatabricksResultSet execute(
@@ -227,15 +239,16 @@ DatabricksResultSet execute(
         resultSet = response.getDirectResults().getResultSet();
         resultSet.setResultSetMetadata(response.getDirectResults().getResultSetMetadata());
       } else {
+        verifySuccessStatus(
+            response.getStatus(), "executeStatement", statementId.toSQLExecStatementId());
+
+        // Fetch the result data after polling
+        TFetchResultsReq resultsReq =
+            createFetchResultsReqWithDefaults(response.getOperationHandle());
+        setFetchMetadata(resultsReq);
         long fetchStartTime = System.nanoTime();
-        resultSet =
-            getResultSetResp(
-                response.getStatus(),
-                response.getOperationHandle(),
-                "executeStatement",
-                maxRowsPerBlock,
-                true);
+        resultSet = executeFetchRequest(resultsReq);
 
         long fetchEndTime = System.nanoTime();
         long fetchLatencyNanos = fetchEndTime - fetchStartTime;
         long fetchLatencyMillis = fetchLatencyNanos / 1_000_000;

Review thread on the added `verifySuccessStatus(` call:

Collaborator: I think we already verify response at line 215.

Author: Should this be moved to after …

Collaborator: verifySuccessStatus is taking …
@@ -389,11 +402,18 @@ DatabricksResultSet getStatementResult(
     TFetchResultsResp resultSet = null;
     try {
      response = getOperationStatus(request, statementId);
+      verifySuccessStatus(
+          response.getStatus(), "getStatementResult", statementId.toSQLExecStatementId());
+
       TOperationState operationState = response.getOperationState();
       if (operationState == TOperationState.FINISHED_STATE) {
         long fetchStartTime = System.nanoTime();
-        resultSet =
-            getResultSetResp(response.getStatus(), operationHandle, "getStatementResult", -1, true);
+
+        TFetchResultsReq resultsReq = createFetchResultsReqWithDefaults(operationHandle);
+        resultsReq.setMaxRows(-1);
+        setFetchMetadata(resultsReq);
+        resultSet = executeFetchRequest(resultsReq);
 
         long fetchEndTime = System.nanoTime();
         long fetchLatencyNanos = fetchEndTime - fetchStartTime;
         long fetchLatencyMillis = fetchLatencyNanos / 1_000_000;
@@ -481,46 +501,46 @@ void updateConfig(DatabricksConfig newConfig) {
     this.databricksConfig = newConfig;
   }
 
-  TFetchResultsResp getResultSetResp(
-      TStatus responseStatus,
-      TOperationHandle operationHandle,
-      String context,
-      int maxRowsPerBlock,
-      boolean fetchMetadata)
+  private TFetchResultsResp executeFetchRequest(TFetchResultsReq request)
       throws DatabricksHttpException {
-    String statementId = StatementId.loggableStatementId(operationHandle);
-    verifySuccessStatus(responseStatus, context, statementId);
-    TFetchResultsReq request =
-        new TFetchResultsReq()
-            .setOperationHandle(operationHandle)
-            .setFetchType((short) 0) // 0 represents Query output. 1 represents Log
-            .setMaxRows(
-                maxRowsPerBlock) // Max number of rows that should be returned in the rowset.
-            .setMaxBytes(DEFAULT_BYTE_LIMIT);
-    if (fetchMetadata
-        && ProtocolFeatureUtil.supportsResultSetMetadataFromFetch(serverProtocolVersion)) {
-      request.setIncludeResultSetMetadata(true); // fetch metadata if supported
-    }
     TFetchResultsResp response;
     try {
       response = getThriftClient().FetchResults(request);
     } catch (TException e) {
       String errorMessage =
           String.format(
-              "Error while fetching results from Thrift server. Request maxRows=%d, maxBytes=%d, Error {%s}",
+              "Error while fetching results from Thrift server. Request maxRows=%d, "
+                  + "maxBytes=%d, Error {%s}",
               request.getMaxRows(), request.getMaxBytes(), e.getMessage());
       LOGGER.error(e, errorMessage);
       throw new DatabricksHttpException(errorMessage, e, DatabricksDriverErrorCode.INVALID_STATE);
     }
 
+    String statementId = StatementId.loggableStatementId(request.getOperationHandle());
     verifySuccessStatus(
         response.getStatus(),
         String.format(
-            "Error while fetching results Request maxRows=%d, maxBytes=%d. Response hasMoreRows=%s",
+            "Error while fetching results Request maxRows=%d, maxBytes=%d. "
+                + "Response hasMoreRows=%s",
             request.getMaxRows(), request.getMaxBytes(), response.hasMoreRows),
         statementId);
+
     return response;
   }
 
+  private TFetchResultsReq createFetchResultsReqWithDefaults(TOperationHandle operationHandle) {
+    return new TFetchResultsReq()
+        .setOperationHandle(operationHandle)
+        .setFetchType((short) 0) // 0 represents Query output. 1 represents Log
+        .setMaxRows(maxRowsPerBlock) // Max number of rows that should be returned in the rowset.
+        .setMaxBytes(DEFAULT_BYTE_LIMIT);
+  }
+
+  private void setFetchMetadata(TFetchResultsReq request) {
+    if (ProtocolFeatureUtil.supportsResultSetMetadataFromFetch(serverProtocolVersion)) {
+      request.setIncludeResultSetMetadata(true);
+    }
+  }
+
   private TFetchResultsResp listFunctions(TGetFunctionsReq request)
       throws TException, DatabricksSQLException {
     if (enableDirectResults) request.setGetDirectResults(DEFAULT_DIRECT_RESULTS);

Review comment on the removed verifySuccessStatus call at the top of this method:

Author: @jayantsing-db Checking the validity of a previously received response is not the responsibility of this function. Moved it to outside this function at the point where the response is received.

Review comment on createFetchResultsReqWithDefaults:

Collaborator: Thanks for abstracting it in a single method.
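The refactor above, together with the author's comment on it, follows a recognizable shape: each caller verifies the status of the response it already holds, builds a fetch request from shared defaults, applies only the overrides it needs (row offset, max rows, metadata), and hands the request to a single execute method that owns transport failures and validation of the response it produces. Below is a minimal, self-contained sketch of that shape; every type and constant in it is an invented stand-in (including the default values), not one of the driver's Thrift classes.

```java
// Illustrative sketch of the "defaults + targeted override + single execute" shape used by the
// refactor above; every type here is an invented stand-in, not one of the driver's classes.
public class FetchClient {

  static final int DEFAULT_MAX_ROWS = 10_000;        // assumed default, for illustration only
  static final long DEFAULT_BYTE_LIMIT = 100L << 20; // assumed default, for illustration only

  /** Immutable request; "with" methods apply one targeted override at a time. */
  record FetchRequest(String operationId, int maxRows, long maxBytes, long startRowOffset) {
    FetchRequest withMaxRows(int rows) {
      return new FetchRequest(operationId, rows, maxBytes, startRowOffset);
    }

    FetchRequest withStartRowOffset(long offset) {
      return new FetchRequest(operationId, maxRows, maxBytes, offset);
    }
  }

  record FetchResponse(boolean success, boolean hasMoreRows) {}

  static class FetchException extends Exception {
    FetchException(String message) {
      super(message);
    }
  }

  /** Shared defaults, playing the role of createFetchResultsReqWithDefaults. */
  FetchRequest requestWithDefaults(String operationId) {
    return new FetchRequest(operationId, DEFAULT_MAX_ROWS, DEFAULT_BYTE_LIMIT, 0L);
  }

  /**
   * Single choke point for transport errors and validation, playing the role of
   * executeFetchRequest: it validates only the response it produced, not any earlier
   * response the caller may hold.
   */
  FetchResponse execute(FetchRequest request) throws FetchException {
    FetchResponse response = transportCall(request); // stand-in for the Thrift FetchResults call
    if (!response.success()) {
      throw new FetchException(
          "Fetch failed for maxRows=" + request.maxRows() + ", maxBytes=" + request.maxBytes());
    }
    return response;
  }

  /** A caller composes the defaults with the one override it needs, then delegates. */
  FetchResponse fetchFromOffset(String operationId, long startRowOffset) throws FetchException {
    FetchRequest request = requestWithDefaults(operationId).withStartRowOffset(startRowOffset);
    return execute(request);
  }

  /** Mirrors the maxRows override used by getStatementResult in the diff. */
  FetchResponse fetchWithMaxRowsOverride(String operationId, int maxRows) throws FetchException {
    FetchRequest request = requestWithDefaults(operationId).withMaxRows(maxRows);
    return execute(request);
  }

  private FetchResponse transportCall(FetchRequest request) {
    return new FetchResponse(true, false); // placeholder transport layer
  }
}
```

The payoff, visible in the earlier hunks, is that call sites shrink to two or three lines and new variants, such as the start-row-offset overload, need only one extra override rather than another long parameter list.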
@@ -663,8 +683,11 @@ TFetchResultsResp fetchMetadataResults(TResp response, String contextDescription
     // Fetch the result data after polling
     FResp statusField = response.fieldForId(statusFieldId);
     TStatus status = (TStatus) response.getFieldValue(statusField);
-    return getResultSetResp(
-        status, operationHandle, contextDescription, DEFAULT_ROW_LIMIT_PER_BLOCK, false);
+    verifySuccessStatus(status, contextDescription, statementId);
+
+    TFetchResultsReq resultsReq = createFetchResultsReqWithDefaults(operationHandle);
+    resultsReq.setMaxRows(DEFAULT_ROW_LIMIT_PER_BLOCK);
+    return executeFetchRequest(resultsReq);
   }
 }
Review comment: Optional comment is not aligned with the return type.

Reply: Fixed.