2 changes: 1 addition & 1 deletion src/azure-cli-core/azure/cli/core/profiles/_shared.py
@@ -191,7 +191,7 @@ def default_api_version(self):
ResourceType.DATA_KEYVAULT_ADMINISTRATION_BACKUP: '7.2-preview',
ResourceType.DATA_KEYVAULT_ADMINISTRATION_ACCESS_CONTROL: '7.2-preview',
ResourceType.DATA_STORAGE: '2018-11-09',
-        ResourceType.DATA_STORAGE_BLOB: '2020-10-02',
+        ResourceType.DATA_STORAGE_BLOB: '2021-04-10',
ResourceType.DATA_STORAGE_FILEDATALAKE: '2020-02-10',
ResourceType.DATA_STORAGE_FILESHARE: '2019-07-07',
ResourceType.DATA_STORAGE_QUEUE: '2018-03-28',
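For orientation, the version bumped here is what the CLI's profile machinery resolves for the blob data plane at runtime. A minimal sketch using the existing `azure.cli.core.profiles` helpers (the printed value is an expectation based on this change, not output captured from the PR):

```python
# Sketch: inspect the default API version the active profile resolves for the
# blob data plane. get_default_cli() and get_api_version() are existing
# azure-cli core helpers.
from azure.cli.core import get_default_cli
from azure.cli.core.profiles import ResourceType, get_api_version

cli_ctx = get_default_cli()
# After this change, the 'latest' profile should resolve to the new default.
print(get_api_version(cli_ctx, ResourceType.DATA_STORAGE_BLOB))  # expected: '2021-04-10'
```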
8 changes: 8 additions & 0 deletions src/azure-cli/azure/cli/command_modules/storage/_help.py
@@ -1184,6 +1184,14 @@
az storage blob upload-batch -d mycontainer -s <path-to-directory> --pattern cli-201[!89]-??-??.txt
"""

+helps['storage blob download'] = """
+type: command
+short-summary: Download a blob to a file path.
+examples:
+  - name: Download a blob.
+    text: az storage blob download -f /path/to/file -c mycontainer -n MyBlob
+"""
+
helps['storage blob url'] = """
type: command
short-summary: Create the url to access a blob.
Expand Down
50 changes: 39 additions & 11 deletions src/azure-cli/azure/cli/command_modules/storage/_params.py
@@ -23,7 +23,7 @@
validate_fs_public_access, validate_logging_version, validate_or_policy, validate_policy,
get_api_version_type, blob_download_file_path_validator, blob_tier_validator, validate_subnet,
validate_immutability_arguments, validate_blob_name_for_upload, validate_share_close_handle,
-    add_upload_progress_callback, blob_tier_validator_track2)
+    add_upload_progress_callback, blob_tier_validator_track2, add_download_progress_callback)


def load_arguments(self, _): # pylint: disable=too-many-locals, too-many-statements, too-many-lines, too-many-branches, line-too-long
@@ -247,6 +247,11 @@ def load_arguments(self, _):
public_network_access_enum = self.get_sdk('models._storage_management_client_enums#PublicNetworkAccess',
resource_type=ResourceType.MGMT_STORAGE)

+    version_id_type = CLIArgumentType(
+        help='An optional blob version ID. This parameter applies only to versioning-enabled accounts.',
+        min_api='2019-12-12', is_preview=True
+    )
+
with self.argument_context('storage') as c:
c.argument('container_name', container_name_type)
c.argument('directory_name', directory_type)
@@ -986,22 +991,45 @@ def load_arguments(self, _):
'blob. If set overwrite=True, then the existing append blob will be deleted, and a new one created. '
'Defaults to False.')

-    with self.argument_context('storage blob download') as c:
-        c.argument('file_path', options_list=('--file', '-f'), type=file_type,
-                   completer=FilesCompleter(), validator=blob_download_file_path_validator)
-        c.argument('max_connections', type=int)
-        c.argument('start_range', type=int)
-        c.argument('end_range', type=int)
-        c.argument('validate_content', action='store_true', min_api='2016-05-31')
-        c.extra('no_progress', progress_type)
+    with self.argument_context('storage blob download', resource_type=ResourceType.DATA_STORAGE_BLOB) as c:
+        c.register_blob_arguments_track2()
+        c.register_precondition_options()
+        c.argument('file_path', options_list=('--file', '-f'), type=file_type, completer=FilesCompleter(),
+                   help='Path of file to write out to.', validator=blob_download_file_path_validator)
+        c.argument('start_range', type=int,
+                   help='Start of byte range to use for downloading a section of the blob. If no end_range is given, '
+                        'all bytes after the start_range will be downloaded. The start_range and end_range params are '
+                        'inclusive. Ex: start_range=0, end_range=511 will download the first 512 bytes of the blob.')
+        c.argument('end_range', type=int,
+                   help='End of byte range to use for downloading a section of the blob. If end_range is given, '
+                        'start_range must be provided. The start_range and end_range params are inclusive. '
+                        'Ex: start_range=0, end_range=511 will download the first 512 bytes of the blob.')
+        c.extra('no_progress', progress_type, validator=add_download_progress_callback)
+        c.extra('snapshot', help='The snapshot parameter is an opaque DateTime value that, when present, '
+                                 'specifies the blob snapshot to retrieve.')
+        c.extra('lease', options_list=['--lease-id'], help='Required if the blob has an active lease.')
+        c.extra('version_id', version_id_type)
+        c.extra('max_concurrency', options_list=['--max-connections'], type=int, default=2,
+                help='The number of parallel connections with which to download.')
+        c.argument('open_mode', help='Mode to use when opening the file. Note that specifying an append-only '
+                                     'open_mode prevents parallel download, so max_connections must be set to 1 '
+                                     'if this open_mode is used.')
+        c.extra('validate_content', action='store_true', min_api='2016-05-31',
+                help='If true, calculates an MD5 hash for each chunk of the blob. The storage service checks the '
+                     'hash of the content that has arrived with the hash that was sent. This is primarily valuable '
+                     'for detecting bitflips on the wire if using http instead of https, as https (the default) '
+                     'will already validate. Note that this MD5 hash is not stored with the blob. Also note that '
+                     'if enabled, the memory-efficient algorithm will not be used, because computing the MD5 hash '
+                     'requires buffering entire blocks; doing so defeats the purpose of the memory-efficient '
+                     'algorithm.')

with self.argument_context('storage blob download-batch') as c:
c.ignore('source_container_name')
c.argument('destination', options_list=('--destination', '-d'))
c.argument('source', options_list=('--source', '-s'))
c.extra('no_progress', progress_type)
-        c.argument('max_connections', type=int,
-                   help='Maximum number of parallel connections to use when the blob size exceeds 64MB.')
+        c.extra('max_concurrency', options_list=['--max-connections'], type=int, default=2,
+                help='The number of parallel connections with which to download.')

with self.argument_context('storage blob delete') as c:
from .sdkutil import get_delete_blob_snapshot_type_names
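Both `--start-range` and `--end-range` are documented above as inclusive, which is why the custom `download_blob` command (later in this diff) converts them into the track-2 SDK's offset/length pair. A minimal sketch of that arithmetic:

```python
def range_to_offset_length(start_range, end_range):
    # Mirrors the conversion in download_blob() later in this diff: the
    # inclusive byte range [start_range, end_range] becomes (offset, length).
    offset = length = None
    if start_range is not None and end_range is not None:
        offset = start_range
        length = end_range - start_range + 1
    return offset, length

# From the help text: start_range=0, end_range=511 downloads the first 512 bytes.
assert range_to_offset_length(0, 511) == (0, 512)
```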
28 changes: 26 additions & 2 deletions src/azure-cli/azure/cli/command_modules/storage/_validators.py
@@ -1025,15 +1025,16 @@ def process_container_delete_parameters(cmd, namespace):

def process_blob_download_batch_parameters(cmd, namespace):
"""Process the parameters for storage blob download command"""
+    from azure.cli.core.azclierror import InvalidArgumentValueError
# 1. quick check
if not os.path.exists(namespace.destination) or not os.path.isdir(namespace.destination):
-        raise ValueError('incorrect usage: destination must be an existing directory')
+        raise InvalidArgumentValueError('incorrect usage: destination must be an existing directory')

# 2. try to extract account name and container name from source string
_process_blob_batch_container_parameters(cmd, namespace)

# 3. Call validators
-    add_progress_callback(cmd, namespace)
+    add_download_progress_callback(cmd, namespace)


def process_blob_upload_batch_parameters(cmd, namespace):
@@ -2033,6 +2034,29 @@ def _update_progress(response):
del namespace.no_progress


+def add_download_progress_callback(cmd, namespace):
+    def _update_progress(response):
+        if response.http_response.status_code not in [200, 201, 206]:
+            return
+
+        message = getattr(_update_progress, 'message', 'Alive')
+        reuse = getattr(_update_progress, 'reuse', False)
+        current = response.context['download_stream_current']
+        total = response.context['data_stream_total']
+
+        if total:
+            hook.add(message=message, value=current, total_val=total)
+            if total == current and not reuse:
+                hook.end()
+
+    hook = cmd.cli_ctx.get_progress_controller(det=True)
+    _update_progress.hook = hook
+
+    if not namespace.no_progress:
+        namespace.progress_callback = _update_progress
+    del namespace.no_progress
+
+
def validate_blob_arguments(namespace):
from azure.cli.core.azclierror import RequiredArgumentMissingError
if not namespace.blob_url and not all([namespace.blob_name, namespace.container_name]):
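The validator's inner `_update_progress` is handed to the SDK as a `raw_response_hook`, so it receives a pipeline response whose `context` dict carries the stream counters read above. A self-contained simulation of that contract (the `Fake*` classes are illustrative assumptions, not SDK types, and the progress controller is replaced by a `print`):

```python
# Hypothetical stand-ins for the pipeline response passed to raw_response_hook.
class FakeHttpResponse:
    status_code = 206  # ranged blob downloads answer with 206 Partial Content

class FakeResponse:
    http_response = FakeHttpResponse()
    context = {'download_stream_current': 512, 'data_stream_total': 1024}

def _update_progress(response):
    # Same shape as the hook in add_download_progress_callback, minus the
    # progress controller so the sketch runs standalone.
    if response.http_response.status_code not in [200, 201, 206]:
        return
    current = response.context['download_stream_current']
    total = response.context['data_stream_total']
    if total:
        print('downloaded {}/{} bytes'.format(current, total))

_update_progress(FakeResponse())  # -> downloaded 512/1024 bytes
```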
23 changes: 15 additions & 8 deletions src/azure-cli/azure/cli/command_modules/storage/commands.py
@@ -317,7 +317,7 @@ def get_custom_sdk(...)
from ._transformers import transform_blob_list_output, transform_blob_json_output, transform_blob_upload_output
from ._format import transform_blob_output
from ._exception_handler import file_related_exception_handler
-        from ._validators import process_blob_upload_batch_parameters
+        from ._validators import process_blob_upload_batch_parameters, process_blob_download_batch_parameters
g.storage_custom_command_oauth('copy start', 'copy_blob')
g.storage_custom_command_oauth('show', 'show_blob_v2', transform=transform_blob_json_output,
table_transformer=transform_blob_output,
@@ -337,6 +337,13 @@ def get_custom_sdk(...)
g.storage_custom_command_oauth('upload-batch', 'storage_blob_upload_batch', client_factory=cf_blob_service,
validator=process_blob_upload_batch_parameters,
exception_handler=file_related_exception_handler)
+        g.storage_custom_command_oauth('download', 'download_blob',
+                                       transform=transform_blob_json_output,
+                                       table_transformer=transform_blob_output,
+                                       exception_handler=file_related_exception_handler)
+        g.storage_custom_command_oauth('download-batch', 'storage_blob_download_batch', client_factory=cf_blob_service,
+                                       validator=process_blob_download_batch_parameters,
+                                       exception_handler=file_related_exception_handler)

blob_lease_client_sdk = CliCommandType(
operations_tmpl='azure.multiapi.storagev2.blob._lease#BlobLeaseClient.{}',
@@ -359,11 +366,11 @@ def get_custom_sdk(...)
from ._format import transform_boolean_for_table, transform_blob_output
from ._transformers import (transform_storage_list_output, transform_url,
create_boolean_result_output_transformer)
-        from ._validators import (process_blob_download_batch_parameters, process_blob_delete_batch_parameters)
+        from ._validators import process_blob_delete_batch_parameters
from ._exception_handler import file_related_exception_handler
-        g.storage_command_oauth(
-            'download', 'get_blob_to_path', table_transformer=transform_blob_output,
-            exception_handler=file_related_exception_handler)
+        # g.storage_command_oauth(
+        #     'download', 'get_blob_to_path', table_transformer=transform_blob_output,
+        #     exception_handler=file_related_exception_handler)
g.storage_custom_command_oauth('generate-sas', 'generate_sas_blob_uri')
g.storage_custom_command_oauth(
'url', 'create_blob_url', transform=transform_url)
@@ -376,9 +383,9 @@ def get_custom_sdk(...)
transform=create_boolean_result_output_transformer(
'undeleted'),
table_transformer=transform_boolean_for_table, min_api='2017-07-29')
-        g.storage_custom_command_oauth('download-batch', 'storage_blob_download_batch',
-                                       validator=process_blob_download_batch_parameters,
-                                       exception_handler=file_related_exception_handler)
+        # g.storage_custom_command_oauth('download-batch', 'storage_blob_download_batch',
+        #                                validator=process_blob_download_batch_parameters,
+        #                                exception_handler=file_related_exception_handler)
g.storage_custom_command_oauth('delete-batch', 'storage_blob_delete_batch',
validator=process_blob_delete_batch_parameters)
g.storage_command_oauth(
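Net effect of the registration changes: `download` and `download-batch` move out of the track-1 command group (which dispatched to `get_blob_to_path` on the old SDK) into the track-2 `BlobClient`-based group. A rough sketch of the underlying track-2 call the new custom commands wrap (URL, SAS token, and file path are placeholders):

```python
# Sketch only: the equivalent direct SDK call, using the public azure-storage-blob
# track-2 client (the CLI actually goes through its vendored multiapi copy).
from azure.storage.blob import BlobClient

blob_client = BlobClient.from_blob_url(
    'https://myaccount.blob.core.windows.net/mycontainer/MyBlob?<sas-token>')
with open('/path/to/file', 'wb') as stream:
    # offset/length correspond to --start-range/--end-range after conversion.
    blob_client.download_blob(offset=0, length=512).readinto(stream)
```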
@@ -354,17 +354,10 @@ def action_file_copy(file_info):

# pylint: disable=unused-argument
def storage_blob_download_batch(client, source, destination, source_container_name, pattern=None, dryrun=False,
-                                progress_callback=None, max_connections=2):
-
-    def _download_blob(blob_service, container, destination_folder, normalized_blob_name, blob_name):
-        # TODO: try catch IO exception
-        destination_path = os.path.join(destination_folder, normalized_blob_name)
-        destination_folder = os.path.dirname(destination_path)
-        if not os.path.exists(destination_folder):
-            mkdir_p(destination_folder)
-
-        blob = blob_service.get_blob_to_path(container, blob_name, destination_path, max_connections=max_connections,
-                                             progress_callback=progress_callback)
+                                progress_callback=None, **kwargs):
+    @check_precondition_success
+    def _download_blob(*args, **kwargs):
+        blob = download_blob(*args, **kwargs)
return blob.name

source_blobs = collect_blobs(client, source_container_name, pattern)
@@ -394,17 +387,34 @@ def _download_blob(...)

results = []
for index, blob_normed in enumerate(blobs_to_download):
+        from azure.cli.core.azclierror import FileOperationError
# add blob name and number to progress message
if progress_callback:
progress_callback.message = '{}/{}: "{}"'.format(
index + 1, len(blobs_to_download), blobs_to_download[blob_normed])
-        results.append(_download_blob(
-            client, source_container_name, destination, blob_normed, blobs_to_download[blob_normed]))
+        blob_client = client.get_blob_client(container=source_container_name,
+                                             blob=blobs_to_download[blob_normed])
+        destination_path = os.path.join(destination, os.path.normpath(blob_normed))
+        destination_folder = os.path.dirname(destination_path)
+        # Fail when a file and a folder would end up with the same name
+        if os.path.isfile(destination_path) and os.path.exists(destination_folder):
+            raise FileOperationError("%s already exists in %s. Please rename the existing file or choose another "
+                                     "destination folder." % (blob_normed, destination))
+        if not os.path.exists(destination_folder):
+            mkdir_p(destination_folder)
+        include, result = _download_blob(client=blob_client, file_path=destination_path,
+                                         progress_callback=progress_callback, **kwargs)
+        if include:
+            results.append(result)

# end progress hook
if progress_callback:
progress_callback.hook.end()

+    num_failures = len(blobs_to_download) - len(results)
+    if num_failures:
+        logger.warning('%s of %s files not downloaded due to "Failed Precondition"',
+                       num_failures, len(blobs_to_download))
return results


@@ -589,6 +599,22 @@ def upload_blob(...)
return response


+def download_blob(client, file_path, open_mode='wb', start_range=None, end_range=None,
+                  progress_callback=None, **kwargs):
+    offset = None
+    length = None
+    if start_range is not None and end_range is not None:
+        offset = start_range
+        length = end_range - start_range + 1
+    if progress_callback:
+        kwargs['raw_response_hook'] = progress_callback
+    download_stream = client.download_blob(offset=offset, length=length, **kwargs)
+    with open(file_path, open_mode) as stream:
+        download_stream.readinto(stream)
+
+    return download_stream.properties
+
+
def get_block_ids(content_length, block_length):
"""Get the block id arrary from block blob length, block size"""
block_count = 0
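`check_precondition_success` is imported from the storage module's utilities and is not shown in this diff; it is what lets the batch loop record per-blob precondition misses (the If-Match / If-Unmodified-Since family registered via `register_precondition_options()`) as skips rather than hard failures, feeding the "Failed Precondition" warning above. A rough sketch of its assumed contract:

```python
from azure.core.exceptions import HttpResponseError

def check_precondition_success(func):
    # Assumed contract: wrap the download call so it returns (succeeded, result).
    # A 412 Precondition Failed is reported as (False, None) instead of raising,
    # so the caller can keep going and tally num_failures afterwards.
    def wrapper(*args, **kwargs):
        try:
            return True, func(*args, **kwargs)
        except HttpResponseError as ex:
            if ex.status_code == 412:
                return False, None
            raise
    return wrapper
```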
