diff --git a/CHANGELOG.md b/CHANGELOG.md index dca7d54e..1d53e7e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Compress and Extract end-points now support multiple compression types (none, bzip2, gzip, and xz). + ### Changed ## [2.3.1] diff --git a/src/firecrest/filesystem/ops/commands/tar_command.py b/src/firecrest/filesystem/ops/commands/tar_command.py index f4ec6be7..3131358f 100644 --- a/src/firecrest/filesystem/ops/commands/tar_command.py +++ b/src/firecrest/filesystem/ops/commands/tar_command.py @@ -6,6 +6,8 @@ from enum import Enum import os +from fastapi import HTTPException, status + from firecrest.filesystem.ops.commands.base_command_with_timeout import ( BaseCommandWithTimeout, ) @@ -17,12 +19,19 @@ class Operation(str, Enum): compress = "compress" extract = "extract" + class CompressionType(str, Enum): + none = "none" + bzip2 = "bzip2" + gzip = "gzip" + xz = "xz" + def __init__( self, source_path: str, target_path: str, match_pattern: str = None, dereference: bool = False, + compression: CompressionType = CompressionType.gzip, operation: Operation = Operation.compress, ) -> None: super().__init__() @@ -32,6 +41,21 @@ def __init__( self.dereference = dereference self.operation = operation + match compression: + case TarCommand.CompressionType.none: + self.compression_flag = "" + case TarCommand.CompressionType.gzip: + self.compression_flag = "z" + case TarCommand.CompressionType.bzip2: + self.compression_flag = "j" + case TarCommand.CompressionType.xz: + self.compression_flag = "J" + case _: + raise HTTPException( + status_code=status.HTTP_501_NOT_IMPLEMENTED, + detail="The requested compression type is not implemented.", + ) + def get_command( self, ) -> str: @@ -53,11 +77,11 @@ def get_compress_command(self) -> str: if self.match_pattern: return f"{super().get_command()} bash -c \"cd {source_dir}; {super().get_command()} find . 
-type f -regex '{self.match_pattern}' -print0 | tar {options} -czvf '{self.target_path}' --null --files-from - \"" - return f"{super().get_command()} tar {options} -czvf '{self.target_path}' -C '{source_dir}' '{source_file}'" + return f"{super().get_command()} tar {options} -c{self.compression_flag}vf '{self.target_path}' -C '{source_dir}' '{source_file}'" def get_extract_command(self) -> str: - return f"{super().get_command()} tar -xzf '{self.source_path}' -C '{self.target_path}'" + return f"{super().get_command()} tar -x{self.compression_flag}f '{self.source_path}' -C '{self.target_path}'" def parse_output(self, stdout: str, stderr: str, exit_status: int): if exit_status != 0: diff --git a/src/firecrest/filesystem/ops/models.py b/src/firecrest/filesystem/ops/models.py index 0b8b9817..1eb418ba 100644 --- a/src/firecrest/filesystem/ops/models.py +++ b/src/firecrest/filesystem/ops/models.py @@ -10,6 +10,7 @@ from firecrest.filesystem.models import FilesystemRequestBase from lib.models import CamelModel from pydantic import Field +from firecrest.filesystem.ops.commands.tar_command import TarCommand class ContentUnit(str, Enum): @@ -100,12 +101,7 @@ class PutFileChmodRequest(FilesystemRequestBase): mode: str = Field(..., description="Mode in octal permission format") model_config = { "json_schema_extra": { - "examples": [ - { - "path": "/home/user/dir/file.out", - "mode": "777" - } - ] + "examples": [{"path": "/home/user/dir/file.out", "mode": "777"}] } } @@ -115,15 +111,19 @@ class PutFileChmodResponse(CamelModel): class PutFileChownRequest(FilesystemRequestBase): - owner: Optional[str] = Field(default="", description="User name of the new user owner of the file") - group: Optional[str] = Field(default="", description="Group name of the new group owner of the file") + owner: Optional[str] = Field( + default="", description="User name of the new user owner of the file" + ) + group: Optional[str] = Field( + default="", description="Group name of the new group owner of 
the file" + ) model_config = { "json_schema_extra": { "examples": [ { "path": "/home/user/dir/file.out", "owner": "user", - "group": "my-group" + "group": "my-group", } ] } @@ -135,15 +135,13 @@ class PutFileChownResponse(CamelModel): class PostMakeDirRequest(FilesystemRequestBase): - parent: Optional[bool] = Field(default=False, description="If set to `true` creates all its parent directories if they do not already exist") + parent: Optional[bool] = Field( + default=False, + description="If set to `true` creates all its parent directories if they do not already exist", + ) model_config = { "json_schema_extra": { - "examples": [ - { - "path": "/home/user/dir/newdir", - "parent": "true" - } - ] + "examples": [{"path": "/home/user/dir/newdir", "parent": "true"}] } } @@ -152,12 +150,7 @@ class PostFileSymlinkRequest(FilesystemRequestBase): link_path: str = Field(..., description="Path to the new symlink") model_config = { "json_schema_extra": { - "examples": [ - { - "path": "/home/user/dir", - "link_path": "/home/user/newlink" - } - ] + "examples": [{"path": "/home/user/dir", "link_path": "/home/user/newlink"}] } } @@ -176,8 +169,17 @@ class PostMkdirResponse(CamelModel): class PostCompressRequest(FilesystemRequestBase): target_path: str = Field(..., description="Path to the compressed file") - match_pattern: Optional[str] = Field(default=None, description="Regex pattern to filter files to compress") - dereference: Optional[bool] = Field(default=False, description="If set to `true`, it follows symbolic links and archive the files they point to instead of the links themselves.") + match_pattern: Optional[str] = Field( + default=None, description="Regex pattern to filter files to compress" + ) + dereference: Optional[bool] = Field( + default=False, + description="If set to `true`, it follows symbolic links and archive the files they point to instead of the links themselves.", + ) + compression: Optional[TarCommand.CompressionType] = Field( + default="gzip", + 
description="Defines the type of compression to be used. By default gzip is used.", + ) model_config = { "json_schema_extra": { "examples": [ @@ -185,7 +187,8 @@ class PostCompressRequest(FilesystemRequestBase): "path": "/home/user/dir", "target_path": "/home/user/file.tar.gz", "match_pattern": "*./[ab].*\\.txt", - "dereference": "true" + "dereference": "true", + "compression": "none", } ] } @@ -193,13 +196,20 @@ class PostCompressRequest(FilesystemRequestBase): class PostExtractRequest(FilesystemRequestBase): - target_path: str = Field(..., description="Path to the directory where to extract the compressed file") + target_path: str = Field( + ..., description="Path to the directory where to extract the compressed file" + ) + compression: Optional[TarCommand.CompressionType] = Field( + default="gzip", + description="Defines the type of compression to be used. By default gzip is used.", + ) model_config = { "json_schema_extra": { "examples": [ { "path": "/home/user/dir/file.tar.gz", - "target_path": "/home/user/dir" + "target_path": "/home/user/dir", + "compression": "none", } ] } diff --git a/src/firecrest/filesystem/ops/router.py b/src/firecrest/filesystem/ops/router.py index 756f1435..9b3d36b1 100644 --- a/src/firecrest/filesystem/ops/router.py +++ b/src/firecrest/filesystem/ops/router.py @@ -654,8 +654,9 @@ async def post_compress( tar = TarCommand( request_model.path, request_model.target_path, - request_model.match_pattern, - request_model.dereference, + match_pattern=request_model.match_pattern, + dereference=request_model.dereference, + compression=request_model.compression, operation=TarCommand.Operation.compress, ) @@ -684,6 +685,7 @@ async def post_extract( tar = TarCommand( request_model.path, request_model.target_path, + compression=request_model.compression, operation=TarCommand.Operation.extract, ) diff --git a/src/firecrest/filesystem/transfer/models.py b/src/firecrest/filesystem/transfer/models.py index 2d55f98f..3d7d5916 100644 --- 
a/src/firecrest/filesystem/transfer/models.py +++ b/src/firecrest/filesystem/transfer/models.py @@ -10,6 +10,7 @@ from firecrest.filesystem.models import FilesystemRequestBase from lib.datatransfers.s3.models import S3DataTransferOperation from lib.models.base_model import CamelModel +from firecrest.filesystem.ops.commands.tar_command import TarCommand class PostFileUploadRequest(FilesystemRequestBase): @@ -135,6 +136,10 @@ class CompressRequest(FilesystemRequestBase): default=False, description="If set to `true`, it follows symbolic links and archive the files they point to instead of the links themselves.", ) + compression: Optional[TarCommand.CompressionType] = Field( + default="gzip", + description="Defines the type of compression to be used. By default gzip is used.", + ) model_config = { "json_schema_extra": { "examples": [ @@ -144,6 +149,7 @@ class CompressRequest(FilesystemRequestBase): "match_pattern": "*./[ab].*\\.txt", "dereference": "true", "account": "group", + "compression": "none", } ] } @@ -161,6 +167,10 @@ class ExtractRequest(FilesystemRequestBase): account: Optional[str] = Field( default=None, description="Name of the account in the scheduler" ) + compression: Optional[TarCommand.CompressionType] = Field( + default="gzip", + description="Defines the type of compression to be used. 
By default gzip is used.", + ) model_config = { "json_schema_extra": { "examples": [ @@ -168,6 +178,7 @@ class ExtractRequest(FilesystemRequestBase): "source_path": "/home/user/dir/file.tar.gz", "target_path": "/home/user/dir", "account": "group", + "compression": "none", } ] } diff --git a/src/firecrest/filesystem/transfer/router.py b/src/firecrest/filesystem/transfer/router.py index f6084d37..34eef04b 100644 --- a/src/firecrest/filesystem/transfer/router.py +++ b/src/firecrest/filesystem/transfer/router.py @@ -30,6 +30,7 @@ SchedulerClientDependency, ServiceAvailabilityDependency, ) +from firecrest.filesystem.ops.commands.tar_command import TarCommand # clients from lib.scheduler_clients.slurm.slurm_rest_client import SlurmRestClient @@ -397,6 +398,21 @@ async def compress( if request.dereference: options += "--dereference" + match request.compression: + case TarCommand.CompressionType.none: + compression_flag = "" + case TarCommand.CompressionType.gzip: + compression_flag = "z" + case TarCommand.CompressionType.bzip2: + compression_flag = "j" + case TarCommand.CompressionType.xz: + compression_flag = "J" + case _: + raise HTTPException( + status_code=status.HTTP_501_NOT_IMPLEMENTED, + detail="The requested compression type is not implemented.", + ) + parameters = { "sbatch_directives": _format_directives( system.datatransfer_jobs_directives, request.account @@ -406,6 +422,7 @@ async def compress( "target_path": request.target_path, "match_pattern": request.match_pattern, "options": options, + "compression_flag": compression_flag, } job_script = _build_script("job_compress.sh", parameters) @@ -459,12 +476,28 @@ async def extract( f"The system {system_name} has no filesystem defined as default_work_dir" ) + match request.compression: + case TarCommand.CompressionType.none: + compression_flag = "" + case TarCommand.CompressionType.gzip: + compression_flag = "z" + case TarCommand.CompressionType.bzip2: + compression_flag = "j" + case TarCommand.CompressionType.xz: + 
compression_flag = "J" + case _: + raise HTTPException( + status_code=status.HTTP_501_NOT_IMPLEMENTED, + detail="The requested compression type is not implemented.", + ) + parameters = { "sbatch_directives": _format_directives( system.datatransfer_jobs_directives, request.account ), "source_path": request.path, "target_path": request.target_path, + "compression_flag": compression_flag, } job_script = _build_script("job_extract.sh", parameters) diff --git a/src/firecrest/filesystem/transfer/scripts/job_compress.sh b/src/firecrest/filesystem/transfer/scripts/job_compress.sh index e5aaa8e4..7467c2a7 100644 --- a/src/firecrest/filesystem/transfer/scripts/job_compress.sh +++ b/src/firecrest/filesystem/transfer/scripts/job_compress.sh @@ -11,7 +11,7 @@ echo $(date -u) "Compress Files Job (id:${SLURM_JOB_ID:-${PBS_JOBID:-unknown}})" {% if match_pattern %} -status=$(cd {{ source_dir }}; find . -type f -regex '{{ match_pattern }}' -print0 | tar {{ options }} -czvf '{{ target_path }}' --null --files-from - ) +status=$(cd {{ source_dir }}; find . -type f -regex '{{ match_pattern }}' -print0 | tar {{ options }} -c{{compression_flag}}vf '{{ target_path }}' --null --files-from - ) if [[ "$?" == '0' ]] then echo $(date -u) "Files were successfully compressed." @@ -24,7 +24,7 @@ fi {% else %} -status=$(tar {{ options }} -czvf '{{ target_path }}' -C '{{ source_dir }}' '{{ source_file }}') +status=$(tar {{ options }} -c{{compression_flag}}vf '{{ target_path }}' -C '{{ source_dir }}' '{{ source_file }}') if [[ "$?" == '0' ]] then echo $(date -u) "Files were successfully compressed." 
diff --git a/src/firecrest/filesystem/transfer/scripts/job_extract.sh b/src/firecrest/filesystem/transfer/scripts/job_extract.sh index 2a80ac6d..632e30e8 100644 --- a/src/firecrest/filesystem/transfer/scripts/job_extract.sh +++ b/src/firecrest/filesystem/transfer/scripts/job_extract.sh @@ -8,7 +8,7 @@ echo $(date -u) "Extract Files Job (id:${SLURM_JOB_ID:-${PBS_JOBID:-unknown}})" -status=$(tar -xzf '{{ source_path }}' -C '{{ target_path }}') +status=$(tar -x{{compression_flag}}f '{{ source_path }}' -C '{{ target_path }}') if [[ "$?" == '0' ]] then echo $(date -u) "Files were successfully extracted."