Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- Compress and Extract end-points now support multiple compression types (none, bzip2, gzip, and xz).

### Changed

## [2.3.1]
Expand Down
28 changes: 26 additions & 2 deletions src/firecrest/filesystem/ops/commands/tar_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from enum import Enum
import os

from fastapi import HTTPException, status

from firecrest.filesystem.ops.commands.base_command_with_timeout import (
BaseCommandWithTimeout,
)
Expand All @@ -17,12 +19,19 @@ class Operation(str, Enum):
compress = "compress"
extract = "extract"

# Compression algorithms that can be requested for a tar archive.
# __init__ translates each member into the matching single-letter tar flag:
# none -> "" (plain tar), gzip -> "z", bzip2 -> "j", xz -> "J".
class CompressionType(str, Enum):
none = "none"
bzip2 = "bzip2"
gzip = "gzip"
xz = "xz"

def __init__(
self,
source_path: str,
target_path: str,
match_pattern: str = None,
dereference: bool = False,
compression: CompressionType = CompressionType.gzip,
operation: Operation = Operation.compress,
) -> None:
super().__init__()
Expand All @@ -32,6 +41,21 @@ def __init__(
self.dereference = dereference
self.operation = operation

match compression:
case TarCommand.CompressionType.none:
self.compression_flag = ""
case TarCommand.CompressionType.gzip:
self.compression_flag = "z"
case TarCommand.CompressionType.bzip2:
self.compression_flag = "j"
case TarCommand.CompressionType.xz:
self.compression_flag = "J"
case _:
raise HTTPException(
status_code=status.HTTP_501_NOT_IMPLEMENTED,
detail="The requested compression type is not implemented.",
)

def get_command(
self,
) -> str:
Expand All @@ -53,11 +77,11 @@ def get_compress_command(self) -> str:
if self.match_pattern:
return f"{super().get_command()} bash -c \"cd {source_dir}; {super().get_command()} find . -type f -regex '{self.match_pattern}' -print0 | tar {options} -czvf '{self.target_path}' --null --files-from - \""

return f"{super().get_command()} tar {options} -czvf '{self.target_path}' -C '{source_dir}' '{source_file}'"
return f"{super().get_command()} tar {options} -c{self.compression_flag}vf '{self.target_path}' -C '{source_dir}' '{source_file}'"

def get_extract_command(self) -> str:

return f"{super().get_command()} tar -xzf '{self.source_path}' -C '{self.target_path}'"
return f"{super().get_command()} tar -x{self.compression_flag}f '{self.source_path}' -C '{self.target_path}'"

def parse_output(self, stdout: str, stderr: str, exit_status: int):
if exit_status != 0:
Expand Down
64 changes: 37 additions & 27 deletions src/firecrest/filesystem/ops/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from firecrest.filesystem.models import FilesystemRequestBase
from lib.models import CamelModel
from pydantic import Field
from firecrest.filesystem.ops.commands.tar_command import TarCommand


class ContentUnit(str, Enum):
Expand Down Expand Up @@ -100,12 +101,7 @@ class PutFileChmodRequest(FilesystemRequestBase):
mode: str = Field(..., description="Mode in octal permission format")
model_config = {
"json_schema_extra": {
"examples": [
{
"path": "/home/user/dir/file.out",
"mode": "777"
}
]
"examples": [{"path": "/home/user/dir/file.out", "mode": "777"}]
}
}

Expand All @@ -115,15 +111,19 @@ class PutFileChmodResponse(CamelModel):


class PutFileChownRequest(FilesystemRequestBase):
owner: Optional[str] = Field(default="", description="User name of the new user owner of the file")
group: Optional[str] = Field(default="", description="Group name of the new group owner of the file")
owner: Optional[str] = Field(
default="", description="User name of the new user owner of the file"
)
group: Optional[str] = Field(
default="", description="Group name of the new group owner of the file"
)
model_config = {
"json_schema_extra": {
"examples": [
{
"path": "/home/user/dir/file.out",
"owner": "user",
"group": "my-group"
"group": "my-group",
}
]
}
Expand All @@ -135,15 +135,13 @@ class PutFileChownResponse(CamelModel):


class PostMakeDirRequest(FilesystemRequestBase):
parent: Optional[bool] = Field(default=False, description="If set to `true` creates all its parent directories if they do not already exist")
parent: Optional[bool] = Field(
default=False,
description="If set to `true` creates all its parent directories if they do not already exist",
)
model_config = {
"json_schema_extra": {
"examples": [
{
"path": "/home/user/dir/newdir",
"parent": "true"
}
]
"examples": [{"path": "/home/user/dir/newdir", "parent": "true"}]
}
}

Expand All @@ -152,12 +150,7 @@ class PostFileSymlinkRequest(FilesystemRequestBase):
link_path: str = Field(..., description="Path to the new symlink")
model_config = {
"json_schema_extra": {
"examples": [
{
"path": "/home/user/dir",
"link_path": "/home/user/newlink"
}
]
"examples": [{"path": "/home/user/dir", "link_path": "/home/user/newlink"}]
}
}

Expand All @@ -176,30 +169,47 @@ class PostMkdirResponse(CamelModel):

class PostCompressRequest(FilesystemRequestBase):
target_path: str = Field(..., description="Path to the compressed file")
match_pattern: Optional[str] = Field(default=None, description="Regex pattern to filter files to compress")
dereference: Optional[bool] = Field(default=False, description="If set to `true`, it follows symbolic links and archive the files they point to instead of the links themselves.")
match_pattern: Optional[str] = Field(
default=None, description="Regex pattern to filter files to compress"
)
dereference: Optional[bool] = Field(
default=False,
description="If set to `true`, it follows symbolic links and archive the files they point to instead of the links themselves.",
)
compression: Optional[TarCommand.CompressionType] = Field(
default="gzip",
description="Defines the type of compression to be used. By default gzip is used.",
)
model_config = {
"json_schema_extra": {
"examples": [
{
"path": "/home/user/dir",
"target_path": "/home/user/file.tar.gz",
"match_pattern": "*./[ab].*\\.txt",
"dereference": "true"
"dereference": "true",
"compression": "none",
}
]
}
}


class PostExtractRequest(FilesystemRequestBase):
target_path: str = Field(..., description="Path to the directory where to extract the compressed file")
target_path: str = Field(
..., description="Path to the directory where to extract the compressed file"
)
compression: Optional[TarCommand.CompressionType] = Field(
default="gzip",
description="Defines the type of compression to be used. By default gzip is used.",
)
model_config = {
"json_schema_extra": {
"examples": [
{
"path": "/home/user/dir/file.tar.gz",
"target_path": "/home/user/dir"
"target_path": "/home/user/dir",
"compression": "none",
}
]
}
Expand Down
6 changes: 4 additions & 2 deletions src/firecrest/filesystem/ops/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,8 +654,9 @@ async def post_compress(
tar = TarCommand(
request_model.path,
request_model.target_path,
request_model.match_pattern,
request_model.dereference,
match_pattern=request_model.match_pattern,
dereference=request_model.dereference,
compression=request_model.compression,
operation=TarCommand.Operation.compress,
)

Expand Down Expand Up @@ -684,6 +685,7 @@ async def post_extract(
tar = TarCommand(
request_model.path,
request_model.target_path,
compression=request_model.compression,
operation=TarCommand.Operation.extract,
)

Expand Down
11 changes: 11 additions & 0 deletions src/firecrest/filesystem/transfer/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from firecrest.filesystem.models import FilesystemRequestBase
from lib.datatransfers.s3.models import S3DataTransferOperation
from lib.models.base_model import CamelModel
from firecrest.filesystem.ops.commands.tar_command import TarCommand


class PostFileUploadRequest(FilesystemRequestBase):
Expand Down Expand Up @@ -135,6 +136,10 @@ class CompressRequest(FilesystemRequestBase):
default=False,
description="If set to `true`, it follows symbolic links and archive the files they point to instead of the links themselves.",
)
compression: Optional[TarCommand.CompressionType] = Field(
default="gzip",
description="Defines the type of compression to be used. By default gzip is used.",
)
model_config = {
"json_schema_extra": {
"examples": [
Expand All @@ -144,6 +149,7 @@ class CompressRequest(FilesystemRequestBase):
"match_pattern": "*./[ab].*\\.txt",
"dereference": "true",
"account": "group",
"compression": "none",
}
]
}
Expand All @@ -161,13 +167,18 @@ class ExtractRequest(FilesystemRequestBase):
account: Optional[str] = Field(
default=None, description="Name of the account in the scheduler"
)
compression: Optional[TarCommand.CompressionType] = Field(
default="gzip",
description="Defines the type of compression to be used. By default gzip is used.",
)
model_config = {
"json_schema_extra": {
"examples": [
{
"source_path": "/home/user/dir/file.tar.gz",
"target_path": "/home/user/dir",
"account": "group",
"compression": "none",
}
]
}
Expand Down
33 changes: 33 additions & 0 deletions src/firecrest/filesystem/transfer/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
SchedulerClientDependency,
ServiceAvailabilityDependency,
)
from firecrest.filesystem.ops.commands.tar_command import TarCommand

# clients
from lib.scheduler_clients.slurm.slurm_rest_client import SlurmRestClient
Expand Down Expand Up @@ -397,6 +398,21 @@ async def compress(
if request.dereference:
options += "--dereference"

match request.compression:
case TarCommand.CompressionType.none:
compression_flag = ""
case TarCommand.CompressionType.gzip:
compression_flag = "z"
case TarCommand.CompressionType.bzip2:
compression_flag = "j"
case TarCommand.CompressionType.xz:
compression_flag = "J"
case _:
raise HTTPException(
status_code=status.HTTP_501_NOT_IMPLEMENTED,
detail="The requested compression type is not implemented.",
)

parameters = {
"sbatch_directives": _format_directives(
system.datatransfer_jobs_directives, request.account
Expand All @@ -406,6 +422,7 @@ async def compress(
"target_path": request.target_path,
"match_pattern": request.match_pattern,
"options": options,
"compression_flag": compression_flag,
}

job_script = _build_script("job_compress.sh", parameters)
Expand Down Expand Up @@ -459,12 +476,28 @@ async def extract(
f"The system {system_name} has no filesystem defined as default_work_dir"
)

match request.compression:
case TarCommand.CompressionType.none:
compression_flag = ""
case TarCommand.CompressionType.gzip:
compression_flag = "z"
case TarCommand.CompressionType.bzip2:
compression_flag = "j"
case TarCommand.CompressionType.xz:
compression_flag = "J"
case _:
raise HTTPException(
status_code=status.HTTP_501_NOT_IMPLEMENTED,
detail="The requested compression type is not implemented.",
)

parameters = {
"sbatch_directives": _format_directives(
system.datatransfer_jobs_directives, request.account
),
"source_path": request.path,
"target_path": request.target_path,
"compression_flag": compression_flag,
}

job_script = _build_script("job_extract.sh", parameters)
Expand Down
4 changes: 2 additions & 2 deletions src/firecrest/filesystem/transfer/scripts/job_compress.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ echo $(date -u) "Compress Files Job (id:${SLURM_JOB_ID:-${PBS_JOBID:-unknown}})"

{% if match_pattern %}

status=$(cd {{ source_dir }}; find . -type f -regex '{{ match_pattern }}' -print0 | tar {{ options }} -czvf '{{ target_path }}' --null --files-from - )
status=$(cd {{ source_dir }}; find . -type f -regex '{{ match_pattern }}' -print0 | tar {{ options }} -c{{compression_flag}}vf '{{ target_path }}' --null --files-from - )
if [[ "$?" == '0' ]]
then
echo $(date -u) "Files were successfully compressed."
Expand All @@ -24,7 +24,7 @@ fi

{% else %}

status=$(tar {{ options }} -czvf '{{ target_path }}' -C '{{ source_dir }}' '{{ source_file }}')
status=$(tar {{ options }} -c{{compression_flag}}vf '{{ target_path }}' -C '{{ source_dir }}' '{{ source_file }}')
if [[ "$?" == '0' ]]
then
echo $(date -u) "Files were successfully compressed."
Expand Down
2 changes: 1 addition & 1 deletion src/firecrest/filesystem/transfer/scripts/job_extract.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

echo $(date -u) "Extract Files Job (id:${SLURM_JOB_ID:-${PBS_JOBID:-unknown}})"

status=$(tar -xzf '{{ source_path }}' -C '{{ target_path }}')
status=$(tar -x{{compression_flag}}f '{{ source_path }}' -C '{{ target_path }}')
if [[ "$?" == '0' ]]
then
echo $(date -u) "Files were successfully extracted."
Expand Down