Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,17 @@ The SDK automatically handles all dependency packaging for Data Cloud deployment

**No need to worry about platform compatibility** - the SDK handles this automatically through the Docker-based packaging process.

## files directory

```
.
├── payload
│ ├── config.json
│ ├── entrypoint.py
├── files
│ ├── data.csv
```

## py-files directory

Your Python dependencies can be packaged as .py files, .zip archives (containing multiple .py files or a Python package structure), or .egg files.
Expand All @@ -124,6 +135,7 @@ Your Python dependencies can be packaged as .py files, .zip archives (containing
Your entry point script will define logic using the `Client` object which wraps data access layers.

You should only need the following methods:
* `find_file_path(file_name)` – Return the path to a file in the `files` directory by name
* `read_dlo(name)` – Read from a Data Lake Object by name
* `read_dmo(name)` – Read from a Data Model Object by name
* `write_to_dlo(name, spark_dataframe, write_mode)` – Write to a Data Lake Object by name with a Spark dataframe
Expand Down Expand Up @@ -197,6 +209,7 @@ Argument:
Options:
- `--config-file TEXT`: Path to configuration file
- `--dependencies TEXT`: Additional dependencies (can be specified multiple times)
- `--profile TEXT`: Credential profile name (default: "default")


#### `datacustomcode zip`
Expand Down
12 changes: 12 additions & 0 deletions src/datacustomcode/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,12 @@
from pyspark.sql import SparkSession

from datacustomcode.config import SparkConfig, config
from datacustomcode.file.path.default import DefaultFindFilePath
from datacustomcode.io.reader.base import BaseDataCloudReader

if TYPE_CHECKING:
from pathlib import Path

from pyspark.sql import DataFrame as PySparkDataFrame

from datacustomcode.io.reader.base import BaseDataCloudReader
Expand Down Expand Up @@ -100,18 +103,21 @@ class Client:
writing, we print to the console instead of writing to Data Cloud.

Args:
finder: Find a file path
reader: A custom reader to use for reading Data Cloud objects.
writer: A custom writer to use for writing Data Cloud objects.

Example:
>>> client = Client()
>>> file_path = client.find_file_path("data.csv")
>>> dlo = client.read_dlo("my_dlo")
>>> client.write_to_dmo("my_dmo", dlo)
"""

_instance: ClassVar[Optional[Client]] = None
_reader: BaseDataCloudReader
_writer: BaseDataCloudWriter
_file: DefaultFindFilePath
_data_layer_history: dict[DataCloudObjectType, set[str]]

def __new__(
Expand Down Expand Up @@ -154,6 +160,7 @@ def __new__(
writer_init = writer
cls._instance._reader = reader_init
cls._instance._writer = writer_init
cls._instance._file = DefaultFindFilePath()
cls._instance._data_layer_history = {
DataCloudObjectType.DLO: set(),
DataCloudObjectType.DMO: set(),
Expand Down Expand Up @@ -212,6 +219,11 @@ def write_to_dmo(
self._validate_data_layer_history_does_not_contain(DataCloudObjectType.DLO)
return self._writer.write_to_dmo(name, dataframe, write_mode, **kwargs)

def find_file_path(self, file_name: str) -> Path:
    """Look up the path of a file by name.

    The lookup is delegated to the finder object stored on this client
    as ``_file``; this method adds no logic of its own.

    Args:
        file_name: Name of the file to look up.

    Returns:
        The path produced by the finder's ``find_file_path`` for
        ``file_name``.
    """
    finder = self._file
    return finder.find_file_path(file_name)

def _validate_data_layer_history_does_not_contain(
self, data_cloud_object_type: DataCloudObjectType
) -> None:
Expand Down
14 changes: 14 additions & 0 deletions src/datacustomcode/file/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright (c) 2025, Salesforce, Inc.
# SPDX-License-Identifier: Apache-2
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
19 changes: 19 additions & 0 deletions src/datacustomcode/file/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Copyright (c) 2025, Salesforce, Inc.
# SPDX-License-Identifier: Apache-2
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations


class BaseDataAccessLayer:
    """Base class for data access layer implementations.

    The class currently declares no attributes or methods of its own; it
    only gives concrete implementations a shared ancestor type.
    """
14 changes: 14 additions & 0 deletions src/datacustomcode/file/path/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright (c) 2025, Salesforce, Inc.
# SPDX-License-Identifier: Apache-2
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
163 changes: 163 additions & 0 deletions src/datacustomcode/file/path/default.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# Copyright (c) 2025, Salesforce, Inc.
# SPDX-License-Identifier: Apache-2
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import builtins
import os
from pathlib import Path
from typing import Optional

from datacustomcode.file.base import BaseDataAccessLayer


class FileReaderError(Exception):
    """Base exception for file reader operations.

    Root of this module's exception hierarchy: the module's
    ``FileNotFoundError`` derives from it, so ``except FileReaderError``
    handles every error type defined here.
    """


class FileNotFoundError(FileReaderError, builtins.FileNotFoundError):
    """Raised when a file cannot be found.

    The class keeps its original name for backward compatibility, which
    means it shadows the built-in ``FileNotFoundError`` inside this module.
    To keep that shadowing from surprising callers, it derives from both
    ``FileReaderError`` and the built-in exception: handlers written as
    ``except FileReaderError``, ``except FileNotFoundError`` (built-in) or
    ``except OSError`` all catch it.
    """


class DefaultFindFilePath(BaseDataAccessLayer):
    """Default strategy for resolving the path of a file by name.

    Candidates are tried in this order by :meth:`find_file_path`:

    1. ``<code_package>/<file_folder>/<file_name>`` relative to the current
       working directory, when the code package path exists.
    2. When a file named ``config_file`` is found anywhere below the current
       working directory: ``<file_folder>/<file_name>`` relative to the
       working directory, then the same folder next to that config file.
    3. ``file_name`` itself, interpreted as a path.
    """

    # Default configuration values
    DEFAULT_CODE_PACKAGE = "payload"
    DEFAULT_FILE_FOLDER = "files"
    DEFAULT_CONFIG_FILE = "config.json"

    def __init__(
        self,
        code_package: Optional[str] = None,
        file_folder: Optional[str] = None,
        config_file: Optional[str] = None,
    ):
        """Initialize the finder with its search configuration.

        Any argument that is ``None`` or an empty string falls back to the
        matching ``DEFAULT_*`` class constant.

        Args:
            code_package: The code package directory to search first.
            file_folder: The folder containing files, relative to the code
                package (or to the fallback locations).
            config_file: The configuration file name used to detect the
                fallback location.
        """
        self.code_package = code_package or self.DEFAULT_CODE_PACKAGE
        self.file_folder = file_folder or self.DEFAULT_FILE_FOLDER
        self.config_file = config_file or self.DEFAULT_CONFIG_FILE

    def find_file_path(self, file_name: str) -> Path:
        """Find the path of an existing file.

        Args:
            file_name: The name of the file to locate.

        Returns:
            The path of the first search candidate that exists.

        Raises:
            ValueError: If ``file_name`` is empty.
            FileNotFoundError: This module's exception, raised when none of
                the search candidates exists.
        """
        if not file_name:
            raise ValueError("file_name cannot be empty")

        file_path = self._resolve_file_path(file_name)

        # _resolve_file_path falls back to the bare file name without checking
        # it, so existence has to be verified here.
        if not file_path.exists():
            raise FileNotFoundError(
                f"File '{file_name}' not found in any search location"
            )

        return file_path

    def _resolve_file_path(self, file_name: str) -> Path:
        """Resolve the best candidate path for a file.

        Args:
            file_name: The name of the file to resolve.

        Returns:
            The first existing candidate, or ``Path(file_name)`` (which may
            not exist) when no candidate is found.
        """
        # First try the default code package location
        if self._code_package_exists():
            file_path = self._get_code_package_file_path(file_name)
            if file_path.exists():
                return file_path

        # Fall back to config.json-based location
        config_path = self._find_config_file()
        if config_path:
            file_path = self._get_config_based_file_path(file_name, config_path)
            if file_path.exists():
                return file_path

        # Return the file name as a Path if not found in any location
        return Path(file_name)

    def _code_package_exists(self) -> bool:
        """Check whether the configured code package path exists.

        Returns:
            True if ``self.code_package`` exists (relative paths are resolved
            against the current working directory).
        """
        return os.path.exists(self.code_package)

    def _get_code_package_file_path(self, file_name: str) -> Path:
        """Build the file path inside the code package.

        Args:
            file_name: The name of the file.

        Returns:
            ``<code_package>/<file_folder>/<file_name>``; existence is not
            checked here.
        """
        return Path(f"{self.code_package}/{self.file_folder}/{file_name}")

    def _find_config_file(self) -> Optional[Path]:
        """Search for the configuration file below the working directory.

        Returns:
            The path to the first config file found, or None if not found.
        """
        return self._find_file_in_tree(self.config_file, Path.cwd())

    def _get_config_based_file_path(self, file_name: str, config_path: Path) -> Path:
        """Build the file path for the config-file-based fallback.

        The path relative to the current working directory is tried first so
        that layouts which already resolved keep resolving to the same path.
        If it does not exist, the file folder located next to the config file
        is used instead.

        Args:
            file_name: The name of the file.
            config_path: The path to the discovered config file.

        Returns:
            The first of the two candidates that exists; when neither exists,
            the working-directory-relative candidate is returned unchanged.
        """
        cwd_relative = Path(f"{self.file_folder}/{file_name}")
        if cwd_relative.exists():
            return cwd_relative

        beside_config = config_path.parent / cwd_relative
        return beside_config if beside_config.exists() else cwd_relative

    def _find_file_in_tree(self, filename: str, search_path: Path) -> Optional[Path]:
        """Find a file within a directory tree.

        Args:
            filename: The name (or glob pattern) of the file to find.
            search_path: The root directory to search from.

        Returns:
            The first match yielded by the recursive search, or None if there
            is no match. No ordering among multiple matches is enforced here.
        """
        return next(search_path.rglob(filename), None)
14 changes: 14 additions & 0 deletions tests/file/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright (c) 2025, Salesforce, Inc.
# SPDX-License-Identifier: Apache-2
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 changes: 14 additions & 0 deletions tests/file/path/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright (c) 2025, Salesforce, Inc.
# SPDX-License-Identifier: Apache-2
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Loading