Skip to content

Commit 77e417d

Browse files
authored
Merge pull request #45 from forcedotcom/find_file_path
Find file path
2 parents 7451fba + c023866 commit 77e417d

File tree

10 files changed

+826
-0
lines changed

10 files changed

+826
-0
lines changed

README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,17 @@ The SDK automatically handles all dependency packaging for Data Cloud deployment
104104

105105
**No need to worry about platform compatibility** - the SDK handles this automatically through the Docker-based packaging process.
106106

107+
## files directory
108+
109+
```
110+
.
111+
├── payload
112+
│ ├── config.json
113+
│ ├── entrypoint.py
114+
├── files
115+
│ ├── data.csv
116+
```
117+
107118
## py-files directory
108119

109120
Your Python dependencies can be packaged as .py files, .zip archives (containing multiple .py files or a Python package structure), or .egg files.
@@ -124,6 +135,7 @@ Your Python dependencies can be packaged as .py files, .zip archives (containing
124135
Your entry point script will define logic using the `Client` object which wraps data access layers.
125136

126137
You should only need the following methods:
138+
* `find_file_path(file_name)` – Return the path to a file bundled in the `files` directory
127139
* `read_dlo(name)` – Read from a Data Lake Object by name
128140
* `read_dmo(name)` – Read from a Data Model Object by name
129141
* `write_to_dlo(name, spark_dataframe, write_mode)` – Write to a Data Lake Object by name with a Spark dataframe
@@ -197,6 +209,7 @@ Argument:
197209
Options:
198210
- `--config-file TEXT`: Path to configuration file
199211
- `--dependencies TEXT`: Additional dependencies (can be specified multiple times)
212+
- `--profile TEXT`: Credential profile name (default: "default")
200213

201214

202215
#### `datacustomcode zip`

src/datacustomcode/client.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,12 @@
2424
from pyspark.sql import SparkSession
2525

2626
from datacustomcode.config import SparkConfig, config
27+
from datacustomcode.file.path.default import DefaultFindFilePath
2728
from datacustomcode.io.reader.base import BaseDataCloudReader
2829

2930
if TYPE_CHECKING:
31+
from pathlib import Path
32+
3033
from pyspark.sql import DataFrame as PySparkDataFrame
3134

3235
from datacustomcode.io.reader.base import BaseDataCloudReader
@@ -100,18 +103,21 @@ class Client:
100103
writing, we print to the console instead of writing to Data Cloud.
101104
102105
Args:
106+
finder: Find a file path
103107
reader: A custom reader to use for reading Data Cloud objects.
104108
writer: A custom writer to use for writing Data Cloud objects.
105109
106110
Example:
107111
>>> client = Client()
112+
>>> file_path = client.find_file_path("data.csv")
108113
>>> dlo = client.read_dlo("my_dlo")
109114
>>> client.write_to_dmo("my_dmo", dlo)
110115
"""
111116

112117
_instance: ClassVar[Optional[Client]] = None
113118
_reader: BaseDataCloudReader
114119
_writer: BaseDataCloudWriter
120+
_file: DefaultFindFilePath
115121
_data_layer_history: dict[DataCloudObjectType, set[str]]
116122

117123
def __new__(
@@ -154,6 +160,7 @@ def __new__(
154160
writer_init = writer
155161
cls._instance._reader = reader_init
156162
cls._instance._writer = writer_init
163+
cls._instance._file = DefaultFindFilePath()
157164
cls._instance._data_layer_history = {
158165
DataCloudObjectType.DLO: set(),
159166
DataCloudObjectType.DMO: set(),
@@ -212,6 +219,11 @@ def write_to_dmo(
212219
self._validate_data_layer_history_does_not_contain(DataCloudObjectType.DLO)
213220
return self._writer.write_to_dmo(name, dataframe, write_mode, **kwargs)
214221

222+
def find_file_path(self, file_name: str) -> Path:
    """Look up ``file_name`` through this client's file path finder.

    Args:
        file_name: Name of the file to locate.

    Returns:
        The path returned by the underlying finder's ``find_file_path``.
    """
    finder = self._file
    return finder.find_file_path(file_name)
226+
215227
def _validate_data_layer_history_does_not_contain(
216228
self, data_cloud_object_type: DataCloudObjectType
217229
) -> None:
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Copyright (c) 2025, Salesforce, Inc.
2+
# SPDX-License-Identifier: Apache-2
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.

src/datacustomcode/file/base.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Copyright (c) 2025, Salesforce, Inc.
2+
# SPDX-License-Identifier: Apache-2
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
from __future__ import annotations
16+
17+
18+
class BaseDataAccessLayer:
    """Common base class for data access layer implementations.

    The class declares no attributes or methods of its own.
    """
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Copyright (c) 2025, Salesforce, Inc.
2+
# SPDX-License-Identifier: Apache-2
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
# Copyright (c) 2025, Salesforce, Inc.
2+
# SPDX-License-Identifier: Apache-2
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
from __future__ import annotations
16+
17+
import os
18+
from pathlib import Path
19+
from typing import Optional
20+
21+
from datacustomcode.file.base import BaseDataAccessLayer
22+
23+
24+
class FileReaderError(Exception):
    """Base exception for file reader operations."""


# Keep a handle on the builtin before the class statement below rebinds the
# name ``FileNotFoundError`` in this module's namespace.
_BuiltinFileNotFoundError = FileNotFoundError


class FileNotFoundError(FileReaderError, _BuiltinFileNotFoundError):
    """Raised when a file cannot be found.

    The class intentionally keeps its historical name, which shadows the
    builtin ``FileNotFoundError`` inside this module.  It derives from both
    ``FileReaderError`` and the builtin, so handlers written against either
    one (or against ``OSError``) catch it.
    """
30+
31+
32+
class DefaultFindFilePath(BaseDataAccessLayer):
    """Find data files by checking a fixed sequence of candidate locations.

    ``find_file_path`` tries the following, in order, and returns the first
    candidate that exists (relative paths are relative to the current
    working directory):

    1. ``<code_package>/<file_folder>/<file_name>``, when ``code_package``
       exists.
    2. When a file named ``config_file`` is found somewhere under the
       current working directory: ``<file_folder>/<file_name>``, then
       ``<file_folder>/<file_name>`` next to that config file.
    3. ``<file_name>`` itself.
    """

    # Default configuration values
    DEFAULT_CODE_PACKAGE = "payload"
    DEFAULT_FILE_FOLDER = "files"
    DEFAULT_CONFIG_FILE = "config.json"

    def __init__(
        self,
        code_package: Optional[str] = None,
        file_folder: Optional[str] = None,
        config_file: Optional[str] = None,
    ):
        """Initialize the finder with its search configuration.

        ``None`` or empty values fall back to the class-level defaults.

        Args:
            code_package: The code package directory to search first.
            file_folder: The folder containing files, relative to the code
                package (or to the config file location).
            config_file: The name of the configuration file used to anchor
                the fallback lookup.
        """
        self.code_package = code_package or self.DEFAULT_CODE_PACKAGE
        self.file_folder = file_folder or self.DEFAULT_FILE_FOLDER
        self.config_file = config_file or self.DEFAULT_CONFIG_FILE

    def find_file_path(self, file_name: str) -> Path:
        """Find a file path.

        Args:
            file_name: The name of the file to locate.

        Returns:
            The path of the first existing candidate location.

        Raises:
            ValueError: If ``file_name`` is empty.
            FileNotFoundError: If no candidate location exists (this is the
                module-level exception class, not necessarily the builtin).
        """
        if not file_name:
            raise ValueError("file_name cannot be empty")

        file_path = self._resolve_file_path(file_name)
        if not file_path.exists():
            raise FileNotFoundError(
                f"File '{file_name}' not found in any search location"
            )
        return file_path

    def _resolve_file_path(self, file_name: str) -> Path:
        """Resolve the path to a file without guaranteeing it exists.

        Args:
            file_name: The name of the file to resolve.

        Returns:
            The first existing candidate, or ``Path(file_name)`` when none of
            the candidates exists (the caller checks existence).
        """
        # First try the default code package location.
        if self._code_package_exists():
            candidate = self._get_code_package_file_path(file_name)
            if candidate.exists():
                return candidate

        # Fall back to locations anchored on the config file.
        config_path = self._find_config_file()
        if config_path is not None:
            candidates = (
                # Location relative to the working directory, as checked
                # historically; kept first so lookups that already worked
                # keep returning the very same path.
                Path(f"{self.file_folder}/{file_name}"),
                # Location next to the discovered config file.
                self._get_config_based_file_path(file_name, config_path),
            )
            for candidate in candidates:
                if candidate.exists():
                    return candidate

        # Nothing matched: hand back the bare name as a path.
        return Path(file_name)

    def _code_package_exists(self) -> bool:
        """Check whether the configured code package path exists.

        Returns:
            True if ``self.code_package`` exists on the filesystem.
        """
        return os.path.exists(self.code_package)

    def _get_code_package_file_path(self, file_name: str) -> Path:
        """Build the file path inside the code package.

        Args:
            file_name: The name of the file.

        Returns:
            ``<code_package>/<file_folder>/<file_name>`` as a path.
        """
        return Path(f"{self.code_package}/{self.file_folder}/{file_name}")

    def _find_config_file(self) -> Optional[Path]:
        """Search the current working directory tree for the config file.

        Returns:
            The path to the first match, or None if not found.
        """
        return self._find_file_in_tree(self.config_file, Path.cwd())

    def _get_config_based_file_path(self, file_name: str, config_path: Path) -> Path:
        """Build the file path relative to the config file location.

        Args:
            file_name: The name of the file.
            config_path: The path to the config file.

        Returns:
            ``<file_folder>/<file_name>`` inside the directory that contains
            ``config_path``.
        """
        return config_path.parent / self.file_folder / file_name

    def _find_file_in_tree(self, filename: str, search_path: Path) -> Optional[Path]:
        """Find a file within a directory tree.

        Args:
            filename: The name of the file to find.
            search_path: The root directory to search from.

        Returns:
            The first match produced by a recursive glob of ``search_path``,
            or None if there is no match.
        """
        return next(search_path.rglob(filename), None)

tests/file/__init__.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Copyright (c) 2025, Salesforce, Inc.
2+
# SPDX-License-Identifier: Apache-2
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.

tests/file/path/__init__.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Copyright (c) 2025, Salesforce, Inc.
2+
# SPDX-License-Identifier: Apache-2
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.

0 commit comments

Comments
 (0)