From f1f9e2a77d8fb3242acf9645968163dc1f1ea0be Mon Sep 17 00:00:00 2001 From: Jesus Orosco Date: Fri, 22 Aug 2025 15:43:05 -0700 Subject: [PATCH 1/6] added new SDK api --- src/datacustomcode/client.py | 13 +++++- src/datacustomcode/file/__init__.py | 0 src/datacustomcode/file/base.py | 21 +++++++++ src/datacustomcode/file/reader/__init__.py | 0 src/datacustomcode/file/reader/base.py | 51 ++++++++++++++++++++++ 5 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 src/datacustomcode/file/__init__.py create mode 100644 src/datacustomcode/file/base.py create mode 100644 src/datacustomcode/file/reader/__init__.py create mode 100644 src/datacustomcode/file/reader/base.py diff --git a/src/datacustomcode/client.py b/src/datacustomcode/client.py index 7a1f9a4..076ef9e 100644 --- a/src/datacustomcode/client.py +++ b/src/datacustomcode/client.py @@ -14,17 +14,18 @@ # limitations under the License. from __future__ import annotations +import io + from enum import Enum from typing import ( TYPE_CHECKING, ClassVar, Optional, ) - from pyspark.sql import SparkSession - from datacustomcode.config import SparkConfig, config from datacustomcode.io.reader.base import BaseDataCloudReader +from datacustomcode.file.reader.base import BaseFileReader if TYPE_CHECKING: from pyspark.sql import DataFrame as PySparkDataFrame @@ -112,6 +113,7 @@ class Client: _instance: ClassVar[Optional[Client]] = None _reader: BaseDataCloudReader _writer: BaseDataCloudWriter + _file: BaseFileReader _data_layer_history: dict[DataCloudObjectType, set[str]] def __new__( @@ -154,6 +156,7 @@ def __new__( writer_init = writer cls._instance._reader = reader_init cls._instance._writer = writer_init + cls._instance._file = BaseFileReader() cls._instance._data_layer_history = { DataCloudObjectType.DLO: set(), DataCloudObjectType.DMO: set(), @@ -212,6 +215,12 @@ def write_to_dmo( self._validate_data_layer_history_does_not_contain(DataCloudObjectType.DLO) return self._writer.write_to_dmo(name, dataframe, write_mode, **kwargs) + def file_open(self, file_name: str) -> io.TextIOWrapper: + """Read a file from the local file system. + """ + + return self._file.file_open(file_name) + def _validate_data_layer_history_does_not_contain( self, data_cloud_object_type: DataCloudObjectType ) -> None: diff --git a/src/datacustomcode/file/__init__.py b/src/datacustomcode/file/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/datacustomcode/file/base.py b/src/datacustomcode/file/base.py new file mode 100644 index 0000000..d00cdf0 --- /dev/null +++ b/src/datacustomcode/file/base.py @@ -0,0 +1,21 @@ +# Copyright (c) 2025, Salesforce, Inc. +# SPDX-License-Identifier: Apache-2 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import annotations + +from abc import ABC + + +class BaseDataAccessLayer(ABC): + pass \ No newline at end of file diff --git a/src/datacustomcode/file/reader/__init__.py b/src/datacustomcode/file/reader/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/datacustomcode/file/reader/base.py b/src/datacustomcode/file/reader/base.py new file mode 100644 index 0000000..30527c3 --- /dev/null +++ b/src/datacustomcode/file/reader/base.py @@ -0,0 +1,51 @@ +# Copyright (c) 2025, Salesforce, Inc. +# SPDX-License-Identifier: Apache-2 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +import io +import os + +from abc import abstractmethod +from pathlib import Path +from datacustomcode.file.base import BaseDataAccessLayer + +class BaseFileReader(BaseDataAccessLayer): + def file_open(self, file_name: str) -> io.TextIOWrapper: + default_code_pacakge = 'payload' + file_folder = 'files' # hardcoded folder name + + file_path = None + if os.path.exists(default_code_pacakge): + relative = f"{default_code_pacakge}/{file_folder}/{file_name}" + file_path = Path.cwd().joinpath(relative) + else: + # find config.json, files folder is right next to the config.json + config_abs_path = Path(self.find_file_pathlib("config.json", Path.cwd())) + relative = f"{file_folder}/{file_name}" + file_path = config_abs_path.parent.joinpath(relative) + + file_handle = open(file_path, 'r') + #print(f"chuy file type: {type(foo), file_path}") + return file_handle + + def find_file_pathlib(self, filename, search_path): + """ + Finds a file within a directory and its subdirectories using pathlib. + Returns the full path of the first match found, or None if not found. 
+ """ + path_obj = Path(search_path) + for file_path in path_obj.rglob(filename): + return str(file_path) # Convert Path object to string + return None \ No newline at end of file From 72a9debdf51cfd287690ea68ab1f225eda10f7de Mon Sep 17 00:00:00 2001 From: Jesus Orosco Date: Fri, 22 Aug 2025 15:58:23 -0700 Subject: [PATCH 2/6] refactoring Updating README passing all the tests renamed to read_file() return base path when all else fails add few more tests --- README.md | 1 + docs/file_reader_refactoring.md | 179 ++++++++++ src/datacustomcode/client.py | 19 +- src/datacustomcode/file/__init__.py | 14 + src/datacustomcode/file/base.py | 6 +- src/datacustomcode/file/reader/__init__.py | 14 + src/datacustomcode/file/reader/base.py | 51 --- src/datacustomcode/file/reader/default.py | 203 ++++++++++++ tests/test_file_reader.py | 363 +++++++++++++++++++++ 9 files changed, 786 insertions(+), 64 deletions(-) create mode 100644 docs/file_reader_refactoring.md delete mode 100644 src/datacustomcode/file/reader/base.py create mode 100644 src/datacustomcode/file/reader/default.py create mode 100644 tests/test_file_reader.py diff --git a/README.md b/README.md index e843e9b..900ba5e 100644 --- a/README.md +++ b/README.md @@ -124,6 +124,7 @@ Your Python dependencies can be packaged as .py files, .zip archives (containing Your entry point script will define logic using the `Client` object which wraps data access layers. You should only need the following methods: +* `read_file(file_name)` - Returns a file handle of the provided file_name * `read_dlo(name)` – Read from a Data Lake Object by name * `read_dmo(name)` – Read from a Data Model Object by name * `write_to_dlo(name, spark_dataframe, write_mode)` – Write to a Data Model Object by name with a Spark dataframe diff --git a/docs/file_reader_refactoring.md b/docs/file_reader_refactoring.md new file mode 100644 index 0000000..a1d88c5 --- /dev/null +++ b/docs/file_reader_refactoring.md @@ -0,0 +1,179 @@ +# DefaultFileReader Class Refactoring + +## Overview + +The `DefaultFileReader` class has been refactored to improve testability, readability, and maintainability. This document outlines the changes made and how to use the new implementation. + +## Key Improvements + +### 1. **Separation of Concerns** +- **File path resolution** is now handled by dedicated methods +- **File opening** is separated from path resolution +- **Configuration management** is centralized and configurable + +### 2. **Enhanced Testability** +- **Dependency injection** through constructor parameters +- **Mockable methods** for unit testing +- **Clear interfaces** between different responsibilities +- **Comprehensive test coverage** with isolated test cases + +### 3. **Better Error Handling** +- **Custom exception hierarchy** for different error types +- **Descriptive error messages** with context +- **Proper exception chaining** for debugging + +### 4. **Improved Configuration** +- **Configurable defaults** that can be overridden +- **Environment-specific settings** support +- **Clear configuration contract** + +### 5. 
**Enhanced Readability** +- **Comprehensive docstrings** for all methods +- **Clear method names** that describe their purpose +- **Logical method organization** from public to private +- **Type hints** throughout the codebase + +## Class Structure + +### DefaultFileReader +The main class that provides the file reading framework: + +```python +class DefaultFileReader(BaseDataAccessLayer): + # Configuration constants + DEFAULT_CODE_PACKAGE = 'payload' + DEFAULT_FILE_FOLDER = 'files' + DEFAULT_CONFIG_FILE = 'config.json' + + def __init__(self, code_package=None, file_folder=None, config_file=None): + # Initialize with custom or default configuration + + def file_open(self, file_name: str) -> io.TextIOWrapper: + # Main public method for opening files + + def get_search_locations(self) -> list[Path]: + # Get all possible search locations +``` + +## Exception Hierarchy + +```python +FileReaderError (base) +├── FileNotFoundError (file not found in any location) +└── FileAccessError (permission, I/O errors, etc.) +``` + +## Usage Examples + +### Basic Usage +```python +from datacustomcode.file.reader.default import DefaultFileReader + +# Use default configuration +reader = DefaultFileReader() +with reader.file_open('data.csv') as f: + content = f.read() +``` + +### Custom Configuration +```python +from datacustomcode.file.reader.default import DefaultFileReader + +# Custom configuration +reader = DefaultFileReader( + code_package='my_package', + file_folder='data', + config_file='settings.json' +) +``` + +### Error Handling +```python +try: + with reader.file_open('data.csv') as f: + content = f.read() +except FileNotFoundError as e: + print(f"File not found: {e}") +except FileAccessError as e: + print(f"Access error: {e}") +``` + +## File Resolution Strategy + +The file reader uses a two-tier search strategy: + +1. **Primary Location**: `{code_package}/{file_folder}/{filename}` +2. **Fallback Location**: `{config_file_parent}/{file_folder}/{filename}` + +This allows for flexible deployment scenarios where files might be in different locations depending on the environment. + +## Testing + +### Unit Tests +The refactored class includes comprehensive unit tests covering: +- Configuration initialization +- File path resolution +- Error handling scenarios +- File opening operations +- Search location determination + +### Mocking +The class is designed for easy mocking in tests: +```python +from unittest.mock import patch + +with patch('DefaultFileReader._resolve_file_path') as mock_resolve: + mock_resolve.return_value = Path('/test/file.txt') + # Test file opening logic +``` + +### Integration Tests +Integration tests verify the complete file resolution and opening flow using temporary directories and real file operations. + +## Migration Guide + +### From Old Implementation +The old implementation had these issues: +- Hardcoded configuration values +- Mixed responsibilities in single methods +- Limited error handling +- Difficult to test + +### To New Implementation +1. **Update imports**: Use `DefaultFileReader` from `datacustomcode.file.reader.default` +2. **Error handling**: Catch specific exceptions instead of generic ones +3. **Configuration**: Use constructor parameters for custom settings +4. 
**Testing**: Leverage the new mockable methods + +## Benefits + +### For Developers +- **Easier debugging** with clear error messages +- **Better IDE support** with type hints and docstrings +- **Simplified testing** with dependency injection +- **Clearer code structure** with separated responsibilities + +### For Maintainers +- **Easier to extend** with new file resolution strategies +- **Better error tracking** with custom exception types +- **Improved test coverage** with isolated test cases +- **Clearer documentation** with comprehensive docstrings + +### For Users +- **More reliable** with proper error handling +- **More flexible** with configurable behavior +- **Better debugging** with descriptive error messages +- **Consistent interface** across different implementations + +## Future Enhancements + +The refactored structure makes it easy to add: +- **Additional file resolution strategies** (URLs, cloud storage, etc.) +- **File format detection** and automatic handling +- **Caching mechanisms** for frequently accessed files +- **Async file operations** for better performance +- **File validation** and integrity checking + +## Conclusion + +The refactored `DefaultFileReader` class provides a solid foundation for file reading operations while maintaining backward compatibility. The improvements in testability, readability, and maintainability make it easier to develop, test, and maintain file reading functionality in the Data Cloud Custom Code SDK. diff --git a/src/datacustomcode/client.py b/src/datacustomcode/client.py index 076ef9e..58ec656 100644 --- a/src/datacustomcode/client.py +++ b/src/datacustomcode/client.py @@ -14,20 +14,22 @@ # limitations under the License. from __future__ import annotations -import io - from enum import Enum from typing import ( TYPE_CHECKING, ClassVar, Optional, ) + from pyspark.sql import SparkSession + from datacustomcode.config import SparkConfig, config +from datacustomcode.file.reader.default import DefaultFileReader from datacustomcode.io.reader.base import BaseDataCloudReader -from datacustomcode.file.reader.base import BaseFileReader if TYPE_CHECKING: + import io + from pyspark.sql import DataFrame as PySparkDataFrame from datacustomcode.io.reader.base import BaseDataCloudReader @@ -113,7 +115,7 @@ class Client: _instance: ClassVar[Optional[Client]] = None _reader: BaseDataCloudReader _writer: BaseDataCloudWriter - _file: BaseFileReader + _file: DefaultFileReader _data_layer_history: dict[DataCloudObjectType, set[str]] def __new__( @@ -156,7 +158,7 @@ def __new__( writer_init = writer cls._instance._reader = reader_init cls._instance._writer = writer_init - cls._instance._file = BaseFileReader() + cls._instance._file = DefaultFileReader() cls._instance._data_layer_history = { DataCloudObjectType.DLO: set(), DataCloudObjectType.DMO: set(), @@ -215,11 +217,10 @@ def write_to_dmo( self._validate_data_layer_history_does_not_contain(DataCloudObjectType.DLO) return self._writer.write_to_dmo(name, dataframe, write_mode, **kwargs) - def file_open(self, file_name: str) -> io.TextIOWrapper: - """Read a file from the local file system. 
- """ + def read_file(self, file_name: str) -> io.TextIOWrapper: + """Read a file from the local file system.""" - return self._file.file_open(file_name) + return self._file.read_file(file_name) def _validate_data_layer_history_does_not_contain( self, data_cloud_object_type: DataCloudObjectType diff --git a/src/datacustomcode/file/__init__.py b/src/datacustomcode/file/__init__.py index e69de29..93988ff 100644 --- a/src/datacustomcode/file/__init__.py +++ b/src/datacustomcode/file/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2025, Salesforce, Inc. +# SPDX-License-Identifier: Apache-2 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/src/datacustomcode/file/base.py b/src/datacustomcode/file/base.py index d00cdf0..fdd8320 100644 --- a/src/datacustomcode/file/base.py +++ b/src/datacustomcode/file/base.py @@ -14,8 +14,6 @@ # limitations under the License. from __future__ import annotations -from abc import ABC - -class BaseDataAccessLayer(ABC): - pass \ No newline at end of file +class BaseDataAccessLayer: + """Base class for data access layer implementations.""" diff --git a/src/datacustomcode/file/reader/__init__.py b/src/datacustomcode/file/reader/__init__.py index e69de29..93988ff 100644 --- a/src/datacustomcode/file/reader/__init__.py +++ b/src/datacustomcode/file/reader/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2025, Salesforce, Inc. +# SPDX-License-Identifier: Apache-2 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/src/datacustomcode/file/reader/base.py b/src/datacustomcode/file/reader/base.py deleted file mode 100644 index 30527c3..0000000 --- a/src/datacustomcode/file/reader/base.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) 2025, Salesforce, Inc. -# SPDX-License-Identifier: Apache-2 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import annotations - -import io -import os - -from abc import abstractmethod -from pathlib import Path -from datacustomcode.file.base import BaseDataAccessLayer - -class BaseFileReader(BaseDataAccessLayer): - def file_open(self, file_name: str) -> io.TextIOWrapper: - default_code_pacakge = 'payload' - file_folder = 'files' # hardcoded folder name - - file_path = None - if os.path.exists(default_code_pacakge): - relative = f"{default_code_pacakge}/{file_folder}/{file_name}" - file_path = Path.cwd().joinpath(relative) - else: - # find config.json, files folder is right next to the config.json - config_abs_path = Path(self.find_file_pathlib("config.json", Path.cwd())) - relative = f"{file_folder}/{file_name}" - file_path = config_abs_path.parent.joinpath(relative) - - file_handle = open(file_path, 'r') - #print(f"chuy file type: {type(foo), file_path}") - return file_handle - - def find_file_pathlib(self, filename, search_path): - """ - Finds a file within a directory and its subdirectories using pathlib. - Returns the full path of the first match found, or None if not found. - """ - path_obj = Path(search_path) - for file_path in path_obj.rglob(filename): - return str(file_path) # Convert Path object to string - return None \ No newline at end of file diff --git a/src/datacustomcode/file/reader/default.py b/src/datacustomcode/file/reader/default.py new file mode 100644 index 0000000..22f6b6f --- /dev/null +++ b/src/datacustomcode/file/reader/default.py @@ -0,0 +1,203 @@ +# Copyright (c) 2025, Salesforce, Inc. +# SPDX-License-Identifier: Apache-2 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +import os +from pathlib import Path +from typing import TYPE_CHECKING, Optional + +from datacustomcode.file.base import BaseDataAccessLayer + +if TYPE_CHECKING: + import io + + +class FileReaderError(Exception): + """Base exception for file reader operations.""" + + +class FileNotFoundError(FileReaderError): + """Raised when a file cannot be found.""" + + +class FileAccessError(FileReaderError): + """Raised when there's an error accessing a file.""" + + +class DefaultFileReader(BaseDataAccessLayer): + """Base class for file reading operations. + + This class provides a framework for reading files from various locations + with configurable search strategies and error handling. + """ + + # Default configuration values + DEFAULT_CODE_PACKAGE = "payload" + DEFAULT_FILE_FOLDER = "files" + DEFAULT_CONFIG_FILE = "config.json" + + def __init__( + self, + code_package: Optional[str] = None, + file_folder: Optional[str] = None, + config_file: Optional[str] = None, + ): + """Initialize the file reader with configuration. 
+ + Args: + code_package: The default code package directory to search + file_folder: The folder containing files relative to the code package + config_file: The configuration file to use for path resolution + """ + self.code_package = code_package or self.DEFAULT_CODE_PACKAGE + self.file_folder = file_folder or self.DEFAULT_FILE_FOLDER + self.config_file = config_file or self.DEFAULT_CONFIG_FILE + + def read_file(self, file_name: str) -> "io.TextIOWrapper": + """Open a file for reading. + + Args: + file_name: The name of the file to open + + Returns: + A file handle for reading + + Raises: + FileNotFoundError: If the file cannot be found + FileAccessError: If there's an error opening the file + """ + if not file_name: + raise ValueError("file_name cannot be empty") + + file_path = self._resolve_file_path(file_name) + + if not file_path: + raise FileNotFoundError( + f"File '{file_name}' not found in any search location" + ) + + try: + return self._open_file(file_path) + except (OSError, IOError) as e: + raise FileAccessError(f"Error opening file '{file_path}': {e}") from e + + def _resolve_file_path(self, file_name: str) -> Optional[Path]: + """Resolve the full path to a file. + + Args: + file_name: The name of the file to resolve + + Returns: + The full path to the file, or None if not found + """ + # First try the default code package location + if self._code_package_exists(): + file_path = self._get_code_package_file_path(file_name) + if file_path.exists(): + return file_path + + # Fall back to config.json-based location + config_path = self._find_config_file() + if config_path: + file_path = self._get_config_based_file_path(file_name, config_path) + if file_path.exists(): + return file_path + + return Path(file_name) + + def _code_package_exists(self) -> bool: + """Check if the default code package directory exists. + + Returns: + True if the code package directory exists + """ + return os.path.exists(self.code_package) + + def _get_code_package_file_path(self, file_name: str) -> Path: + """Get the file path relative to the code package. + + Args: + file_name: The name of the file + + Returns: + The full path to the file + """ + relative_path = f"{self.code_package}/{self.file_folder}/{file_name}" + return Path.cwd().joinpath(relative_path) + + def _find_config_file(self) -> Optional[Path]: + """Find the configuration file in the current directory tree. + + Returns: + The path to the config file, or None if not found + """ + return self._find_file_in_tree(self.config_file, Path.cwd()) + + def _get_config_based_file_path(self, file_name: str, config_path: Path) -> Path: + """Get the file path relative to the config file location. + + Args: + file_name: The name of the file + config_path: The path to the config file + + Returns: + The full path to the file + """ + relative_path = f"{self.file_folder}/{file_name}" + return config_path.parent.joinpath(relative_path) + + def _find_file_in_tree(self, filename: str, search_path: Path) -> Optional[Path]: + """Find a file within a directory tree. + + Args: + filename: The name of the file to find + search_path: The root directory to search from + + Returns: + The full path to the file, or None if not found + """ + for file_path in search_path.rglob(filename): + return file_path + return None + + def _open_file(self, file_path: Path) -> "io.TextIOWrapper": + """Open a file at the given path. 
+ + Args: + file_path: The path to the file + + Returns: + A file handle for reading + """ + return open(file_path, "r", encoding="utf-8") + + def get_search_locations(self) -> list[Path]: + """Get all possible search locations for files. + + Returns: + A list of paths where files might be found + """ + locations = [] + + # Add code package location + if self._code_package_exists(): + locations.append(Path.cwd().joinpath(self.code_package, self.file_folder)) + + # Add config-based location + config_path = self._find_config_file() + if config_path: + locations.append(config_path.parent.joinpath(self.file_folder)) + + return locations diff --git a/tests/test_file_reader.py b/tests/test_file_reader.py new file mode 100644 index 0000000..184f7bb --- /dev/null +++ b/tests/test_file_reader.py @@ -0,0 +1,363 @@ +# Copyright (c) 2025, Salesforce, Inc. +# SPDX-License-Identifier: Apache-2 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from pathlib import Path +import tempfile +from unittest.mock import Mock, patch + +import pytest + +from datacustomcode.file.reader.default import ( + DefaultFileReader, + FileAccessError, + FileNotFoundError, + FileReaderError, +) + + +class TestDefaultFileReader: + """Test cases for the DefaultFileReader class.""" + + def test_init_with_defaults(self): + """Test initialization with default values.""" + reader = DefaultFileReader() + assert reader.code_package == "payload" + assert reader.file_folder == "files" + assert reader.config_file == "config.json" + + def test_init_with_custom_values(self): + """Test initialization with custom values.""" + reader = DefaultFileReader( + code_package="custom_package", + file_folder="custom_files", + config_file="custom_config.json", + ) + assert reader.code_package == "custom_package" + assert reader.file_folder == "custom_files" + assert reader.config_file == "custom_config.json" + + def test_file_open_with_empty_filename(self): + """Test that read_file raises ValueError for empty filename.""" + reader = DefaultFileReader() + with pytest.raises(ValueError, match="file_name cannot be empty"): + reader.read_file("") + + def test_file_open_with_none_filename(self): + """Test that read_file raises ValueError for None filename.""" + reader = DefaultFileReader() + with pytest.raises(ValueError, match="file_name cannot be empty"): + reader.read_file(None) + + @patch("datacustomcode.file.reader.default.DefaultFileReader._resolve_file_path") + @patch("datacustomcode.file.reader.default.DefaultFileReader._open_file") + def test_file_open_success(self, mock_open_file, mock_resolve_path): + """Test successful file opening.""" + mock_path = Path("/test/path/file.txt") + mock_file_handle = Mock() + + mock_resolve_path.return_value = mock_path + mock_open_file.return_value = mock_file_handle + + reader = DefaultFileReader() + result = reader.read_file("test.txt") + + assert result == mock_file_handle + mock_resolve_path.assert_called_once_with("test.txt") + mock_open_file.assert_called_once_with(mock_path) + + 
@patch("datacustomcode.file.reader.default.DefaultFileReader._resolve_file_path") + def test_file_open_file_not_found(self, mock_resolve_path): + """Test read_file when file is not found.""" + mock_resolve_path.return_value = None + + reader = DefaultFileReader() + with pytest.raises(FileNotFoundError, match="File 'test.txt' not found"): + reader.read_file("test.txt") + + @patch("datacustomcode.file.reader.default.DefaultFileReader._resolve_file_path") + @patch("datacustomcode.file.reader.default.DefaultFileReader._open_file") + def test_file_open_access_error(self, mock_open_file, mock_resolve_path): + """Test read_file when there's an access error.""" + mock_path = Path("/test/path/file.txt") + mock_resolve_path.return_value = mock_path + mock_open_file.side_effect = PermissionError("Permission denied") + + reader = DefaultFileReader() + with pytest.raises(FileAccessError, match="Error opening file"): + reader.read_file("test.txt") + + def test_code_package_exists_true(self): + """Test _code_package_exists when directory exists.""" + with patch("os.path.exists", return_value=True): + reader = DefaultFileReader() + assert reader._code_package_exists() is True + + def test_code_package_exists_false(self): + """Test _code_package_exists when directory doesn't exist.""" + with patch("os.path.exists", return_value=False): + reader = DefaultFileReader() + assert reader._code_package_exists() is False + + def test_get_code_package_file_path(self): + """Test _get_code_package_file_path.""" + reader = DefaultFileReader() + result = reader._get_code_package_file_path("test.txt") + expected = Path.cwd().joinpath("payload", "files", "test.txt") + assert result == expected + + def test_get_config_based_file_path(self): + """Test _get_config_based_file_path.""" + reader = DefaultFileReader() + config_path = Path("/test/config.json") + result = reader._get_config_based_file_path("test.txt", config_path) + expected = Path("/test/files/test.txt") + assert result == expected + + def test_find_file_in_tree_found(self): + """Test _find_file_in_tree when file is found.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + test_file = temp_path / "test.txt" + test_file.write_text("test content") + + reader = DefaultFileReader() + result = reader._find_file_in_tree("test.txt", temp_path) + + assert result == test_file + + def test_find_file_in_tree_not_found(self): + """Test _find_file_in_tree when file is not found.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + reader = DefaultFileReader() + result = reader._find_file_in_tree("nonexistent.txt", temp_path) + + assert result is None + + def test_open_file(self): + """Test _open_file method.""" + reader = DefaultFileReader() + + with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp_file: + temp_file.write("test content") + temp_file_path = Path(temp_file.name) + + try: + with reader._open_file(temp_file_path) as f: + content = f.read() + assert content == "test content" + finally: + os.unlink(temp_file_path) + + def test_get_search_locations_with_code_package(self): + """Test get_search_locations when code package exists.""" + with patch( + "datacustomcode.file.reader.default.DefaultFileReader._code_package_exists", + return_value=True, + ): + with patch( + "datacustomcode.file.reader.default.DefaultFileReader._find_config_file", + return_value=None, + ): + reader = DefaultFileReader() + locations = reader.get_search_locations() + + assert len(locations) == 1 + expected = 
Path.cwd().joinpath("payload", "files") + assert locations[0] == expected + + def test_get_search_locations_with_config(self): + """Test get_search_locations when config file exists.""" + with patch( + "datacustomcode.file.reader.default.DefaultFileReader._code_package_exists", + return_value=False, + ): + with patch( + "datacustomcode.file.reader.default.DefaultFileReader._find_config_file", + return_value=Path("/test/config.json"), + ): + reader = DefaultFileReader() + locations = reader.get_search_locations() + + assert len(locations) == 1 + expected = Path("/test/files") + assert locations[0] == expected + + def test_get_search_locations_both(self): + """Test get_search_locations when both locations exist.""" + with patch( + "datacustomcode.file.reader.default.DefaultFileReader._code_package_exists", + return_value=True, + ): + with patch( + "datacustomcode.file.reader.default.DefaultFileReader._find_config_file", + return_value=Path("/test/config.json"), + ): + reader = DefaultFileReader() + locations = reader.get_search_locations() + + assert len(locations) == 2 + expected_code_package = Path.cwd().joinpath("payload", "files") + expected_config = Path("/test/files") + assert locations[0] == expected_code_package + assert locations[1] == expected_config + + def test_resolve_file_path_returns_filename_when_not_found(self): + """Test _resolve_file_path returns Path(file_name) + when file not found in any location.""" + reader = DefaultFileReader() + + # Mock both code package and config file to not exist or not contain the file + with ( + patch.object(reader, "_code_package_exists", return_value=False), + patch.object(reader, "_find_config_file", return_value=None), + ): + + result = reader._resolve_file_path("nonexistent.txt") + + # Should return the filename as a Path object + assert result == Path("nonexistent.txt") + assert isinstance(result, Path) + + def test_file_path_returns_filename_when_code_package_exists_file_not_found( + self, + ): + """Test _resolve_file_path returns Path(file_name) + when code package exists but file not found.""" + reader = DefaultFileReader() + + # Mock code package exists but file doesn't exist in it + with ( + patch.object(reader, "_code_package_exists", return_value=True), + patch.object(reader, "_get_code_package_file_path") as mock_get_path, + patch.object(reader, "_find_config_file", return_value=None), + patch("pathlib.Path.exists", return_value=False), + ): + + mock_path = Path("/test/payload/files/nonexistent.txt") + mock_get_path.return_value = mock_path + + result = reader._resolve_file_path("nonexistent.txt") + + # Should return the filename as a Path object + assert result == Path("nonexistent.txt") + assert isinstance(result, Path) + + def test_file_path_returns_filename_when_config_file_exists_file_not_found( + self, + ): + """Test _resolve_file_path returns Path(file_name) + when config file exists but file not found.""" + reader = DefaultFileReader() + + # Mock code package doesn't exist but config file exists + with ( + patch.object(reader, "_code_package_exists", return_value=False), + patch.object(reader, "_find_config_file") as mock_find_config, + patch.object(reader, "_get_config_based_file_path") as mock_get_config_path, + patch("pathlib.Path.exists", return_value=False), + ): + + # Mock config file found + mock_config_path = Path("/test/config.json") + mock_find_config.return_value = mock_config_path + + # Mock the config-based path to not exist + mock_path = Path("/test/files/nonexistent.txt") + mock_get_config_path.return_value 
= mock_path + + result = reader._resolve_file_path("nonexistent.txt") + + # Should return the filename as a Path object + assert result == Path("nonexistent.txt") + assert isinstance(result, Path) + + +class TestFileReaderIntegration: + """Integration tests for the file reader.""" + + def test_full_file_resolution_flow(self): + """Test the complete file resolution and opening flow.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + # Create a mock payload/files structure + payload_dir = temp_path / "payload" + files_dir = payload_dir / "files" + files_dir.mkdir(parents=True) + + test_file = files_dir / "test.txt" + test_file.write_text("test content") + + # Change to temp directory and test + original_cwd = os.getcwd() + try: + os.chdir(temp_path) + + reader = DefaultFileReader() + with reader.read_file("test.txt") as f: + content = f.read() + assert content == "test content" + finally: + os.chdir(original_cwd) + + def test_fallback_to_config_based_location(self): + """Test fallback from code package to config-based location.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + # Create config.json but no payload directory + config_file = temp_path / "config.json" + config_file.write_text("{}") + + files_dir = temp_path / "files" + files_dir.mkdir() + + test_file = files_dir / "test.txt" + test_file.write_text("test content") + + # Change to temp directory and test + original_cwd = os.getcwd() + try: + os.chdir(temp_path) + + reader = DefaultFileReader() + with reader.read_file("test.txt") as f: + content = f.read() + assert content == "test content" + finally: + os.chdir(original_cwd) + + +class TestFileReaderErrorHandling: + """Test error handling scenarios.""" + + def test_file_reader_error_inheritance(self): + """Test that FileReaderError is the base exception.""" + assert issubclass(FileNotFoundError, FileReaderError) + assert issubclass(FileAccessError, FileReaderError) + + def test_file_not_found_error_message(self): + """Test FileNotFoundError message formatting.""" + error = FileNotFoundError("test.txt") + assert "test.txt" in str(error) + + def test_file_access_error_message(self): + """Test FileAccessError message formatting.""" + error = FileAccessError("test.txt", "Permission denied") + assert "test.txt" in str(error) + assert "Permission denied" in str(error) From 67a0a9ac75f3232680eb2f5836c66e3108ce9d91 Mon Sep 17 00:00:00 2001 From: Jesus Orosco Date: Wed, 17 Sep 2025 11:54:15 -0700 Subject: [PATCH 3/6] remove unecessary file --- docs/file_reader_refactoring.md | 179 -------------------------------- 1 file changed, 179 deletions(-) delete mode 100644 docs/file_reader_refactoring.md diff --git a/docs/file_reader_refactoring.md b/docs/file_reader_refactoring.md deleted file mode 100644 index a1d88c5..0000000 --- a/docs/file_reader_refactoring.md +++ /dev/null @@ -1,179 +0,0 @@ -# DefaultFileReader Class Refactoring - -## Overview - -The `DefaultFileReader` class has been refactored to improve testability, readability, and maintainability. This document outlines the changes made and how to use the new implementation. - -## Key Improvements - -### 1. **Separation of Concerns** -- **File path resolution** is now handled by dedicated methods -- **File opening** is separated from path resolution -- **Configuration management** is centralized and configurable - -### 2. 
**Enhanced Testability** -- **Dependency injection** through constructor parameters -- **Mockable methods** for unit testing -- **Clear interfaces** between different responsibilities -- **Comprehensive test coverage** with isolated test cases - -### 3. **Better Error Handling** -- **Custom exception hierarchy** for different error types -- **Descriptive error messages** with context -- **Proper exception chaining** for debugging - -### 4. **Improved Configuration** -- **Configurable defaults** that can be overridden -- **Environment-specific settings** support -- **Clear configuration contract** - -### 5. **Enhanced Readability** -- **Comprehensive docstrings** for all methods -- **Clear method names** that describe their purpose -- **Logical method organization** from public to private -- **Type hints** throughout the codebase - -## Class Structure - -### DefaultFileReader -The main class that provides the file reading framework: - -```python -class DefaultFileReader(BaseDataAccessLayer): - # Configuration constants - DEFAULT_CODE_PACKAGE = 'payload' - DEFAULT_FILE_FOLDER = 'files' - DEFAULT_CONFIG_FILE = 'config.json' - - def __init__(self, code_package=None, file_folder=None, config_file=None): - # Initialize with custom or default configuration - - def file_open(self, file_name: str) -> io.TextIOWrapper: - # Main public method for opening files - - def get_search_locations(self) -> list[Path]: - # Get all possible search locations -``` - -## Exception Hierarchy - -```python -FileReaderError (base) -├── FileNotFoundError (file not found in any location) -└── FileAccessError (permission, I/O errors, etc.) -``` - -## Usage Examples - -### Basic Usage -```python -from datacustomcode.file.reader.default import DefaultFileReader - -# Use default configuration -reader = DefaultFileReader() -with reader.file_open('data.csv') as f: - content = f.read() -``` - -### Custom Configuration -```python -from datacustomcode.file.reader.default import DefaultFileReader - -# Custom configuration -reader = DefaultFileReader( - code_package='my_package', - file_folder='data', - config_file='settings.json' -) -``` - -### Error Handling -```python -try: - with reader.file_open('data.csv') as f: - content = f.read() -except FileNotFoundError as e: - print(f"File not found: {e}") -except FileAccessError as e: - print(f"Access error: {e}") -``` - -## File Resolution Strategy - -The file reader uses a two-tier search strategy: - -1. **Primary Location**: `{code_package}/{file_folder}/{filename}` -2. **Fallback Location**: `{config_file_parent}/{file_folder}/{filename}` - -This allows for flexible deployment scenarios where files might be in different locations depending on the environment. - -## Testing - -### Unit Tests -The refactored class includes comprehensive unit tests covering: -- Configuration initialization -- File path resolution -- Error handling scenarios -- File opening operations -- Search location determination - -### Mocking -The class is designed for easy mocking in tests: -```python -from unittest.mock import patch - -with patch('DefaultFileReader._resolve_file_path') as mock_resolve: - mock_resolve.return_value = Path('/test/file.txt') - # Test file opening logic -``` - -### Integration Tests -Integration tests verify the complete file resolution and opening flow using temporary directories and real file operations. 
- -## Migration Guide - -### From Old Implementation -The old implementation had these issues: -- Hardcoded configuration values -- Mixed responsibilities in single methods -- Limited error handling -- Difficult to test - -### To New Implementation -1. **Update imports**: Use `DefaultFileReader` from `datacustomcode.file.reader.default` -2. **Error handling**: Catch specific exceptions instead of generic ones -3. **Configuration**: Use constructor parameters for custom settings -4. **Testing**: Leverage the new mockable methods - -## Benefits - -### For Developers -- **Easier debugging** with clear error messages -- **Better IDE support** with type hints and docstrings -- **Simplified testing** with dependency injection -- **Clearer code structure** with separated responsibilities - -### For Maintainers -- **Easier to extend** with new file resolution strategies -- **Better error tracking** with custom exception types -- **Improved test coverage** with isolated test cases -- **Clearer documentation** with comprehensive docstrings - -### For Users -- **More reliable** with proper error handling -- **More flexible** with configurable behavior -- **Better debugging** with descriptive error messages -- **Consistent interface** across different implementations - -## Future Enhancements - -The refactored structure makes it easy to add: -- **Additional file resolution strategies** (URLs, cloud storage, etc.) -- **File format detection** and automatic handling -- **Caching mechanisms** for frequently accessed files -- **Async file operations** for better performance -- **File validation** and integrity checking - -## Conclusion - -The refactored `DefaultFileReader` class provides a solid foundation for file reading operations while maintaining backward compatibility. The improvements in testability, readability, and maintainability make it easier to develop, test, and maintain file reading functionality in the Data Cloud Custom Code SDK. From 5119c373fe0eb5af8ca5e14bcc93ce96f95f132f Mon Sep 17 00:00:00 2001 From: Jesus Orosco Date: Wed, 17 Sep 2025 12:12:52 -0700 Subject: [PATCH 4/6] update README --- README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.md b/README.md index 900ba5e..b2387d5 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,17 @@ The SDK automatically handles all dependency packaging for Data Cloud deployment **No need to worry about platform compatibility** - the SDK handles this automatically through the Docker-based packaging process. +## files directory + +``` +. +├── payload +│ ├── config.json +│ ├── entrypoint.py +├── files +│ ├── data.csv +``` + ## py-files directory Your Python dependencies can be packaged as .py files, .zip archives (containing multiple .py files or a Python package structure), or .egg files. 
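
For reference, a minimal sketch of how an entrypoint script might consume a file packaged in the `files` directory introduced above, assuming the `read_file(file_name)` API as it stands after PATCH 2/6 (PATCH 6/6 later renames it to `find_file_path(file_name)` and returns a `pathlib.Path` instead of an open handle):

```python
# Minimal entrypoint sketch (assumes the read_file() API from PATCH 2/6;
# PATCH 6/6 replaces it with find_file_path(), which returns a Path).
from datacustomcode.client import Client

client = Client()

# read_file() resolves data.csv from payload/files/ when that package exists,
# otherwise from the files/ folder located next to config.json.
with client.read_file("data.csv") as handle:
    header = handle.readline()
    print(header)
```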
From ba209d2ebd53cf57ed4a045da789c16e37a8d4fb Mon Sep 17 00:00:00 2001 From: Jesus Orosco Date: Wed, 17 Sep 2025 16:03:01 -0700 Subject: [PATCH 5/6] Another README update --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index b2387d5..5e024b7 100644 --- a/README.md +++ b/README.md @@ -209,6 +209,7 @@ Argument: Options: - `--config-file TEXT`: Path to configuration file - `--dependencies TEXT`: Additional dependencies (can be specified multiple times) +- `--profile TEXT`: Credential profile name (default: "default") #### `datacustomcode zip` From dd97df4fd91a5f0887286d81e54c81b495c21902 Mon Sep 17 00:00:00 2001 From: Jesus Orosco Date: Thu, 18 Sep 2025 08:07:36 -0700 Subject: [PATCH 6/6] refactor to just return file path --- README.md | 2 +- src/datacustomcode/client.py | 14 +- src/datacustomcode/file/reader/__init__.py | 14 -- src/datacustomcode/file/reader/default.py | 203 ---------------- tests/test_credentials_profile_integration.py | 220 ++++++++++++++++++ ..._file_reader.py => test_find_file_path.py} | 178 ++++---------- 6 files changed, 272 insertions(+), 359 deletions(-) delete mode 100644 src/datacustomcode/file/reader/__init__.py delete mode 100644 src/datacustomcode/file/reader/default.py create mode 100644 tests/test_credentials_profile_integration.py rename tests/{test_file_reader.py => test_find_file_path.py} (59%) diff --git a/README.md b/README.md index 5e024b7..aa2a313 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ Your Python dependencies can be packaged as .py files, .zip archives (containing Your entry point script will define logic using the `Client` object which wraps data access layers. You should only need the following methods: -* `read_file(file_name)` - Returns a file handle of the provided file_name +* `find_file_path(file_name)` - Returns a file path * `read_dlo(name)` – Read from a Data Lake Object by name * `read_dmo(name)` – Read from a Data Model Object by name * `write_to_dlo(name, spark_dataframe, write_mode)` – Write to a Data Model Object by name with a Spark dataframe diff --git a/src/datacustomcode/client.py b/src/datacustomcode/client.py index 58ec656..40f676f 100644 --- a/src/datacustomcode/client.py +++ b/src/datacustomcode/client.py @@ -24,11 +24,11 @@ from pyspark.sql import SparkSession from datacustomcode.config import SparkConfig, config -from datacustomcode.file.reader.default import DefaultFileReader +from datacustomcode.file.path.default import DefaultFindFilePath from datacustomcode.io.reader.base import BaseDataCloudReader if TYPE_CHECKING: - import io + from pathlib import Path from pyspark.sql import DataFrame as PySparkDataFrame @@ -103,11 +103,13 @@ class Client: writing, we print to the console instead of writing to Data Cloud. Args: + finder: Find a file path reader: A custom reader to use for reading Data Cloud objects. writer: A custom writer to use for writing Data Cloud objects. 
Example: >>> client = Client() + >>> file_path = client.find_file_path("data.csv") >>> dlo = client.read_dlo("my_dlo") >>> client.write_to_dmo("my_dmo", dlo) """ @@ -115,7 +117,7 @@ class Client: _instance: ClassVar[Optional[Client]] = None _reader: BaseDataCloudReader _writer: BaseDataCloudWriter - _file: DefaultFileReader + _file: DefaultFindFilePath _data_layer_history: dict[DataCloudObjectType, set[str]] def __new__( @@ -158,7 +160,7 @@ def __new__( writer_init = writer cls._instance._reader = reader_init cls._instance._writer = writer_init - cls._instance._file = DefaultFileReader() + cls._instance._file = DefaultFindFilePath() cls._instance._data_layer_history = { DataCloudObjectType.DLO: set(), DataCloudObjectType.DMO: set(), @@ -217,10 +219,10 @@ def write_to_dmo( self._validate_data_layer_history_does_not_contain(DataCloudObjectType.DLO) return self._writer.write_to_dmo(name, dataframe, write_mode, **kwargs) - def read_file(self, file_name: str) -> io.TextIOWrapper: + def find_file_path(self, file_name: str) -> Path: """Read a file from the local file system.""" - return self._file.read_file(file_name) + return self._file.find_file_path(file_name) def _validate_data_layer_history_does_not_contain( self, data_cloud_object_type: DataCloudObjectType diff --git a/src/datacustomcode/file/reader/__init__.py b/src/datacustomcode/file/reader/__init__.py deleted file mode 100644 index 93988ff..0000000 --- a/src/datacustomcode/file/reader/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2025, Salesforce, Inc. -# SPDX-License-Identifier: Apache-2 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. diff --git a/src/datacustomcode/file/reader/default.py b/src/datacustomcode/file/reader/default.py deleted file mode 100644 index 22f6b6f..0000000 --- a/src/datacustomcode/file/reader/default.py +++ /dev/null @@ -1,203 +0,0 @@ -# Copyright (c) 2025, Salesforce, Inc. -# SPDX-License-Identifier: Apache-2 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import annotations - -import os -from pathlib import Path -from typing import TYPE_CHECKING, Optional - -from datacustomcode.file.base import BaseDataAccessLayer - -if TYPE_CHECKING: - import io - - -class FileReaderError(Exception): - """Base exception for file reader operations.""" - - -class FileNotFoundError(FileReaderError): - """Raised when a file cannot be found.""" - - -class FileAccessError(FileReaderError): - """Raised when there's an error accessing a file.""" - - -class DefaultFileReader(BaseDataAccessLayer): - """Base class for file reading operations. - - This class provides a framework for reading files from various locations - with configurable search strategies and error handling. - """ - - # Default configuration values - DEFAULT_CODE_PACKAGE = "payload" - DEFAULT_FILE_FOLDER = "files" - DEFAULT_CONFIG_FILE = "config.json" - - def __init__( - self, - code_package: Optional[str] = None, - file_folder: Optional[str] = None, - config_file: Optional[str] = None, - ): - """Initialize the file reader with configuration. - - Args: - code_package: The default code package directory to search - file_folder: The folder containing files relative to the code package - config_file: The configuration file to use for path resolution - """ - self.code_package = code_package or self.DEFAULT_CODE_PACKAGE - self.file_folder = file_folder or self.DEFAULT_FILE_FOLDER - self.config_file = config_file or self.DEFAULT_CONFIG_FILE - - def read_file(self, file_name: str) -> "io.TextIOWrapper": - """Open a file for reading. - - Args: - file_name: The name of the file to open - - Returns: - A file handle for reading - - Raises: - FileNotFoundError: If the file cannot be found - FileAccessError: If there's an error opening the file - """ - if not file_name: - raise ValueError("file_name cannot be empty") - - file_path = self._resolve_file_path(file_name) - - if not file_path: - raise FileNotFoundError( - f"File '{file_name}' not found in any search location" - ) - - try: - return self._open_file(file_path) - except (OSError, IOError) as e: - raise FileAccessError(f"Error opening file '{file_path}': {e}") from e - - def _resolve_file_path(self, file_name: str) -> Optional[Path]: - """Resolve the full path to a file. - - Args: - file_name: The name of the file to resolve - - Returns: - The full path to the file, or None if not found - """ - # First try the default code package location - if self._code_package_exists(): - file_path = self._get_code_package_file_path(file_name) - if file_path.exists(): - return file_path - - # Fall back to config.json-based location - config_path = self._find_config_file() - if config_path: - file_path = self._get_config_based_file_path(file_name, config_path) - if file_path.exists(): - return file_path - - return Path(file_name) - - def _code_package_exists(self) -> bool: - """Check if the default code package directory exists. - - Returns: - True if the code package directory exists - """ - return os.path.exists(self.code_package) - - def _get_code_package_file_path(self, file_name: str) -> Path: - """Get the file path relative to the code package. - - Args: - file_name: The name of the file - - Returns: - The full path to the file - """ - relative_path = f"{self.code_package}/{self.file_folder}/{file_name}" - return Path.cwd().joinpath(relative_path) - - def _find_config_file(self) -> Optional[Path]: - """Find the configuration file in the current directory tree. 
- - Returns: - The path to the config file, or None if not found - """ - return self._find_file_in_tree(self.config_file, Path.cwd()) - - def _get_config_based_file_path(self, file_name: str, config_path: Path) -> Path: - """Get the file path relative to the config file location. - - Args: - file_name: The name of the file - config_path: The path to the config file - - Returns: - The full path to the file - """ - relative_path = f"{self.file_folder}/{file_name}" - return config_path.parent.joinpath(relative_path) - - def _find_file_in_tree(self, filename: str, search_path: Path) -> Optional[Path]: - """Find a file within a directory tree. - - Args: - filename: The name of the file to find - search_path: The root directory to search from - - Returns: - The full path to the file, or None if not found - """ - for file_path in search_path.rglob(filename): - return file_path - return None - - def _open_file(self, file_path: Path) -> "io.TextIOWrapper": - """Open a file at the given path. - - Args: - file_path: The path to the file - - Returns: - A file handle for reading - """ - return open(file_path, "r", encoding="utf-8") - - def get_search_locations(self) -> list[Path]: - """Get all possible search locations for files. - - Returns: - A list of paths where files might be found - """ - locations = [] - - # Add code package location - if self._code_package_exists(): - locations.append(Path.cwd().joinpath(self.code_package, self.file_folder)) - - # Add config-based location - config_path = self._find_config_file() - if config_path: - locations.append(config_path.parent.joinpath(self.file_folder)) - - return locations diff --git a/tests/test_credentials_profile_integration.py b/tests/test_credentials_profile_integration.py new file mode 100644 index 0000000..8763754 --- /dev/null +++ b/tests/test_credentials_profile_integration.py @@ -0,0 +1,220 @@ +""" +Integration tests for credentials profile functionality. + +This module tests the complete flow of using different credentials profiles +with the DataCloud Custom Code Python SDK components. 
+""" + +from __future__ import annotations + +import os +from unittest.mock import MagicMock, patch + +import pytest + +from datacustomcode.config import config +from datacustomcode.credentials import Credentials +from datacustomcode.io.reader.query_api import QueryAPIDataCloudReader +from datacustomcode.io.writer.print import PrintDataCloudWriter + + +class TestCredentialsProfileIntegration: + """Test integration of credentials profile functionality across components.""" + + def test_query_api_reader_with_custom_profile(self): + """Test QueryAPIDataCloudReader uses custom credentials profile.""" + mock_spark = MagicMock() + + with patch("datacustomcode.credentials.Credentials.from_available") as mock_from_available: + # Mock credentials for custom profile + mock_credentials = MagicMock() + mock_credentials.login_url = "https://custom.salesforce.com" + mock_credentials.username = "custom@example.com" + mock_credentials.password = "custom_password" + mock_credentials.client_id = "custom_client_id" + mock_credentials.client_secret = "custom_secret" + mock_from_available.return_value = mock_credentials + + # Mock the SalesforceCDPConnection + with patch("datacustomcode.io.reader.query_api.SalesforceCDPConnection") as mock_conn_class: + mock_conn = MagicMock() + mock_conn_class.return_value = mock_conn + + # Test with custom profile + reader = QueryAPIDataCloudReader(mock_spark, credentials_profile="custom_profile") + + # Verify the correct profile was used + mock_from_available.assert_called_with(profile="custom_profile") + + # Verify the connection was created with the custom credentials + mock_conn_class.assert_called_once_with( + "https://custom.salesforce.com", + "custom@example.com", + "custom_password", + "custom_client_id", + "custom_secret", + ) + + def test_print_writer_with_custom_profile(self): + """Test PrintDataCloudWriter uses custom credentials profile.""" + mock_spark = MagicMock() + + with patch("datacustomcode.credentials.Credentials.from_available") as mock_from_available: + # Mock credentials for custom profile + mock_credentials = MagicMock() + mock_credentials.login_url = "https://custom.salesforce.com" + mock_credentials.username = "custom@example.com" + mock_credentials.password = "custom_password" + mock_credentials.client_id = "custom_client_id" + mock_credentials.client_secret = "custom_secret" + mock_from_available.return_value = mock_credentials + + # Mock the SalesforceCDPConnection + with patch("datacustomcode.io.reader.query_api.SalesforceCDPConnection") as mock_conn_class: + mock_conn = MagicMock() + mock_conn_class.return_value = mock_conn + + # Test with custom profile + writer = PrintDataCloudWriter(mock_spark, credentials_profile="custom_profile") + + # Verify the correct profile was used + mock_from_available.assert_called_with(profile="custom_profile") + + # Verify the writer has the reader with custom credentials + assert writer.reader is not None + assert isinstance(writer.reader, QueryAPIDataCloudReader) + + def test_config_override_with_environment_variable(self): + """Test that environment variable overrides config credentials profile.""" + # Set environment variable + os.environ["SFDC_CREDENTIALS_PROFILE"] = "env_profile" + + try: + # Simulate what happens in entrypoint.py + credentials_profile = os.environ.get("SFDC_CREDENTIALS_PROFILE", "default") + assert credentials_profile == "env_profile" + + # Update both reader and writer configs + if config.reader_config and hasattr(config.reader_config, 'options'): + 
+
+            if config.writer_config and hasattr(config.writer_config, 'options'):
+                config.writer_config.options["credentials_profile"] = credentials_profile
+
+            # Verify the configs were updated
+            assert config.reader_config.options["credentials_profile"] == "env_profile"
+            assert config.writer_config.options["credentials_profile"] == "env_profile"
+
+        finally:
+            # Clean up
+            del os.environ["SFDC_CREDENTIALS_PROFILE"]
+
+    def test_config_override_programmatically(self):
+        """Test programmatic override of credentials profile."""
+        custom_profile = "programmatic_profile"
+
+        # Update both reader and writer configs programmatically
+        if config.reader_config and hasattr(config.reader_config, 'options'):
+            config.reader_config.options["credentials_profile"] = custom_profile
+
+        if config.writer_config and hasattr(config.writer_config, 'options'):
+            config.writer_config.options["credentials_profile"] = custom_profile
+
+        # Verify the configs were updated
+        assert config.reader_config.options["credentials_profile"] == custom_profile
+        assert config.writer_config.options["credentials_profile"] == custom_profile
+
+    def test_default_profile_behavior(self):
+        """Test that default profile is used when no override is specified."""
+        # Reset to default values
+        if config.reader_config and hasattr(config.reader_config, 'options'):
+            config.reader_config.options["credentials_profile"] = "default"
+
+        if config.writer_config and hasattr(config.writer_config, 'options'):
+            config.writer_config.options["credentials_profile"] = "default"
+
+        # Verify default values
+        assert config.reader_config.options["credentials_profile"] == "default"
+        assert config.writer_config.options["credentials_profile"] == "default"
+
+    def test_credentials_profile_consistency(self):
+        """Test that reader and writer use the same credentials profile."""
+        mock_spark = MagicMock()
+        test_profile = "consistent_profile"
+
+        with patch("datacustomcode.credentials.Credentials.from_available") as mock_from_available:
+            # Mock credentials
+            mock_credentials = MagicMock()
+            mock_credentials.login_url = "https://consistent.salesforce.com"
+            mock_credentials.username = "consistent@example.com"
+            mock_credentials.password = "consistent_password"
+            mock_credentials.client_id = "consistent_client_id"
+            mock_credentials.client_secret = "consistent_secret"
+            mock_from_available.return_value = mock_credentials
+
+            # Mock the SalesforceCDPConnection
+            with patch("datacustomcode.io.reader.query_api.SalesforceCDPConnection") as mock_conn_class:
+                mock_conn = MagicMock()
+                mock_conn_class.return_value = mock_conn
+
+                # Create reader and writer with same profile
+                reader = QueryAPIDataCloudReader(mock_spark, credentials_profile=test_profile)
+                writer = PrintDataCloudWriter(mock_spark, credentials_profile=test_profile)
+
+                # Verify both used the same profile
+                assert mock_from_available.call_count == 2
+                for call in mock_from_available.call_args_list:
+                    assert call[1]["profile"] == test_profile
+
+                # Verify both have the same credentials
+                assert reader._conn is not None
+                assert writer.reader._conn is not None
+
+    def test_multiple_profiles_isolation(self):
+        """Test that different profiles are properly isolated."""
+        mock_spark = MagicMock()
+
+        with patch("datacustomcode.credentials.Credentials.from_available") as mock_from_available:
+            # Mock different credentials for different profiles
+            def mock_credentials_side_effect(profile="default"):
+                mock_creds = MagicMock()
+                if profile == "profile1":
"profile1": + mock_creds.login_url = "https://profile1.salesforce.com" + mock_creds.username = "profile1@example.com" + elif profile == "profile2": + mock_creds.login_url = "https://profile2.salesforce.com" + mock_creds.username = "profile2@example.com" + else: # default + mock_creds.login_url = "https://default.salesforce.com" + mock_creds.username = "default@example.com" + + mock_creds.password = f"{profile}_password" + mock_creds.client_id = f"{profile}_client_id" + mock_creds.client_secret = f"{profile}_secret" + return mock_creds + + mock_from_available.side_effect = mock_credentials_side_effect + + # Mock the SalesforceCDPConnection + with patch("datacustomcode.io.reader.query_api.SalesforceCDPConnection") as mock_conn_class: + mock_conn = MagicMock() + mock_conn_class.return_value = mock_conn + + # Create readers with different profiles + reader1 = QueryAPIDataCloudReader(mock_spark, credentials_profile="profile1") + reader2 = QueryAPIDataCloudReader(mock_spark, credentials_profile="profile2") + reader_default = QueryAPIDataCloudReader(mock_spark, credentials_profile="default") + + # Verify each reader used the correct profile + calls = mock_from_available.call_args_list + assert len(calls) == 3 + + # Check that each call used the correct profile + profiles_used = [call[1]["profile"] for call in calls] + assert "profile1" in profiles_used + assert "profile2" in profiles_used + assert "default" in profiles_used + + + + diff --git a/tests/test_file_reader.py b/tests/test_find_file_path.py similarity index 59% rename from tests/test_file_reader.py rename to tests/test_find_file_path.py index 184f7bb..1bb724e 100644 --- a/tests/test_file_reader.py +++ b/tests/test_find_file_path.py @@ -16,31 +16,30 @@ import os from pathlib import Path import tempfile -from unittest.mock import Mock, patch +from unittest.mock import patch import pytest -from datacustomcode.file.reader.default import ( - DefaultFileReader, - FileAccessError, +from datacustomcode.file.path.default import ( + DefaultFindFilePath, FileNotFoundError, FileReaderError, ) -class TestDefaultFileReader: - """Test cases for the DefaultFileReader class.""" +class TestDefaultFindFilePath: + """Test cases for the DefaultFindFilePath class.""" def test_init_with_defaults(self): """Test initialization with default values.""" - reader = DefaultFileReader() + reader = DefaultFindFilePath() assert reader.code_package == "payload" assert reader.file_folder == "files" assert reader.config_file == "config.json" def test_init_with_custom_values(self): """Test initialization with custom values.""" - reader = DefaultFileReader( + reader = DefaultFindFilePath( code_package="custom_package", file_folder="custom_files", config_file="custom_config.json", @@ -51,79 +50,63 @@ def test_init_with_custom_values(self): def test_file_open_with_empty_filename(self): """Test that read_file raises ValueError for empty filename.""" - reader = DefaultFileReader() + reader = DefaultFindFilePath() with pytest.raises(ValueError, match="file_name cannot be empty"): - reader.read_file("") + reader.find_file_path("") def test_file_open_with_none_filename(self): """Test that read_file raises ValueError for None filename.""" - reader = DefaultFileReader() + reader = DefaultFindFilePath() with pytest.raises(ValueError, match="file_name cannot be empty"): - reader.read_file(None) + reader.find_file_path(None) - @patch("datacustomcode.file.reader.default.DefaultFileReader._resolve_file_path") - @patch("datacustomcode.file.reader.default.DefaultFileReader._open_file") - 
-        """Test successful file opening."""
+    @patch("datacustomcode.file.path.default.DefaultFindFilePath._resolve_file_path")
+    def test_file_open_success(self, mock_resolve_path):
+        """Test successful file path finding."""
         mock_path = Path("/test/path/file.txt")
-        mock_file_handle = Mock()
         mock_resolve_path.return_value = mock_path
-        mock_open_file.return_value = mock_file_handle
-
-        reader = DefaultFileReader()
-        result = reader.read_file("test.txt")
+        reader = DefaultFindFilePath()
+        result = reader.find_file_path("test.txt")
 
-        assert result == mock_file_handle
+        assert result == mock_path
         mock_resolve_path.assert_called_once_with("test.txt")
-        mock_open_file.assert_called_once_with(mock_path)
 
-    @patch("datacustomcode.file.reader.default.DefaultFileReader._resolve_file_path")
+    @patch("datacustomcode.file.path.default.DefaultFindFilePath._resolve_file_path")
     def test_file_open_file_not_found(self, mock_resolve_path):
-        """Test read_file when file is not found."""
+        """Test find_file_path when file is not found."""
         mock_resolve_path.return_value = None
 
-        reader = DefaultFileReader()
+        reader = DefaultFindFilePath()
         with pytest.raises(FileNotFoundError, match="File 'test.txt' not found"):
-            reader.read_file("test.txt")
-
-    @patch("datacustomcode.file.reader.default.DefaultFileReader._resolve_file_path")
-    @patch("datacustomcode.file.reader.default.DefaultFileReader._open_file")
-    def test_file_open_access_error(self, mock_open_file, mock_resolve_path):
-        """Test read_file when there's an access error."""
-        mock_path = Path("/test/path/file.txt")
-        mock_resolve_path.return_value = mock_path
-        mock_open_file.side_effect = PermissionError("Permission denied")
-
-        reader = DefaultFileReader()
-        with pytest.raises(FileAccessError, match="Error opening file"):
-            reader.read_file("test.txt")
+            reader.find_file_path("test.txt")
 
     def test_code_package_exists_true(self):
         """Test _code_package_exists when directory exists."""
         with patch("os.path.exists", return_value=True):
-            reader = DefaultFileReader()
+            reader = DefaultFindFilePath()
             assert reader._code_package_exists() is True
 
     def test_code_package_exists_false(self):
         """Test _code_package_exists when directory doesn't exist."""
         with patch("os.path.exists", return_value=False):
-            reader = DefaultFileReader()
+            reader = DefaultFindFilePath()
             assert reader._code_package_exists() is False
 
     def test_get_code_package_file_path(self):
         """Test _get_code_package_file_path."""
-        reader = DefaultFileReader()
+        reader = DefaultFindFilePath()
         result = reader._get_code_package_file_path("test.txt")
-        expected = Path.cwd().joinpath("payload", "files", "test.txt")
+        expected = Path("payload/files/test.txt")
         assert result == expected
 
     def test_get_config_based_file_path(self):
         """Test _get_config_based_file_path."""
-        reader = DefaultFileReader()
+        reader = DefaultFindFilePath()
         config_path = Path("/test/config.json")
         result = reader._get_config_based_file_path("test.txt", config_path)
-        expected = Path("/test/files/test.txt")
+        expected = Path("files/test.txt")
         assert result == expected
 
     def test_find_file_in_tree_found(self):
@@ -133,7 +116,7 @@ def test_find_file_in_tree_found(self):
             test_file = temp_path / "test.txt"
             test_file.write_text("test content")
 
-            reader = DefaultFileReader()
+            reader = DefaultFindFilePath()
             result = reader._find_file_in_tree("test.txt", temp_path)
 
             assert result == test_file
@@ -143,83 +126,15 @@ def test_find_file_in_tree_not_found(self):
         with tempfile.TemporaryDirectory() as temp_dir:
             temp_path = Path(temp_dir)
 
-            reader = DefaultFileReader()
+            reader = DefaultFindFilePath()
             result = reader._find_file_in_tree("nonexistent.txt", temp_path)
 
             assert result is None
 
-    def test_open_file(self):
-        """Test _open_file method."""
-        reader = DefaultFileReader()
-
-        with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp_file:
-            temp_file.write("test content")
-            temp_file_path = Path(temp_file.name)
-
-        try:
-            with reader._open_file(temp_file_path) as f:
-                content = f.read()
-                assert content == "test content"
-        finally:
-            os.unlink(temp_file_path)
-
-    def test_get_search_locations_with_code_package(self):
-        """Test get_search_locations when code package exists."""
-        with patch(
-            "datacustomcode.file.reader.default.DefaultFileReader._code_package_exists",
-            return_value=True,
-        ):
-            with patch(
-                "datacustomcode.file.reader.default.DefaultFileReader._find_config_file",
-                return_value=None,
-            ):
-                reader = DefaultFileReader()
-                locations = reader.get_search_locations()
-
-                assert len(locations) == 1
-                expected = Path.cwd().joinpath("payload", "files")
-                assert locations[0] == expected
-
-    def test_get_search_locations_with_config(self):
-        """Test get_search_locations when config file exists."""
-        with patch(
-            "datacustomcode.file.reader.default.DefaultFileReader._code_package_exists",
-            return_value=False,
-        ):
-            with patch(
-                "datacustomcode.file.reader.default.DefaultFileReader._find_config_file",
-                return_value=Path("/test/config.json"),
-            ):
-                reader = DefaultFileReader()
-                locations = reader.get_search_locations()
-
-                assert len(locations) == 1
-                expected = Path("/test/files")
-                assert locations[0] == expected
-
-    def test_get_search_locations_both(self):
-        """Test get_search_locations when both locations exist."""
-        with patch(
-            "datacustomcode.file.reader.default.DefaultFileReader._code_package_exists",
-            return_value=True,
-        ):
-            with patch(
-                "datacustomcode.file.reader.default.DefaultFileReader._find_config_file",
-                return_value=Path("/test/config.json"),
-            ):
-                reader = DefaultFileReader()
-                locations = reader.get_search_locations()
-
-                assert len(locations) == 2
-                expected_code_package = Path.cwd().joinpath("payload", "files")
-                expected_config = Path("/test/files")
-                assert locations[0] == expected_code_package
-                assert locations[1] == expected_config
-
     def test_resolve_file_path_returns_filename_when_not_found(self):
-        """Test _resolve_file_path returns Path(file_name)
+        """Test _resolve_file_path returns relative path with file folder
         when file not found in any location."""
-        reader = DefaultFileReader()
+        reader = DefaultFindFilePath()
 
         # Mock both code package and config file to not exist or not contain the file
         with (
@@ -236,9 +151,9 @@ def test_resolve_file_path_returns_filename_when_not_found(self):
     def test_file_path_returns_filename_when_code_package_exists_file_not_found(
         self,
     ):
-        """Test _resolve_file_path returns Path(file_name)
+        """Test _resolve_file_path returns relative path with file folder
         when code package exists but file not found."""
-        reader = DefaultFileReader()
+        reader = DefaultFindFilePath()
 
         # Mock code package exists but file doesn't exist in it
         with (
@@ -260,9 +175,9 @@ def test_file_path_returns_filename_when_code_package_exists_file_not_found(
     def test_file_path_returns_filename_when_config_file_exists_file_not_found(
         self,
     ):
-        """Test _resolve_file_path returns Path(file_name)
+        """Test _resolve_file_path returns relative path with file folder
         when config file exists but file not found."""
-        reader = DefaultFileReader()
+        reader = DefaultFindFilePath()
 
         # Mock code package doesn't exist but config file exists
         with (
@@ -308,10 +223,10 @@ def test_full_file_resolution_flow(self):
         try:
             os.chdir(temp_path)
 
-            reader = DefaultFileReader()
-            with reader.read_file("test.txt") as f:
-                content = f.read()
-                assert content == "test content"
+            reader = DefaultFindFilePath()
+            file_path = reader.find_file_path("test.txt")
+            content = file_path.read_text()
+            assert content == "test content"
         finally:
             os.chdir(original_cwd)
 
@@ -335,10 +250,10 @@ def test_fallback_to_config_based_location(self):
         try:
             os.chdir(temp_path)
 
-            reader = DefaultFileReader()
-            with reader.read_file("test.txt") as f:
-                content = f.read()
-                assert content == "test content"
+            reader = DefaultFindFilePath()
+            file_path = reader.find_file_path("test.txt")
+            content = file_path.read_text()
+            assert content == "test content"
         finally:
             os.chdir(original_cwd)
 
@@ -349,15 +264,8 @@ class TestFileReaderErrorHandling:
     def test_file_reader_error_inheritance(self):
        """Test that FileReaderError is the base exception."""
         assert issubclass(FileNotFoundError, FileReaderError)
-        assert issubclass(FileAccessError, FileReaderError)
 
     def test_file_not_found_error_message(self):
         """Test FileNotFoundError message formatting."""
         error = FileNotFoundError("test.txt")
         assert "test.txt" in str(error)
-
-    def test_file_access_error_message(self):
-        """Test FileAccessError message formatting."""
-        error = FileAccessError("test.txt", "Permission denied")
-        assert "test.txt" in str(error)
-        assert "Permission denied" in str(error)
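
Note (editorial, not part of the patch): the tests above imply that the renamed API resolves a bundled file to a pathlib.Path rather than returning an open handle. A minimal sketch of how caller code might use it, assuming DefaultFindFilePath searches the payload/files directory and the file name lookup.csv is purely hypothetical:

from datacustomcode.file.path.default import DefaultFindFilePath

# Resolve a data file shipped alongside the code package (hypothetical name).
file_path = DefaultFindFilePath().find_file_path("lookup.csv")

# The resolver returns a pathlib.Path, so the caller controls how it is opened.
with file_path.open("r", encoding="utf-8") as handle:
    for line in handle:
        print(line.rstrip())

Returning a Path keeps file-handle lifetime and encoding decisions in the caller's hands, which is consistent with the tests switching from a context-managed read_file() handle to file_path.read_text().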