Skip to content

Commit 9eb0d98

Browse files
authored
Updated init and scan to support functions. (#52)
* Updated init and scan to support functions.
* Use local SDK config: added a local SDK-specific config file to store the type of code package for use with SDK commands.
1 parent 12d45f9 commit 9eb0d98

32 files changed

+769
-152
lines changed

src/datacustomcode/cli.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -145,18 +145,36 @@ def deploy(
145145

146146
@cli.command()
147147
@click.argument("directory", default=".")
148-
def init(directory: str):
149-
from datacustomcode.scan import dc_config_json_from_file
150-
from datacustomcode.template import copy_template
148+
@click.option(
149+
"--code-type", default="script", type=click.Choice(["script", "function"])
150+
)
151+
def init(directory: str, code_type: str):
152+
from datacustomcode.scan import (
153+
dc_config_json_from_file,
154+
update_config,
155+
write_sdk_config,
156+
)
157+
from datacustomcode.template import copy_function_template, copy_script_template
151158

152159
click.echo("Copying template to " + click.style(directory, fg="blue", bold=True))
153-
copy_template(directory)
160+
if code_type == "script":
161+
copy_script_template(directory)
162+
elif code_type == "function":
163+
copy_function_template(directory)
154164
entrypoint_path = os.path.join(directory, "payload", "entrypoint.py")
155165
config_location = os.path.join(os.path.dirname(entrypoint_path), "config.json")
156-
config_json = dc_config_json_from_file(entrypoint_path)
166+
167+
# Write package type to SDK-specific config
168+
sdk_config = {"type": code_type}
169+
write_sdk_config(directory, sdk_config)
170+
171+
config_json = dc_config_json_from_file(entrypoint_path, code_type)
157172
with open(config_location, "w") as f:
158173
json.dump(config_json, f, indent=2)
159174

175+
updated_config_json = update_config(entrypoint_path)
176+
with open(config_location, "w") as f:
177+
json.dump(updated_config_json, f, indent=2)
160178
click.echo(
161179
"Start developing by updating the code in "
162180
+ click.style(entrypoint_path, fg="blue", bold=True)
@@ -176,15 +194,15 @@ def init(directory: str):
176194
"--no-requirements", is_flag=True, help="Skip generating requirements.txt file"
177195
)
178196
def scan(filename: str, config: str, dry_run: bool, no_requirements: bool):
179-
from datacustomcode.scan import dc_config_json_from_file, write_requirements_file
197+
from datacustomcode.scan import update_config, write_requirements_file
180198

181199
config_location = config or os.path.join(os.path.dirname(filename), "config.json")
182200
click.echo(
183201
"Dumping scan results to config file: "
184202
+ click.style(config_location, fg="blue", bold=True)
185203
)
186204
click.echo("Scanning " + click.style(filename, fg="blue", bold=True) + "...")
187-
config_json = dc_config_json_from_file(filename)
205+
config_json = update_config(filename)
188206

189207
click.secho(json.dumps(config_json, indent=2), fg="yellow")
190208
if not dry_run:

src/datacustomcode/scan.py

Lines changed: 162 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
import ast
1818
import json
19-
import logging
2019
import os
2120
import sys
2221
from typing import (
@@ -27,26 +26,109 @@
2726
Union,
2827
)
2928

29+
from loguru import logger
3030
import pydantic
3131

3232
from datacustomcode.version import get_version
3333

34-
logger = logging.getLogger(__name__)
35-
3634
DATA_ACCESS_METHODS = ["read_dlo", "read_dmo", "write_to_dlo", "write_to_dmo"]
3735

3836
DATA_TRANSFORM_CONFIG_TEMPLATE = {
3937
"sdkVersion": get_version(),
4038
"entryPoint": "",
41-
"dataspace": "",
39+
"dataspace": "default",
4240
"permissions": {
4341
"read": {},
4442
"write": {},
4543
},
4644
}
4745

46+
FUNCTION_CONFIG_TEMPLATE = {
47+
"sdkVersion": get_version(),
48+
"entryPoint": "",
49+
}
4850
STANDARD_LIBS = set(sys.stdlib_module_names)
4951

52+
SDK_CONFIG_DIR = ".datacustomcode_proj"
53+
SDK_CONFIG_FILE = "sdk_config.json"
54+
55+
56+
def get_sdk_config_path(base_directory: str) -> str:
57+
"""Get the path to the SDK-specific config file.
58+
59+
Args:
60+
base_directory: The base directory of the project
61+
(where the .datacustomcode_proj directory should be)
62+
63+
Returns:
64+
The path to the SDK config file
65+
"""
66+
sdk_config_dir = os.path.join(base_directory, SDK_CONFIG_DIR)
67+
return os.path.join(sdk_config_dir, SDK_CONFIG_FILE)
68+
69+
70+
def read_sdk_config(base_directory: str) -> dict[str, Any]:
71+
"""Read the SDK-specific config file.
72+
73+
Args:
74+
base_directory: The base directory of the project
75+
76+
Returns:
77+
The SDK config dictionary; raises FileNotFoundError if the file doesn't exist
78+
"""
79+
config_path = get_sdk_config_path(base_directory)
80+
if os.path.exists(config_path) and os.path.isfile(config_path):
81+
try:
82+
with open(config_path, "r") as f:
83+
config_data: dict[str, Any] = json.load(f)
84+
return config_data
85+
except json.JSONDecodeError as e:
86+
raise ValueError(f"Failed to parse JSON from {config_path}: {e}") from e
87+
except OSError as e:
88+
raise OSError(f"Failed to read SDK config file {config_path}: {e}") from e
89+
else:
90+
raise FileNotFoundError(f"SDK config file not found at {config_path}")
91+
92+
93+
def write_sdk_config(base_directory: str, config: dict[str, Any]) -> None:
94+
"""Write the SDK-specific config file.
95+
96+
Args:
97+
base_directory: The base directory of the project
98+
config: The config dictionary to write
99+
"""
100+
config_path = get_sdk_config_path(base_directory)
101+
sdk_config_dir = os.path.dirname(config_path)
102+
os.makedirs(sdk_config_dir, exist_ok=True)
103+
with open(config_path, "w") as f:
104+
json.dump(config, f, indent=2)
105+
106+
107+
def get_package_type(base_directory: str) -> str:
108+
"""Get the package type (script or function) from SDK config.
109+
110+
Args:
111+
base_directory: The base directory of the project
112+
113+
Returns:
114+
The package type ("script" or "function"); defaults to "script" when the SDK config file doesn't exist
115+
116+
Raises:
117+
ValueError: If the type is not found in the SDK config
118+
"""
119+
try:
120+
sdk_config = read_sdk_config(base_directory)
121+
except FileNotFoundError as e:
122+
logger.debug(f"Defaulting to script package type. {e}")
123+
return "script"
124+
if "type" not in sdk_config:
125+
config_path = get_sdk_config_path(base_directory)
126+
raise ValueError(
127+
f"Package type not found in SDK config at {config_path}. "
128+
"Please run 'datacustomcode init' to initialize the project."
129+
)
130+
return str(sdk_config["type"])
131+
50132

51133
class DataAccessLayerCalls(pydantic.BaseModel):
52134
read_dlo: frozenset[str]
@@ -230,57 +312,96 @@ def scan_file(file_path: str) -> DataAccessLayerCalls:
230312
return visitor.found()
231313

232314

233-
def dc_config_json_from_file(file_path: str) -> dict[str, Any]:
315+
def dc_config_json_from_file(file_path: str, type: str) -> dict[str, Any]:
234316
"""Create an initial Data Cloud Custom Code config JSON for a script or function entrypoint."""
235-
output = scan_file(file_path)
236-
config = DATA_TRANSFORM_CONFIG_TEMPLATE.copy()
317+
config: dict[str, Any]
318+
if type == "script":
319+
config = DATA_TRANSFORM_CONFIG_TEMPLATE.copy()
320+
elif type == "function":
321+
config = FUNCTION_CONFIG_TEMPLATE.copy()
237322
config["entryPoint"] = file_path.rpartition("/")[-1]
323+
return config
324+
325+
326+
def find_base_directory(file_path: str) -> str:
327+
"""Find the base directory containing .datacustomcode_proj by walking up from file_path.
328+
329+
Args:
330+
file_path: Path to a file in the project
331+
332+
Returns:
333+
The base directory path; if not found, the parent of the file's directory when that directory is named "payload", otherwise the directory containing the file
334+
"""
335+
current_dir = os.path.dirname(os.path.abspath(file_path))
336+
root = os.path.abspath(os.sep)
238337

338+
while current_dir != root:
339+
if os.path.exists(os.path.join(current_dir, SDK_CONFIG_DIR)):
340+
return current_dir
341+
current_dir = os.path.dirname(current_dir)
342+
343+
# If not found, assume the payload directory's parent is the base
344+
# (payload/entrypoint.py -> base directory is parent of payload)
345+
file_dir = os.path.dirname(os.path.abspath(file_path))
346+
if os.path.basename(file_dir) == "payload":
347+
return os.path.dirname(file_dir)
348+
return file_dir
349+
350+
351+
def update_config(file_path: str) -> dict[str, Any]:
239352
file_dir = os.path.dirname(file_path)
240353
config_json_path = os.path.join(file_dir, "config.json")
241-
354+
existing_config: dict[str, Any]
242355
if os.path.exists(config_json_path) and os.path.isfile(config_json_path):
243356
try:
244357
with open(config_json_path, "r") as f:
245358
existing_config = json.load(f)
246-
247-
if "dataspace" in existing_config:
248-
dataspace_value = existing_config["dataspace"]
249-
if not dataspace_value or (
250-
isinstance(dataspace_value, str) and dataspace_value.strip() == ""
251-
):
252-
logger.warning(
253-
f"dataspace in {config_json_path} is empty or None. "
254-
f"Updating config file to use dataspace 'default'. "
255-
)
256-
config["dataspace"] = "default"
257-
else:
258-
config["dataspace"] = dataspace_value
259-
else:
260-
raise ValueError(
261-
f"dataspace must be defined in {config_json_path}. "
262-
f"Please add a 'dataspace' field to the config.json file. "
263-
)
264359
except json.JSONDecodeError as e:
265360
raise ValueError(
266361
f"Failed to parse JSON from {config_json_path}: {e}"
267362
) from e
268363
except OSError as e:
269364
raise OSError(f"Failed to read config file {config_json_path}: {e}") from e
270365
else:
271-
config["dataspace"] = "default"
272-
273-
read: dict[str, list[str]] = {}
274-
if output.read_dlo:
275-
read["dlo"] = list(output.read_dlo)
276-
else:
277-
read["dmo"] = list(output.read_dmo)
278-
write: dict[str, list[str]] = {}
279-
if output.write_to_dlo:
280-
write["dlo"] = list(output.write_to_dlo)
366+
raise ValueError(f"config.json not found at {config_json_path}")
367+
368+
# Get package type from SDK config
369+
base_directory = find_base_directory(file_path)
370+
package_type = get_package_type(base_directory)
371+
372+
if package_type == "script":
373+
existing_config["dataspace"] = get_dataspace(existing_config)
374+
output = scan_file(file_path)
375+
read: dict[str, list[str]] = {}
376+
if output.read_dlo:
377+
read["dlo"] = list(output.read_dlo)
378+
else:
379+
read["dmo"] = list(output.read_dmo)
380+
write: dict[str, list[str]] = {}
381+
if output.write_to_dlo:
382+
write["dlo"] = list(output.write_to_dlo)
383+
else:
384+
write["dmo"] = list(output.write_to_dmo)
385+
386+
existing_config["permissions"] = {"read": read, "write": write}
387+
return existing_config
388+
389+
390+
def get_dataspace(existing_config: dict[str, str]) -> str:
391+
if "dataspace" in existing_config:
392+
dataspace_value = existing_config["dataspace"]
393+
if not dataspace_value or (
394+
isinstance(dataspace_value, str) and dataspace_value.strip() == ""
395+
):
396+
logger.warning(
397+
"dataspace is empty or None. "
398+
"Updating config file to use dataspace 'default'. "
399+
)
400+
return "default"
401+
else:
402+
return dataspace_value
281403
else:
282-
write["dmo"] = list(output.write_to_dmo)
283-
284-
config["permissions"] = {"read": read, "write": write}
285-
286-
return config
404+
raise ValueError(
405+
"dataspace must be defined. "
406+
"Please add a 'dataspace' field to the config.json file. "
407+
)

src/datacustomcode/template.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,31 @@
1717

1818
from loguru import logger
1919

20-
template_dir = os.path.join(os.path.dirname(__file__), "templates")
20+
script_template_dir = os.path.join(os.path.dirname(__file__), "templates", "script")
21+
function_template_dir = os.path.join(os.path.dirname(__file__), "templates", "function")
2122

2223

23-
def copy_template(target_dir: str) -> None:
24+
def copy_script_template(target_dir: str) -> None:
2425
"""Copy the script template to the target directory."""
2526
os.makedirs(target_dir, exist_ok=True)
2627

27-
for item in os.listdir(template_dir):
28-
source = os.path.join(template_dir, item)
28+
for item in os.listdir(script_template_dir):
29+
source = os.path.join(script_template_dir, item)
30+
destination = os.path.join(target_dir, item)
31+
32+
if os.path.isdir(source):
33+
logger.debug(f"Copying directory {source} to {destination}...")
34+
shutil.copytree(source, destination, dirs_exist_ok=True)
35+
else:
36+
logger.debug(f"Copying file {source} to {destination}...")
37+
shutil.copy2(source, destination)
38+
39+
40+
def copy_function_template(target_dir: str) -> None:
41+
os.makedirs(target_dir, exist_ok=True)
42+
43+
for item in os.listdir(function_template_dir):
44+
source = os.path.join(function_template_dir, item)
2945
destination = os.path.join(target_dir, item)
3046

3147
if os.path.isdir(source):

src/datacustomcode/templates/.devcontainer/devcontainer.json renamed to src/datacustomcode/templates/function/.devcontainer/devcontainer.json

File renamed without changes.
File renamed without changes.

src/datacustomcode/templates/Dockerfile.dependencies renamed to src/datacustomcode/templates/function/Dockerfile.dependencies

File renamed without changes.
File renamed without changes.
File renamed without changes.

src/datacustomcode/templates/build_native_dependencies.sh renamed to src/datacustomcode/templates/function/build_native_dependencies.sh

File renamed without changes.

src/datacustomcode/templates/examples/employee_hierarchy/employee_data.csv renamed to src/datacustomcode/templates/function/examples/employee_hierarchy/employee_data.csv

File renamed without changes.

0 commit comments

Comments
 (0)