diff --git a/.changes/unreleased/added-20260319-095618.yaml b/.changes/unreleased/added-20260319-095618.yaml new file mode 100644 index 00000000..a97ce140 --- /dev/null +++ b/.changes/unreleased/added-20260319-095618.yaml @@ -0,0 +1,9 @@ +kind: added +body: Add `get_changed_items()` utility function to detect Fabric items changed via git diff for use with selective deployment +time: 2026-03-19T09:56:18.0000000+00:00 +custom: + Author: vipulb91 + AuthorLink: https://github.com/vipulb91 + Issue: "865" + IssueLink: https://github.com/microsoft/fabric-cicd/issues/865 + diff --git a/docs/how_to/optional_feature.md b/docs/how_to/optional_feature.md index 6a9bd4b6..117083c6 100644 --- a/docs/how_to/optional_feature.md +++ b/docs/how_to/optional_feature.md @@ -100,6 +100,40 @@ Shortcuts are items associated with Lakehouse items and can be selectively publi **Note:** This feature can be applied along with the other selective deployment features — please be cautious when using to avoid unexpected results. +## Git-Based Change Detection + +`get_changed_items()` is a public utility function that uses `git diff` to detect which Fabric items have been added, modified, or renamed relative to a given git reference. It returns a list of strings in `"item_name.item_type"` format that can be passed directly to `items_to_include` in `publish_all_items()`. + +This function **does not require any feature flags** because it is a standalone utility — the filtering decision stays with the caller. + +**Important:** If `get_changed_items()` returns an empty list (no changes detected), do not pass that empty list to `publish_all_items()` and do not call `publish_all_items()` without `items_to_include` — an empty or missing `items_to_include` applies no filter, so either call would default to a full deployment. 
Always guard against the empty-list case: + +```python +from fabric_cicd import FabricWorkspace, publish_all_items, get_changed_items + +workspace = FabricWorkspace( + workspace_id="your-workspace-id", + repository_directory="/path/to/repo", + item_type_in_scope=["Notebook", "DataPipeline"] +) + +changed = get_changed_items(workspace.repository_directory) + +if changed: + # Requires enable_experimental_features and enable_items_to_include flags + publish_all_items(workspace, items_to_include=changed) +else: + print("No changed items detected — skipping deployment.") +``` + +To compare against a branch or a specific commit instead of the previous commit, pass a custom `git_compare_ref`: + +```python +changed = get_changed_items(workspace.repository_directory, git_compare_ref="main") +``` + +**Note:** `get_changed_items()` returns only items that were **added, modified, or renamed** (i.e., candidates for publishing). It does not return deleted items. Passing `items_to_include` to `publish_all_items()` requires enabling the `enable_experimental_features` and `enable_items_to_include` feature flags. + ## Debugging If an error arises, or you want full transparency to all calls being made outside the library, enable debugging. Enabling debugging will write all API calls to the terminal. The logs can also be found in the `fabric_cicd.error.log` file. 
diff --git a/src/fabric_cicd/__init__.py b/src/fabric_cicd/__init__.py index dfd1a4f2..02eff4ff 100644 --- a/src/fabric_cicd/__init__.py +++ b/src/fabric_cicd/__init__.py @@ -12,7 +12,7 @@ from fabric_cicd._common._logging import configure_logger, exception_handler, get_file_handler from fabric_cicd.constants import FeatureFlag, ItemType from fabric_cicd.fabric_workspace import FabricWorkspace -from fabric_cicd.publish import deploy_with_config, publish_all_items, unpublish_all_orphan_items +from fabric_cicd.publish import deploy_with_config, get_changed_items, publish_all_items, unpublish_all_orphan_items logger = logging.getLogger(__name__) @@ -148,6 +148,7 @@ def disable_file_logging() -> None: "configure_external_file_logging", "deploy_with_config", "disable_file_logging", + "get_changed_items", "publish_all_items", "unpublish_all_orphan_items", ] diff --git a/src/fabric_cicd/_items/_base_publisher.py b/src/fabric_cicd/_items/_base_publisher.py index e1c97248..f5965bd0 100644 --- a/src/fabric_cicd/_items/_base_publisher.py +++ b/src/fabric_cicd/_items/_base_publisher.py @@ -355,11 +355,21 @@ def get_items_to_publish(self) -> dict[str, "Item"]: Get the items to publish for this item type. Returns: - Dictionary mapping item names to Item objects. + Dictionary mapping item names to Item objects, pre-filtered by + items_to_include when set so that only relevant items are iterated. Subclasses can override to filter or transform the items. 
""" - return self.fabric_workspace_obj.repository_items.get(self.item_type, {}) + all_items = self.fabric_workspace_obj.repository_items.get(self.item_type, {}) + items_to_include = self.fabric_workspace_obj.items_to_include + if not items_to_include: + return all_items + normalized_include_set = {i.lower() for i in items_to_include} + return { + name: item + for name, item in all_items.items() + if f"{name}.{self.item_type}".lower() in normalized_include_set + } def get_unpublish_order(self, items_to_unpublish: list[str]) -> list[str]: """ diff --git a/src/fabric_cicd/publish.py b/src/fabric_cicd/publish.py index 10f70e88..9ef89746 100644 --- a/src/fabric_cicd/publish.py +++ b/src/fabric_cicd/publish.py @@ -3,7 +3,10 @@ """Module for publishing and unpublishing Fabric workspace items.""" +import json import logging +import subprocess +from pathlib import Path from typing import Optional import dpath @@ -162,7 +165,17 @@ def publish_all_items( >>> print(responses) >>> # Access individual item response (dict with "header", "body", "status_code" keys) >>> notebook_response = workspace.responses["Notebook"]["Hello World"] - >>> print(notebook_response["status_code"]) # e.g., 200 + + With get_changed_items (deploy only git-changed items) + >>> from fabric_cicd import FabricWorkspace, publish_all_items, get_changed_items + >>> workspace = FabricWorkspace( + ... workspace_id="your-workspace-id", + ... repository_directory="/path/to/repo", + ... item_type_in_scope=["Notebook", "DataPipeline"] + ... ) + >>> changed = get_changed_items(workspace.repository_directory) + >>> if changed: + ... publish_all_items(workspace, items_to_include=changed) """ fabric_workspace_obj = validate_fabric_workspace_obj(fabric_workspace_obj) responses_enabled = FeatureFlag.ENABLE_RESPONSE_COLLECTION.value in constants.FEATURE_FLAG @@ -293,25 +306,11 @@ def unpublish_all_orphan_items( ... 
) >>> publish_all_items(workspace) >>> items_to_include = ["Hello World.Notebook", "Run Hello World.DataPipeline"] - >>> unpublish_all_orphan_items(workspace, items_to_include=items_to_include) + >>> unpublish_all_orphan_items(workspace, items_to_include=items_to_include) - With response collection - >>> from fabric_cicd import FabricWorkspace, publish_all_items, unpublish_all_orphan_items, append_feature_flag - >>> append_feature_flag("enable_response_collection") - >>> workspace = FabricWorkspace( - ... workspace_id="your-workspace-id", - ... repository_directory="/path/to/repo", - ... item_type_in_scope=["Environment", "Notebook", "DataPipeline"] - ... ) - >>> publish_all_items(workspace) - >>> responses = unpublish_all_orphan_items(workspace) - >>> # Access all unpublish responses - >>> print(responses) - >>> # Access individual item response (dict with "header", "body", "status_code" keys) - >>> notebook_response = workspace.unpublish_responses["Notebook"]["Hello World"] - >>> print(notebook_response["status_code"]) # e.g., 200 """ fabric_workspace_obj = validate_fabric_workspace_obj(fabric_workspace_obj) + validate_items_to_include(items_to_include, operation=constants.OperationType.UNPUBLISH) responses_enabled = FeatureFlag.ENABLE_RESPONSE_COLLECTION.value in constants.FEATURE_FLAG @@ -523,11 +522,171 @@ def deploy_with_config( def _collect_responses(workspace: Optional[FabricWorkspace], responses_enabled: bool) -> Optional[dict]: """Return collected API responses if available, otherwise None.""" - if not responses_enabled or workspace is None: - return None - result = {} - if workspace.responses: - result["publish"] = workspace.responses - if workspace.unpublish_responses: - result["unpublish"] = workspace.unpublish_responses - return result or None + if responses_enabled and workspace is not None and workspace.responses: + return workspace.responses + return None + + +def _find_platform_item(file_path: Path, repo_root: Path) -> Optional[tuple[str, str]]: + 
""" + Walk up from file_path towards repo_root looking for a .platform file. + + The .platform file marks the boundary of a Fabric item directory. + Its JSON content contains ``metadata.type`` (item type) and + ``metadata.displayName`` (item name). + + Returns: + A ``(item_name, item_type)`` tuple, or ``None`` if not found. + """ + current = file_path.parent + while True: + platform_file = current / ".platform" + if platform_file.exists(): + try: + data = json.loads(platform_file.read_text(encoding="utf-8")) + metadata = data.get("metadata", {}) + item_type = metadata.get("type") + item_name = metadata.get("displayName") or current.name + if item_type: + return item_name, item_type + except Exception as exc: + logger.debug(f"Could not parse .platform file at '{platform_file}': {exc}") + # Stop if we have reached the repository root or the filesystem root + if current == repo_root or current == current.parent: + break + current = current.parent + return None + + +def get_changed_items( + repository_directory: Path, + git_compare_ref: str = "HEAD~1", +) -> list[str]: + """ + Return the list of Fabric items that were added, modified, or renamed relative to ``git_compare_ref``. + + The returned list is in ``"item_name.item_type"`` format and can be passed directly + to the ``items_to_include`` parameter of :func:`publish_all_items` to deploy only + what has changed since the last commit. + + Args: + repository_directory: Path to the local git repository directory + (e.g. ``FabricWorkspace.repository_directory``). + git_compare_ref: Git ref to compare against. Defaults to ``"HEAD~1"``. + + Returns: + List of strings in ``"item_name.item_type"`` format. Returns an empty list when + no changes are detected, the git root cannot be found, or git is unavailable. + + Examples: + Deploy only changed items + >>> from fabric_cicd import FabricWorkspace, publish_all_items, get_changed_items + >>> workspace = FabricWorkspace( + ... workspace_id="your-workspace-id", + ... 
repository_directory="/path/to/repo", + ... item_type_in_scope=["Notebook", "DataPipeline"] + ... ) + >>> changed = get_changed_items(workspace.repository_directory) + >>> if changed: + ... publish_all_items(workspace, items_to_include=changed) + + With a custom git ref + >>> changed = get_changed_items(workspace.repository_directory, git_compare_ref="main") + >>> if changed: + ... publish_all_items(workspace, items_to_include=changed) + """ + changed, _ = _resolve_changed_items(Path(repository_directory), git_compare_ref) + return changed + + +def _resolve_changed_items( + repository_directory: Path, + git_compare_ref: str, +) -> tuple[list[str], list[str]]: + """ + Use ``git diff --name-status`` to detect Fabric items that changed or were + deleted relative to *git_compare_ref*. + + Args: + repository_directory: Absolute path to the local repository directory + (as stored on ``FabricWorkspace.repository_directory``). + git_compare_ref: Git ref to diff against (e.g. ``"HEAD~1"``). + + Returns: + A two-element tuple ``(changed_items, deleted_items)`` where each + element is a list of strings in ``"item_name.item_type"`` format. + Both lists are empty when the git root cannot be found or git fails. 
+ """ + from fabric_cicd._common._config_validator import _find_git_root + + git_root = _find_git_root(repository_directory) + if git_root is None: + logger.warning("get_changed_items: could not locate a git repository root — returning empty list.") + return [], [] + + try: + result = subprocess.run( + ["git", "diff", "--name-status", git_compare_ref], + cwd=str(git_root), + capture_output=True, + text=True, + check=True, + ) + except subprocess.CalledProcessError as exc: + logger.warning(f"get_changed_items: 'git diff' failed ({exc.stderr.strip()}) — returning empty list.") + return [], [] + + changed_items: set[str] = set() + deleted_items: set[str] = set() + + for line in result.stdout.splitlines(): + line = line.strip() + if not line: + continue + + parts = line.split("\t") + status = parts[0].strip() + + # Renames produce three tab-separated fields: R\told\tnew + if status.startswith("R") and len(parts) >= 3: + file_path_str = parts[2] + elif len(parts) >= 2: + file_path_str = parts[1] + else: + continue + + abs_path = git_root / file_path_str + + # Only consider files inside the configured repository directory + try: + abs_path.relative_to(repository_directory) + except ValueError: + continue + + if status == "D": + # For deleted items: if the .platform file itself was deleted, we can + # recover item metadata from the old commit via `git show`. 
+ if abs_path.name == ".platform": + try: + show_result = subprocess.run( + ["git", "show", f"{git_compare_ref}:{file_path_str}"], + cwd=str(git_root), + capture_output=True, + text=True, + check=True, + ) + data = json.loads(show_result.stdout) + metadata = data.get("metadata", {}) + item_type = metadata.get("type") + item_name = metadata.get("displayName") or abs_path.parent.name + if item_type and item_name: + deleted_items.add(f"{item_name}.{item_type}") + except Exception as exc: + logger.debug(f"get_changed_items: could not read deleted .platform '{file_path_str}': {exc}") + else: + # Modified / Added / Copied / Renamed — walk up to find the .platform + item_info = _find_platform_item(abs_path, repository_directory) + if item_info: + changed_items.add(f"{item_info[0]}.{item_info[1]}") + + return list(changed_items), list(deleted_items) diff --git a/tests/test_fabric_workspace.py b/tests/test_fabric_workspace.py index 34902bdf..a1fc0214 100644 --- a/tests/test_fabric_workspace.py +++ b/tests/test_fabric_workspace.py @@ -1578,6 +1578,7 @@ def test_mix_of_default_and_non_default_logical_ids(temp_workspace_dir, patched_ assert workspace.repository_items["Notebook"]["Git Notebook"].logical_id == unique_logical_id assert workspace.repository_items["DataPipeline"]["Exported Pipeline"].logical_id == constants.DEFAULT_GUID + def test_publish_variable_library_only_calls_replace_parameters( temp_workspace_dir, patched_fabric_workspace, valid_workspace_id ): diff --git a/tests/test_publish.py b/tests/test_publish.py index b26cb308..2ce3d61c 100644 --- a/tests/test_publish.py +++ b/tests/test_publish.py @@ -758,7 +758,9 @@ def test_folder_exclusion_with_items_to_include(mock_endpoint, temp_workspace_di assert workspace.repository_items["Notebook"]["ImportantNotebook"].skip_publish is True assert workspace.repository_items["Notebook"]["StandaloneNotebook"].skip_publish is False - assert workspace.repository_items["Notebook"]["OtherNotebook"].skip_publish is True + # 
OtherNotebook is pre-filtered in get_items_to_publish() because it is not in + # items_to_include, so _publish_item is never called and skip_publish stays False. + assert workspace.repository_items["Notebook"]["OtherNotebook"].skip_publish is False @pytest.mark.usefixtures("experimental_feature_flags") @@ -817,8 +819,10 @@ def test_folder_inclusion_with_items_to_include(mock_endpoint, temp_workspace_di ) assert workspace.repository_items["Notebook"]["Notebook1"].skip_publish is False - assert workspace.repository_items["Notebook"]["Notebook2"].skip_publish is True - assert workspace.repository_items["Notebook"]["ArchivedNotebook"].skip_publish is True + # Notebook2 and ArchivedNotebook are pre-filtered in get_items_to_publish() + # because they are not in items_to_include, so skip_publish stays False. + assert workspace.repository_items["Notebook"]["Notebook2"].skip_publish is False + assert workspace.repository_items["Notebook"]["ArchivedNotebook"].skip_publish is False @pytest.mark.usefixtures("experimental_feature_flags") @@ -849,10 +853,13 @@ def test_all_filters_combined(mock_endpoint, temp_workspace_dir): items_to_include=["TargetNotebook.Notebook"], ) - assert workspace.repository_items["Notebook"]["DebugNotebook"].skip_publish is True + # DebugNotebook, OtherNotebook, and ArchivedNotebook are pre-filtered in + # get_items_to_publish() because they are not in items_to_include, so + # _publish_item is never called and skip_publish stays False. 
+ assert workspace.repository_items["Notebook"]["DebugNotebook"].skip_publish is False assert workspace.repository_items["Notebook"]["TargetNotebook"].skip_publish is False - assert workspace.repository_items["Notebook"]["OtherNotebook"].skip_publish is True - assert workspace.repository_items["Notebook"]["ArchivedNotebook"].skip_publish is True + assert workspace.repository_items["Notebook"]["OtherNotebook"].skip_publish is False + assert workspace.repository_items["Notebook"]["ArchivedNotebook"].skip_publish is False # ============================================================================= @@ -931,3 +938,120 @@ def test_publish_mixed_files_with_ipynb(self, publisher, mock_workspace): def test_item_type_is_notebook(self, publisher): """Test that item_type is correctly set to Notebook.""" assert publisher.item_type == ItemType.NOTEBOOK.value + + +# ============================================================================= +# Tests for get_changed_items() +# ============================================================================= + + +class TestGetChangedItems: + """Tests for the public get_changed_items() utility function.""" + + def _make_git_diff_output(self, lines: list[str]) -> str: + return "\n".join(lines) + + def test_returns_changed_items_from_git_diff(self, tmp_path): + """Returns items detected as modified/added by git diff.""" + # Set up a fake item directory with a .platform file + item_dir = tmp_path / "MyNotebook.Notebook" + item_dir.mkdir() + platform = item_dir / ".platform" + platform.write_text( + '{"metadata": {"type": "Notebook", "displayName": "MyNotebook"}}', + encoding="utf-8", + ) + changed_file = item_dir / "notebook.py" + changed_file.write_text("print('hello')", encoding="utf-8") + + diff_output = self._make_git_diff_output(["M\tMyNotebook.Notebook/notebook.py"]) + + git_root_patch = "fabric_cicd._common._config_validator._find_git_root" + + with ( + patch(git_root_patch, return_value=tmp_path), + patch("subprocess.run") as 
mock_run, + ): + mock_run.return_value.stdout = diff_output + mock_run.return_value.returncode = 0 + + result = publish.get_changed_items(tmp_path) + + assert result == ["MyNotebook.Notebook"] + + def test_returns_empty_list_when_no_changes(self, tmp_path): + """Returns an empty list when git diff reports no changed files.""" + git_root_patch = "fabric_cicd._common._config_validator._find_git_root" + + with ( + patch(git_root_patch, return_value=tmp_path), + patch("subprocess.run") as mock_run, + ): + mock_run.return_value.stdout = "" + mock_run.return_value.returncode = 0 + + result = publish.get_changed_items(tmp_path) + + assert result == [] + + def test_returns_empty_list_when_git_root_not_found(self, tmp_path): + """Returns an empty list and logs a warning when no git root is found.""" + git_root_patch = "fabric_cicd._common._config_validator._find_git_root" + + with patch(git_root_patch, return_value=None): + result = publish.get_changed_items(tmp_path) + + assert result == [] + + def test_returns_empty_list_when_git_diff_fails(self, tmp_path): + """Returns an empty list and logs a warning when git diff fails.""" + import subprocess + + git_root_patch = "fabric_cicd._common._config_validator._find_git_root" + + with ( + patch(git_root_patch, return_value=tmp_path), + patch("subprocess.run", side_effect=subprocess.CalledProcessError(1, "git", stderr="bad ref")), + ): + result = publish.get_changed_items(tmp_path) + + assert result == [] + + def test_uses_custom_git_compare_ref(self, tmp_path): + """Passes the custom git_compare_ref to the underlying git command.""" + git_root_patch = "fabric_cicd._common._config_validator._find_git_root" + + with ( + patch(git_root_patch, return_value=tmp_path), + patch("subprocess.run") as mock_run, + ): + mock_run.return_value.stdout = "" + mock_run.return_value.returncode = 0 + + publish.get_changed_items(tmp_path, git_compare_ref="main") + + call_args = mock_run.call_args[0][0] + assert "main" in call_args + + def 
test_excludes_files_outside_repository_directory(self, tmp_path): + """Files changed outside the configured repository_directory are ignored.""" + outside_dir = tmp_path / "other_repo" / "SomeItem.Notebook" + outside_dir.mkdir(parents=True) + + diff_output = self._make_git_diff_output(["M\tother_repo/SomeItem.Notebook/item.py"]) + + git_root_patch = "fabric_cicd._common._config_validator._find_git_root" + + with ( + patch(git_root_patch, return_value=tmp_path), + patch("subprocess.run") as mock_run, + ): + mock_run.return_value.stdout = diff_output + mock_run.return_value.returncode = 0 + + # Use a subdirectory as the repository_directory so "other_repo" is out of scope + repo_subdir = tmp_path / "my_workspace" + repo_subdir.mkdir() + result = publish.get_changed_items(repo_subdir) + + assert result == []