8 changes: 4 additions & 4 deletions .github/workflows/push.yml
@@ -45,7 +45,7 @@ jobs:
codecov_yml_path: codecov.yml
token: ${{ secrets.CODECOV_TOKEN }}

-integration-python:
+test-reconcile:
runs-on:
group: databrickslabs-protected-runner-group
labels: linux-ubuntu-latest
@@ -68,8 +68,8 @@ jobs:
chmod +x $GITHUB_WORKSPACE/.github/scripts/setup_spark_remote.sh
$GITHUB_WORKSPACE/.github/scripts/setup_spark_remote.sh

-- name: Run integration tests
-run: hatch run integration
+- name: Run reconcile tests
+run: hatch run test-reconcile

- name: Publish test coverage
uses: codecov/codecov-action@v5
@@ -175,7 +175,7 @@ jobs:
echo "No file produced in tests-reports/"
exit 1

-end-to-end:
+test-install:

runs-on: ${{ matrix.runner }}
env: # this is a temporary hack
4 changes: 2 additions & 2 deletions Makefile
@@ -36,8 +36,8 @@ test:
test-install:
hatch run test-install

-integration: setup_spark_remote
-hatch run integration
+test-reconcile: setup_spark_remote
+hatch run test-reconcile
Comment on lines +39 to +40

Contributor:
Let's not do this. I'll provide more context in the review summary.

Contributor Author:
I need this convenience in my day-to-day work. `labs test` errors out on my machine, and I didn't really get how it works.


coverage:
hatch run coverage && open htmlcov/index.html
24 changes: 12 additions & 12 deletions pyproject.toml
@@ -97,18 +97,18 @@ reconcile = "databricks.labs.lakebridge.reconcile.execute:main"
profiler_dashboards = "databricks.labs.lakebridge.assessments.dashboards.execute:main"

[tool.hatch.envs.default.scripts]
test = "pytest --cov src --cov-report=xml tests/unit"
test-install = "pytest --cov src --cov-report=xml tests/integration/install"
coverage = "pytest --cov src tests --cov-report=html --ignore=tests/integration/install --ignore=tests/integration/connections --ignore=tests/integration/assessments"
integration = "pytest --cov src tests/integration/reconcile --durations 20"
fmt = ["black .",
"ruff check . --fix",
"mypy --disable-error-code 'annotation-unchecked' .",
"pylint --output-format=colorized -j 0 src tests"]
verify = ["black --check .",
"ruff check .",
"mypy --disable-error-code 'annotation-unchecked' .",
"pylint --output-format=colorized -j 0 src tests"]
test = "pytest --cov src --cov-report=xml tests/unit"
test-install = "pytest --cov src --cov-report=xml tests/integration/install"
test-reconcile = "pytest --cov src tests/integration/reconcile --ignore=tests/integration/reconcile/system_tests --durations 20"
coverage = "pytest --cov src tests --cov-report=html --ignore=tests/integration/install --ignore=tests/integration/connections --ignore=tests/integration/assessments"
fmt = ["black .",
"ruff check . --fix",
"mypy --disable-error-code 'annotation-unchecked' .",
"pylint --output-format=colorized -j 0 src tests"]
verify = ["black --check .",
"ruff check .",
"mypy --disable-error-code 'annotation-unchecked' .",
"pylint --output-format=colorized -j 0 src tests"]

[tool.hatch.envs.sqlglot-latest]
python="3.10"
12 changes: 8 additions & 4 deletions src/databricks/labs/lakebridge/cli.py
@@ -7,6 +7,7 @@
import re
import sys
import time
+import webbrowser
from collections.abc import Mapping
from pathlib import Path
from typing import NoReturn, TextIO
@@ -652,9 +653,11 @@ def reconcile(*, w: WorkspaceClient) -> None:
ctx.workspace_client,
ctx.installation,
ctx.install_state,
-ctx.prompts,
)
-recon_runner.run(operation_name=RECONCILE_OPERATION_NAME)
+
+_, job_run_url = recon_runner.run(operation_name=RECONCILE_OPERATION_NAME)
+if ctx.prompts.confirm(f"Would you like to open the job run URL `{job_run_url}` in the browser?"):
Collaborator:
I see where you are going with this, but let us not do this, given our overall goal of moving away from user prompting to a UI.

Collaborator:
Check the implementation in llm_transpile.

Contributor Author:
Then my refactoring makes sense: the prompting is where it should be, in the CLI, and not in the runner, which should not know anything about it.

Contributor:
This looks like a move: hoisting it from inside the .run() call to outside. I'm neutral on that; even though I agree with @sundarshankar89 about the original code, that's not the point of this PR.

Overall I'd prefer to just drop the prompt and leave the existing logging of the URL: every terminal allows the job to be opened by just clicking on it.

That's not the point or focus of this PR, however, which is why I'm neutral on leaving the UX as-is for now.

I assume this was moved to make some of the tests easier in some way?

Contributor Author:
Prompts are part of the CLI: they wait until a user interacts. Other entry points, like the integration test here, require neither user interaction nor a UI.

Removing the prompt made the tests easier: I no longer have to mock it, since the real implementation suspends execution while waiting for user input. I moved it to the CLI because that is the only place it belongs (see the sketch after this file's diff).
+webbrowser.open(job_run_url)


@lakebridge.command
@@ -668,10 +671,11 @@ def aggregates_reconcile(*, w: WorkspaceClient) -> None:
ctx.workspace_client,
ctx.installation,
ctx.install_state,
-ctx.prompts,
)

-recon_runner.run(operation_name=AGG_RECONCILE_OPERATION_NAME)
+_, job_run_url = recon_runner.run(operation_name=AGG_RECONCILE_OPERATION_NAME)
+if ctx.prompts.confirm(f"Would you like to open the job run URL `{job_run_url}` in the browser?"):
Collaborator:
Same as above.

+webbrowser.open(job_run_url)


@lakebridge.command
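A minimal sketch of the resulting split, using the names from this PR's diff (the wrapper function `run_reconcile_from_cli` is illustrative, not code from the change): the runner returns the wait handle and the run URL, and only the CLI layer decides whether to prompt and open a browser.

```python
import webbrowser

from databricks.labs.lakebridge.contexts.application import ApplicationContext
from databricks.labs.lakebridge.reconcile.runner import ReconcileRunner


def run_reconcile_from_cli(ctx: ApplicationContext) -> None:
    # The runner no longer takes Prompts; run() returns (wait, job_run_url)
    # instead of opening a browser itself.
    runner = ReconcileRunner(ctx.workspace_client, ctx.installation, ctx.install_state)
    wait, job_run_url = runner.run(operation_name="reconcile")

    # Only the CLI layer talks to the user.
    if ctx.prompts.confirm(f"Would you like to open the job run URL `{job_run_url}` in the browser?"):
        webbrowser.open(job_run_url)
    # A non-interactive caller, such as the new system test below, skips the
    # prompt and blocks on the run instead via wait.result().
```

Non-interactive callers never construct a Prompts object at all, which is what lets the unit tests below drop their MockPrompts stubs.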
11 changes: 4 additions & 7 deletions src/databricks/labs/lakebridge/reconcile/runner.py
@@ -1,12 +1,12 @@
import logging
-import webbrowser

from databricks.labs.blueprint.installation import Installation
from databricks.labs.blueprint.installation import SerdeError
from databricks.labs.blueprint.installer import InstallState
-from databricks.labs.blueprint.tui import Prompts
from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import NotFound, PermissionDenied
+from databricks.sdk.service._internal import Wait
+from databricks.sdk.service.jobs import Run

from databricks.labs.lakebridge.config import ReconcileConfig, TableRecon
from databricks.labs.lakebridge.deployment.recon import RECON_JOB_NAME
@@ -23,14 +23,12 @@ def __init__(
ws: WorkspaceClient,
installation: Installation,
install_state: InstallState,
-prompts: Prompts,
):
self._ws = ws
self._installation = installation
self._install_state = install_state
-self._prompts = prompts

-def run(self, operation_name=RECONCILE_OPERATION_NAME):
+def run(self, operation_name: str = RECONCILE_OPERATION_NAME) -> tuple[Wait[Run], str]:
reconcile_config = self._get_verified_recon_config()
job_id = self._get_recon_job_id(reconcile_config)
logger.info(f"Triggering the reconcile job with job_id: `{job_id}`")
@@ -42,8 +40,7 @@ def run(self, operation_name=RECONCILE_OPERATION_NAME):
logger.info(
f"'{operation_name.upper()}' job started. Please check the job_url `{job_run_url}` for the current status."
)
-if self._prompts.confirm(f"Would you like to open the job run URL `{job_run_url}` in the browser?"):
-webbrowser.open(job_run_url)
+return wait, job_run_url

def _get_verified_recon_config(self) -> ReconcileConfig:
try:
Empty file.
Original file line number Diff line number Diff line change
@@ -12,7 +12,7 @@
from databricks.labs.lakebridge.reconcile.schema_compare import SchemaCompare
from databricks.labs.lakebridge.reconcile.trigger_recon_service import TriggerReconService
from databricks.labs.lakebridge.transpiler.sqlglot.dialect_utils import get_dialect
-from tests.integration.reconcile.connectors.test_read_schema import OracleDataSourceUnderTest
+from tests.integration.reconcile.system_tests.test_read_schema import OracleDataSourceUnderTest


class DatabricksDataSourceUnderTest(DatabricksDataSource):
Original file line number Diff line number Diff line change
@@ -75,7 +75,6 @@ def _get_snowflake_options(self):
return opts


-@pytest.mark.skip(reason="Add the creds to Github secrets and populate the actions' env to enable this test")
def test_sql_server_read_schema_happy(mock_spark):
mock_ws = create_autospec(WorkspaceClient)
connector = TSQLServerDataSourceUnderTest(mock_spark, mock_ws)
58 changes: 58 additions & 0 deletions tests/integration/reconcile/system_tests/test_recon_databricks.py
@@ -0,0 +1,58 @@
from databricks.labs.lakebridge.config import (
    ReconcileConfig,
    DatabaseConfig,
    ReconcileMetadataConfig,
    LakebridgeConfiguration,
)
from databricks.labs.lakebridge.contexts.application import ApplicationContext
from databricks.labs.lakebridge.reconcile.recon_config import RECONCILE_OPERATION_NAME
from databricks.labs.lakebridge.reconcile.runner import ReconcileRunner

TABLE_RECON_JSON = """
{
    "source_schema": "test_source",
    "target_catalog": "sandbox",
    "target_schema": "test_target",
    "tables": [
        {
            "source_name": "diamonds",
            "target_name": "diamonds",
            "join_columns": ["color", "clarity"]
        }
    ]
}
"""

recon_config = ReconcileConfig(
    data_source="databricks",
    report_type="all",
    secret_scope="NOT_NEEDED",
    database_config=DatabaseConfig(
        source_catalog="sandbox", source_schema="test_source", target_catalog="sandbox", target_schema="test_target"
    ),
    metadata_config=ReconcileMetadataConfig(catalog="sandbox", schema="reconcile"),
)
config = LakebridgeConfiguration(None, recon_config)
source_catalog_or_schema = (
    recon_config.database_config.source_catalog
    if recon_config.database_config.source_catalog
    else recon_config.database_config.source_schema
)
filename = f"recon_config_{recon_config.data_source}_{source_catalog_or_schema}_{recon_config.report_type}.json"


def test_recon(ws):
    ctx = ApplicationContext(ws)
    recon_runner = ReconcileRunner(
        ctx.workspace_client,
        ctx.installation,
        ctx.install_state,
    )

    ctx.installation.save(recon_config)
    ctx.installation.upload(filename, TABLE_RECON_JSON.encode())
    ctx.workspace_installation.install(config)

    run, _ = recon_runner.run(operation_name=RECONCILE_OPERATION_NAME)
    result = run.result()
    assert result
46 changes: 15 additions & 31 deletions tests/unit/reconcile/test_runner.py
@@ -2,7 +2,6 @@
import pytest
from databricks.labs.blueprint.installation import MockInstallation
from databricks.labs.blueprint.installer import InstallState
-from databricks.labs.blueprint.tui import MockPrompts
from databricks.sdk import WorkspaceClient
from databricks.labs.lakebridge.reconcile.runner import ReconcileRunner
from databricks.labs.lakebridge.deployment.recon import RECON_JOB_NAME
@@ -12,15 +11,15 @@ def test_run_with_missing_recon_config():
ws = create_autospec(WorkspaceClient)
installation = MockInstallation()
install_state = InstallState.from_installation(installation)
-prompts = MockPrompts({})
-recon_runner = ReconcileRunner(ws, installation, install_state, prompts)
+
+recon_runner = ReconcileRunner(ws, installation, install_state)
with pytest.raises(SystemExit):
recon_runner.run()


def test_run_with_corrupt_recon_config():
ws = create_autospec(WorkspaceClient)
-prompts = MockPrompts({})
+
installation = MockInstallation(
{
"reconcile.yml": {
@@ -42,7 +41,7 @@
}
)
install_state = InstallState.from_installation(installation)
-recon_runner = ReconcileRunner(ws, installation, install_state, prompts)
+recon_runner = ReconcileRunner(ws, installation, install_state)
with pytest.raises(SystemExit):
recon_runner.run()

@@ -76,8 +75,8 @@ def test_run_with_missing_table_config():
}
)
install_state = InstallState.from_installation(installation)
-prompts = MockPrompts({})
-recon_runner = ReconcileRunner(ws, installation, install_state, prompts)
+
+recon_runner = ReconcileRunner(ws, installation, install_state)
with pytest.raises(SystemExit):
recon_runner.run()

@@ -129,8 +128,8 @@ def test_run_with_corrupt_table_config():
}
)
install_state = InstallState.from_installation(installation)
-prompts = MockPrompts({})
-recon_runner = ReconcileRunner(ws, installation, install_state, prompts)
+
+recon_runner = ReconcileRunner(ws, installation, install_state)
with pytest.raises(SystemExit):
recon_runner.run()

@@ -181,19 +180,14 @@ def test_run_with_missing_job_id():
}
)
install_state = InstallState.from_installation(installation)
-prompts = MockPrompts({})
-recon_runner = ReconcileRunner(ws, installation, install_state, prompts)
+
+recon_runner = ReconcileRunner(ws, installation, install_state)
with pytest.raises(SystemExit):
recon_runner.run()


def test_run_with_job_id_in_config():
ws = create_autospec(WorkspaceClient)
-prompts = MockPrompts(
-{
-r"Would you like to open the job run URL .*": "no",
-}
-)
installation = MockInstallation(
{
"reconcile.yml": {
@@ -243,19 +237,14 @@ def test_run_with_job_id_in_config():
wait.run_id = "rid"
ws.jobs.run_now.return_value = wait

-recon_runner = ReconcileRunner(ws, installation, install_state, prompts)
+recon_runner = ReconcileRunner(ws, installation, install_state)
recon_runner.run()
ws.jobs.run_now.assert_called_once_with(1234, job_parameters={'operation_name': 'reconcile'})


def test_run_with_job_id_in_state(monkeypatch):
monkeypatch.setattr("webbrowser.open", lambda url: None)
ws = create_autospec(WorkspaceClient)
-prompts = MockPrompts(
-{
-r"Would you like to open the job run URL .*": "yes",
-}
-)
installation = MockInstallation(
{
"state.json": {
@@ -308,7 +297,7 @@ def test_run_with_job_id_in_state(monkeypatch):
wait.run_id = "rid"
ws.jobs.run_now.return_value = wait

-recon_runner = ReconcileRunner(ws, installation, install_state, prompts)
+recon_runner = ReconcileRunner(ws, installation, install_state)
recon_runner.run()
ws.jobs.run_now.assert_called_once_with(1234, job_parameters={'operation_name': 'reconcile'})

@@ -363,12 +352,12 @@ def test_run_with_failed_execution():
}
)
install_state = InstallState.from_installation(installation)
-prompts = MockPrompts({})
+
wait = Mock()
wait.run_id = None
ws.jobs.run_now.return_value = wait

-recon_runner = ReconcileRunner(ws, installation, install_state, prompts)
+recon_runner = ReconcileRunner(ws, installation, install_state)
with pytest.raises(SystemExit):
recon_runner.run()
ws.jobs.run_now.assert_called_once_with(1234, job_parameters={'operation_name': 'reconcile'})
@@ -377,11 +366,6 @@ def test_aggregates_reconcile_run_with_job_id_in_state(monkeypatch):
def test_aggregates_reconcile_run_with_job_id_in_state(monkeypatch):
monkeypatch.setattr("webbrowser.open", lambda url: None)
ws = create_autospec(WorkspaceClient)
-prompts = MockPrompts(
-{
-r"Would you like to open the job run URL .*": "yes",
-}
-)
state = {
"resources": {"jobs": {RECON_JOB_NAME: "1234"}},
"version": 1,
@@ -449,6 +433,6 @@ def test_aggregates_reconcile_run_with_job_id_in_state(monkeypatch):
wait.run_id = "rid"
ws.jobs.run_now.return_value = wait

-recon_runner = ReconcileRunner(ws, installation, install_state, prompts)
+recon_runner = ReconcileRunner(ws, installation, install_state)
recon_runner.run(operation_name="aggregates-reconcile")
ws.jobs.run_now.assert_called_once_with(1234, job_parameters={'operation_name': 'aggregates-reconcile'})