Skip to content

Commit 05b9aa0

Browse files
committed
default on telemetry + logs to investigate failing test
Signed-off-by: Nikhil Suri <[email protected]>
1 parent 07e4fc4 commit 05b9aa0

File tree

4 files changed

+114
-10
lines changed

4 files changed

+114
-10
lines changed

src/databricks/sql/client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ def read(self) -> Optional[OAuthToken]:
320320
)
321321

322322
self.force_enable_telemetry = kwargs.get("force_enable_telemetry", False)
323-
self.enable_telemetry = kwargs.get("enable_telemetry", False)
323+
self.enable_telemetry = kwargs.get("enable_telemetry", True) # Default to True for telemetry
324324
self.telemetry_enabled = TelemetryHelper.is_telemetry_enabled(self)
325325

326326
TelemetryClientFactory.initialize_telemetry_client(

src/databricks/sql/common/feature_flag.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,28 +106,37 @@ def get_flag_value(self, name: str, default_value: Any) -> Any:
106106

107107
def _refresh_flags(self):
108108
"""Performs a synchronous network request to fetch and update flags."""
109+
import logging
110+
logger = logging.getLogger(__name__)
111+
109112
headers = {}
110113
try:
111114
# Authenticate the request
112115
self._connection.session.auth_provider.add_headers(headers)
113116
headers["User-Agent"] = self._connection.session.useragent_header
114117

118+
logger.info(f"Fetching feature flags from: {self._feature_flag_endpoint}")
115119
response = self._http_client.request(
116120
HttpMethod.GET, self._feature_flag_endpoint, headers=headers, timeout=30
117121
)
118122

123+
logger.info(f"Feature flag response status: {response.status}")
119124
if response.status == 200:
120125
# Parse JSON response from urllib3 response data
121126
response_data = json.loads(response.data.decode())
127+
logger.info(f"Feature flag response data: {response_data}")
122128
ff_response = FeatureFlagsResponse.from_dict(response_data)
123129
self._update_cache_from_response(ff_response)
130+
logger.info(f"Feature flags loaded: {self._flags}")
124131
else:
125132
# On failure, initialize with an empty dictionary to prevent re-blocking.
133+
logger.info(f"Feature flag fetch failed with status {response.status}, initializing empty flags")
126134
if self._flags is None:
127135
self._flags = {}
128136

129137
except Exception as e:
130138
# On exception, initialize with an empty dictionary to prevent re-blocking.
139+
logger.info(f"Feature flag fetch exception: {type(e).__name__}: {e}, initializing empty flags")
131140
if self._flags is None:
132141
self._flags = {}
133142

src/databricks/sql/telemetry/telemetry_client.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -110,15 +110,21 @@ def get_auth_flow(auth_provider):
110110
@staticmethod
111111
def is_telemetry_enabled(connection: "Connection") -> bool:
112112
if connection.force_enable_telemetry:
113+
logger.info("Telemetry: force_enable_telemetry=True, telemetry ENABLED")
113114
return True
114115

115116
if connection.enable_telemetry:
117+
logger.info(f"Telemetry: enable_telemetry=True, checking feature flag: {TelemetryHelper.TELEMETRY_FEATURE_FLAG_NAME}")
116118
context = FeatureFlagsContextFactory.get_instance(connection)
117119
flag_value = context.get_flag_value(
118120
TelemetryHelper.TELEMETRY_FEATURE_FLAG_NAME, default_value=False
119121
)
120-
return str(flag_value).lower() == "true"
122+
logger.info(f"Telemetry: feature flag value = '{flag_value}'")
123+
enabled = str(flag_value).lower() == "true"
124+
logger.info(f"Telemetry: feature flag check result = {enabled}")
125+
return enabled
121126
else:
127+
logger.info("Telemetry: enable_telemetry=False, telemetry DISABLED")
122128
return False
123129

124130

@@ -191,13 +197,12 @@ def __init__(
191197

192198
def _export_event(self, event):
193199
"""Add an event to the batch queue and flush if batch is full"""
194-
logger.debug("Exporting event for connection %s", self._session_id_hex)
200+
logger.info(f"Exporting telemetry event for connection {self._session_id_hex}")
195201
with self._lock:
196202
self._events_batch.append(event)
203+
logger.info(f"Event added to batch, batch size now: {len(self._events_batch)}/{self._batch_size}")
197204
if len(self._events_batch) >= self._batch_size:
198-
logger.debug(
199-
"Batch size limit reached (%s), flushing events", self._batch_size
200-
)
205+
logger.info(f"Batch size reached ({self._batch_size}), flushing events")
201206
self._flush()
202207

203208
def _flush(self):
@@ -465,9 +470,8 @@ def initialize_telemetry_client(
465470
TelemetryClientFactory._initialize()
466471

467472
if session_id_hex not in TelemetryClientFactory._clients:
468-
logger.debug(
469-
"Creating new TelemetryClient for connection %s",
470-
session_id_hex,
473+
logger.info(
474+
f"Creating telemetry client for connection {session_id_hex}, telemetry_enabled={telemetry_enabled}"
471475
)
472476
if telemetry_enabled:
473477
TelemetryClientFactory._clients[
@@ -481,14 +485,17 @@ def initialize_telemetry_client(
481485
batch_size=batch_size,
482486
client_context=client_context,
483487
)
488+
logger.info(f"Created TelemetryClient for connection {session_id_hex}")
484489
else:
485490
TelemetryClientFactory._clients[
486491
session_id_hex
487492
] = NoopTelemetryClient()
493+
logger.info(f"Created NoopTelemetryClient for connection {session_id_hex}")
488494
except Exception as e:
489-
logger.debug("Failed to initialize telemetry client: %s", e)
495+
logger.info(f"Failed to initialize telemetry client: {type(e).__name__}: {e}")
490496
# Fallback to NoopTelemetryClient to ensure connection doesn't fail
491497
TelemetryClientFactory._clients[session_id_hex] = NoopTelemetryClient()
498+
logger.info(f"Fallback to NoopTelemetryClient for connection {session_id_hex}")
492499

493500
@staticmethod
494501
def get_telemetry_client(session_id_hex):

tests/e2e/test_telemetry_e2e.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,13 @@ def test_enable_telemetry_on_with_server_on_sends_events(
277277
Scenario: enable_telemetry=ON, force_enable_telemetry=OFF, server=ON
278278
Expected: 2 events (initial_log + latency_log)
279279
"""
280+
from databricks.sql.telemetry.telemetry_client import TelemetryHelper
281+
282+
print(f"\n{'='*80}")
283+
print(f"TEST: test_enable_telemetry_on_with_server_on_sends_events")
284+
print(f"Feature flag being checked: {TelemetryHelper.TELEMETRY_FEATURE_FLAG_NAME}")
285+
print(f"{'='*80}\n")
286+
280287
(
281288
captured_events,
282289
captured_futures,
@@ -296,6 +303,11 @@ def test_enable_telemetry_on_with_server_on_sends_events(
296303
"telemetry_batch_size": 1,
297304
}
298305
) as conn:
306+
print(f"\nConnection created:")
307+
print(f" enable_telemetry: {conn.enable_telemetry}")
308+
print(f" force_enable_telemetry: {conn.force_enable_telemetry}")
309+
print(f" telemetry_enabled (computed): {conn.telemetry_enabled}")
310+
print(f" telemetry_client type: {type(conn._telemetry_client).__name__}\n")
299311
with conn.cursor() as cursor:
300312
cursor.execute("SELECT 1")
301313
cursor.fetchone()
@@ -424,6 +436,82 @@ def test_both_flags_off_does_not_send_events(self, telemetry_interceptors):
424436

425437
print(f"\nStatement ID: {statement_id}")
426438

439+
def test_default_behavior_sends_events_with_server_flag_on(
440+
self, telemetry_interceptors
441+
):
442+
"""
443+
Scenario: Neither enable_telemetry nor force_enable_telemetry passed (uses defaults)
444+
Expected: 2 events (initial_log + latency_log) when server feature flag is ON
445+
446+
Default behavior:
447+
- enable_telemetry defaults to True
448+
- force_enable_telemetry defaults to False
449+
- Telemetry will be sent if server feature flag is enabled
450+
"""
451+
from databricks.sql.telemetry.telemetry_client import TelemetryHelper
452+
453+
print(f"\n{'='*80}")
454+
print(f"TEST: test_default_behavior_sends_events_with_server_flag_on")
455+
print(f"Feature flag being checked: {TelemetryHelper.TELEMETRY_FEATURE_FLAG_NAME}")
456+
print(f"Testing DEFAULT behavior (no flags passed explicitly)")
457+
print(f"{'='*80}\n")
458+
459+
(
460+
captured_events,
461+
captured_futures,
462+
export_wrapper,
463+
callback_wrapper,
464+
) = telemetry_interceptors
465+
466+
with patch.object(
467+
TelemetryClient, "_export_event", export_wrapper
468+
), patch.object(
469+
TelemetryClient, "_telemetry_request_callback", callback_wrapper
470+
):
471+
# Connection without explicit telemetry flags - relies on defaults
472+
with self.connection(
473+
extra_params={
474+
"telemetry_batch_size": 1,
475+
}
476+
) as conn:
477+
# Verify defaults are as expected
478+
print(f"\nConnection created with DEFAULT settings:")
479+
print(f" enable_telemetry (default): {conn.enable_telemetry}")
480+
print(f" force_enable_telemetry (default): {conn.force_enable_telemetry}")
481+
print(f" telemetry_enabled (computed): {conn.telemetry_enabled}")
482+
print(f" telemetry_client type: {type(conn._telemetry_client).__name__}\n")
483+
484+
with conn.cursor() as cursor:
485+
cursor.execute("SELECT 99")
486+
cursor.fetchone()
487+
statement_id = cursor.query_id
488+
489+
time.sleep(2)
490+
done, not_done = wait(captured_futures, timeout=10)
491+
492+
# With default enable_telemetry=True and server flag ON, expect 2 events
493+
assert (
494+
len(captured_events) == 2
495+
), f"Expected exactly 2 events with default settings, got {len(captured_events)}"
496+
assert len(done) == 2, f"Expected exactly 2 responses, got {len(done)}"
497+
498+
# Verify HTTP responses
499+
for future in done:
500+
response = future.result()
501+
assert 200 <= response.status < 300
502+
503+
# Assert payload for all events
504+
for event in captured_events:
505+
self.assertSystemConfiguration(event)
506+
self.assertConnectionParams(
507+
event, expected_http_path=self.arguments["http_path"]
508+
)
509+
510+
# Assert latency event (second event)
511+
self.assertStatementExecution(captured_events[1])
512+
513+
print(f"\nStatement ID: {statement_id}")
514+
427515
def test_sql_error_sends_telemetry_with_error_info(self, telemetry_interceptors):
428516
"""
429517
Scenario: SQL query with invalid syntax causes error

0 commit comments

Comments
 (0)