Skip to content
138 changes: 110 additions & 28 deletions newrelic/api/opentelemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
from opentelemetry.trace.status import Status, StatusCode

from newrelic.core.database_utils import generate_dynamodb_arn, get_database_operation_target_from_statement
from newrelic.api.application import application_instance
from newrelic.api.background_task import BackgroundTask
from newrelic.api.datastore_trace import DatastoreTrace
Expand Down Expand Up @@ -169,9 +170,9 @@ def __init__(
self.nr_trace = nr_trace_type(**trace_kwargs)
elif nr_trace_type == ExternalTrace:
trace_kwargs = {
"library": self.name or self.instrumenting_module,
"library": self.instrumenting_module,
"url": self.attributes.get("http.url"),
"method": self.attributes.get("http.method"),
"method": self.attributes.get("http.method") or self.name,
"parent": self.nr_parent,
}
self.nr_trace = nr_trace_type(**trace_kwargs)
Expand Down Expand Up @@ -216,10 +217,16 @@ def set_attributes(self, attributes):
self.set_attribute(key, value)

def _set_attributes_in_nr(self, otel_attributes=None):
if not (otel_attributes and hasattr(self, "nr_trace") and self.nr_trace):
if not otel_attributes or not getattr(self, "nr_trace", None):
return

# If these attributes already exist in NR's agent attributes,
# keep the attributes in the OTel span, but do not add them
# to NR's user attributes to avoid sending the same data
# multiple times.
for key, value in otel_attributes.items():
self.nr_trace.add_custom_attribute(key, value)
if key not in self.nr_trace.agent_attributes:
self.nr_trace.add_custom_attribute(key, value)

def add_event(self, name, attributes=None, timestamp=None):
# TODO: Not implemented yet.
Expand Down Expand Up @@ -296,6 +303,53 @@ def record_exception(self, exception, attributes=None, timestamp=None, escaped=F

notice_error(error_args, attributes=attributes)

def _database_attribute_mapping(self):
span_obj_attrs = {
"host": self.attributes.get("net.peer.name") or self.attributes.get("server.address"),
"database_name": self.attributes.get("db.name"),
"port_path_or_id": self.attributes.get("net.peer.port") or self.attributes.get("server.port"),
"product": self.attributes.get("db.system").capitalize(),
}
agent_attrs = {}

db_statement = self.attributes.get("db.statement")
if db_statement:
if hasattr(db_statement, "string"):
db_statement = db_statement.string
operation, target = get_database_operation_target_from_statement(db_statement)
target = target or self.attributes.get("db.mongodb.collection")
span_obj_attrs.update({
"operation": operation,
"target": target,
})
elif span_obj_attrs["product"] == "Dynamodb":
region = self.attributes.get("cloud.region")
operation = self.attributes.get("db.operation")
target = self.attributes.get("aws.dynamodb.table_names", [None])[-1]
account_id = self.nr_transaction.settings.cloud.aws.account_id
resource_id = generate_dynamodb_arn(span_obj_attrs["host"], region, account_id, target)
agent_attrs.update({
"aws.operation": self.attributes.get("db.operation"),
"cloud.resource_id": resource_id,
"cloud.region": region,
"aws.requestId": self.attributes.get("aws.request_id"),
"http.statusCode": self.attributes.get("http.status_code"),
"cloud.account.id": account_id,
})
span_obj_attrs.update({
"target": target,
"operation": operation,
})

# We do not want to override any agent attributes
# with `None` if `value` does not exist.
for key, value in span_obj_attrs.items():
if value:
setattr(self.nr_trace, key, value)
for key, value in agent_attrs.items():
if value:
self.nr_trace._add_agent_attribute(key, value)

def end(self, end_time=None, *args, **kwargs):
# We will ignore the end_time parameter and use NR's end_time

Expand All @@ -305,37 +359,31 @@ def end(self, end_time=None, *args, **kwargs):
if not nr_trace or (nr_trace and getattr(nr_trace, "end_time", None)):
return

# Add attributes as Trace parameters
self._set_attributes_in_nr(self.attributes)
# We will need to add specific attributes to the
# NR trace before the node creation because the
# attributes were likely not available at the time
# of the trace's creation but eventually populated
# throughout the span's lifetime.

# Database/Datastore specific attributes
if self.attributes.get("db.system"):
self._database_attribute_mapping()

# For each kind of NR Trace, we will need to add
# specific attributes since they were likely not
# available at the time of the trace's creation.
if self.instrumenting_module in ("Redis", "Mongodb"):
self.nr_trace.host = self.attributes.get("net.peer.name", self.attributes.get("server.address"))
self.nr_trace.port_path_or_id = self.attributes.get("net.peer.port", self.attributes.get("server.port"))
self.nr_trace.database_name = self.attributes.get("db.name")
self.nr_trace.product = self.attributes.get("db.system")
elif self.instrumenting_module == "Dynamodb":
self.nr_trace.database_name = self.attributes.get("db.name")
self.nr_trace.product = self.attributes.get("db.system")
self.nr_trace.port_path_or_id = self.attributes.get("net.peer.port")
self.nr_trace.host = self.attributes.get("dynamodb.{region}.amazonaws.com")

# Set SpanKind attribute
self._set_attributes_in_nr({"span.kind": self.kind})
# External specific attributes
self.nr_trace._add_agent_attribute("http.statusCode", self.attributes.get("http.status_code"))

# Add OTel attributes as custom NR trace attributes
self._set_attributes_in_nr(self.attributes)

error = sys.exc_info()
self.nr_trace.__exit__(*error)
self.set_status(StatusCode.OK if not error[0] else StatusCode.ERROR)

if ("exception.escaped" in self.attributes) or (
self.kind in (otel_api_trace.SpanKind.SERVER, otel_api_trace.SpanKind.CONSUMER)
and isinstance(current_trace(), Sentinel)
):
if ("exception.escaped" in self.attributes) or (self.kind in (otel_api_trace.SpanKind.SERVER, otel_api_trace.SpanKind.CONSUMER) and isinstance(current_trace(), Sentinel)):
# We need to end the transaction as well
self.nr_transaction.__exit__(*error)


def __exit__(self, exc_type, exc_val, exc_tb):
"""
Ends context manager and calls `end` on the `Span`.
Expand All @@ -346,7 +394,12 @@ def __exit__(self, exc_type, exc_val, exc_tb):
if self._record_exception:
self.record_exception(exception=exc_val, escaped=True)
if self.set_status_on_exception:
self.set_status(Status(status_code=StatusCode.ERROR, description=f"{exc_type.__name__}: {exc_val}"))
self.set_status(
Status(
status_code=StatusCode.ERROR,
description=f"{exc_type.__name__}: {exc_val}",
)
)

super().__exit__(exc_type, exc_val, exc_tb)

Expand All @@ -360,6 +413,27 @@ def _create_web_transaction(self):
if "nr.wsgi.environ" in self.attributes:
# This is a WSGI request
transaction = WSGIWebTransaction(self.nr_application, environ=self.attributes.pop("nr.wsgi.environ"))
elif "nr.asgi.scope" in self.attributes:
# This is an ASGI request
scope = self.attributes.pop("nr.asgi.scope")
scheme = scope.get("scheme", "http")
server = scope.get("server") or (None, None)
host, port = scope["server"] = tuple(server)
request_method = scope.get("method")
request_path = scope.get("path")
query_string = scope.get("query_string")
headers = scope["headers"] = tuple(scope.get("headers", ()))
transaction = WebTransaction(
application=self.nr_application,
name=self.name,
scheme=scheme,
host=host,
port=port,
request_method=request_method,
request_path=request_path,
query_string=query_string,
headers=headers,
)
else:
# This is a web request
headers = self.attributes.pop("nr.http.headers", None)
Expand Down Expand Up @@ -549,4 +623,12 @@ def get_tracer(
*args,
**kwargs,
):
return Tracer(*args, resource=self._resource, instrumentation_library=instrumenting_module_name, **kwargs)
return Tracer(
*args,
instrumentation_library=instrumenting_module_name,
instrumenting_library_version=instrumenting_library_version,
schema_url=schema_url,
attributes=attributes,
resource=self._resource,
**kwargs
)
14 changes: 10 additions & 4 deletions newrelic/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4364,7 +4364,9 @@ def _process_module_builtin_defaults():

# Hybrid Agent Hooks
_process_module_definition(
"opentelemetry.context", "newrelic.hooks.hybridagent_opentelemetry", "instrument_context_api"
"opentelemetry.context",
"newrelic.hooks.hybridagent_opentelemetry",
"instrument_context_api",
)

_process_module_definition(
Expand All @@ -4374,11 +4376,15 @@ def _process_module_builtin_defaults():
)

_process_module_definition(
"opentelemetry.trace", "newrelic.hooks.hybridagent_opentelemetry", "instrument_trace_api"
"opentelemetry.trace",
"newrelic.hooks.hybridagent_opentelemetry",
"instrument_trace_api",
)

_process_module_definition(
"opentelemetry.instrumentation.utils", "newrelic.hooks.hybridagent_opentelemetry", "instrument_utils"
"opentelemetry.instrumentation.utils",
"newrelic.hooks.hybridagent_opentelemetry",
"instrument_utils",
)


Expand Down
4 changes: 2 additions & 2 deletions newrelic/core/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,9 +597,9 @@ def connect_to_data_collector(self, activate_agent):
internal_metric("Supportability/Python/AzureFunctionMode/enabled", 1)

# OpenTelemetry Bridge toggle metric
opentelemetry_bridge = configuration.opentelemetry.enabled
opentelemetry_bridge = "enabled" if configuration.opentelemetry.enabled else "disabled"
internal_metric(
f"Supportability/Tracing/Python/OpenTelemetryBridge/{'enabled' if opentelemetry_bridge else 'disabled'}",
f"Supportability/Tracing/Python/OpenTelemetryBridge/{opentelemetry_bridge}",
1,
)

Expand Down
26 changes: 25 additions & 1 deletion newrelic/core/database_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,12 @@
return operation if operation in _operation_table else ""


def _parse_operation_otel(sql):
match = _parse_operation_re.search(sql)
operation = (match and match.group(1).lower())
return operation or ""


def _parse_target(sql, operation):
sql = sql.rstrip(";")
parse = _operation_table.get(operation, None)
Expand Down Expand Up @@ -898,5 +904,23 @@
result = SQLStatement(sql, database)

_sql_statements[key] = result

return result


def generate_dynamodb_arn(host, region=None, account_id=None, target=None):
# There are 3 different partition options.
# See https://docs.aws.amazon.com/IAM/latest/UserGuide/reference-arns.html for details.
partition = "aws"
if "amazonaws.cn" in host:
partition = "aws-cn"
elif "amazonaws-us-gov.com" in host:

Check failure

Code scanning / CodeQL

Incomplete URL substring sanitization High

The string
amazonaws-us-gov.com
may be at an arbitrary position in the sanitized URL.

Copilot Autofix

AI 17 days ago

In general, to fix incomplete URL substring sanitization, you should parse the URL (if necessary), extract the hostname, and then perform exact or well-scoped suffix checks on that hostname instead of using arbitrary substring searches. For AWS endpoints specifically, you should compare against known, exact domain suffixes (like .amazonaws.cn and .amazonaws-us-gov.com) or match a constrained pattern, not just search for the substring anywhere.

For this code, the most direct and non-breaking fix is to (1) ensure the input is treated strictly as a host/hostname (not a full URL string), and then (2) replace the substring checks with suffix checks that correctly distinguish the AWS partitions:

  • Treat host as the candidate hostname; if it accidentally contains a scheme or path, first parse it using urllib.parse.urlparse and extract .hostname.
  • Normalize host to lowercase for case-insensitive comparison.
  • Check:
    • If the hostname equals amazonaws.cn or ends with .amazonaws.cn, treat it as the aws-cn partition.
    • Else if it equals amazonaws-us-gov.com or ends with .amazonaws-us-gov.com, treat it as the aws-us-gov partition.
    • Otherwise keep the default aws.

This avoids matching cases like evil-amazonaws-us-gov.com.example.org, because such strings do not end with the exact AWS domain suffix. To implement this, we need to import urlparse from urllib.parse, normalize host, handle the case where parsing yields no hostname, and then perform stricter endswith checks in generate_dynamodb_arn. All changes are within newrelic/core/database_utils.py, and no functionality other than safer partition detection is modified.

Suggested changeset 1
newrelic/core/database_utils.py

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/newrelic/core/database_utils.py b/newrelic/core/database_utils.py
--- a/newrelic/core/database_utils.py
+++ b/newrelic/core/database_utils.py
@@ -21,6 +21,8 @@
 import re
 import weakref
 
+from urllib.parse import urlparse
+
 from newrelic.core.config import global_settings
 from newrelic.core.internal_metrics import internal_metric
 
@@ -911,11 +913,19 @@
     # There are 3 different partition options.
     # See  https://docs.aws.amazon.com/IAM/latest/UserGuide/reference-arns.html for details.
     partition = "aws"
-    if "amazonaws.cn" in host:
-        partition = "aws-cn"
-    elif "amazonaws-us-gov.com" in host:
-        partition = "aws-us-gov"
 
+    # Normalize and, if necessary, parse host to extract the hostname component.
+    if host:
+        parsed = urlparse(host)
+        hostname = parsed.hostname or host
+        hostname = hostname.lower()
+
+        # Match against known AWS partition suffixes, not arbitrary substrings.
+        if hostname == "amazonaws.cn" or hostname.endswith(".amazonaws.cn"):
+            partition = "aws-cn"
+        elif hostname == "amazonaws-us-gov.com" or hostname.endswith(".amazonaws-us-gov.com"):
+            partition = "aws-us-gov"
+
     if partition and region and account_id and target:
         return f"arn:{partition}:dynamodb:{region}:{account_id:012d}:table/{target}"
 
EOF
@@ -21,6 +21,8 @@
import re
import weakref

from urllib.parse import urlparse

from newrelic.core.config import global_settings
from newrelic.core.internal_metrics import internal_metric

@@ -911,11 +913,19 @@
# There are 3 different partition options.
# See https://docs.aws.amazon.com/IAM/latest/UserGuide/reference-arns.html for details.
partition = "aws"
if "amazonaws.cn" in host:
partition = "aws-cn"
elif "amazonaws-us-gov.com" in host:
partition = "aws-us-gov"

# Normalize and, if necessary, parse host to extract the hostname component.
if host:
parsed = urlparse(host)
hostname = parsed.hostname or host
hostname = hostname.lower()

# Match against known AWS partition suffixes, not arbitrary substrings.
if hostname == "amazonaws.cn" or hostname.endswith(".amazonaws.cn"):
partition = "aws-cn"
elif hostname == "amazonaws-us-gov.com" or hostname.endswith(".amazonaws-us-gov.com"):
partition = "aws-us-gov"

if partition and region and account_id and target:
return f"arn:{partition}:dynamodb:{region}:{account_id:012d}:table/{target}"

Copilot is powered by AI and may make mistakes. Always verify output.
partition = "aws-us-gov"

if partition and region and account_id and target:
return f"arn:{partition}:dynamodb:{region}:{account_id:012d}:table/{target}"


def get_database_operation_target_from_statement(db_statement):
operation = _parse_operation_otel(db_statement)
target = _parse_target(db_statement, operation)
return operation, target
20 changes: 5 additions & 15 deletions newrelic/hooks/external_botocore.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

from botocore.response import StreamingBody

from newrelic.core.database_utils import generate_dynamodb_arn
from newrelic.api.datastore_trace import DatastoreTrace
from newrelic.api.external_trace import ExternalTrace
from newrelic.api.function_trace import FunctionTrace
Expand Down Expand Up @@ -1295,21 +1296,10 @@ def _nr_dynamodb_datastore_trace_wrapper_(wrapped, instance, args, kwargs):
settings = transaction.settings if transaction.settings else global_settings()
account_id = settings.cloud.aws.account_id if settings and settings.cloud.aws.account_id else None

# There are 3 different partition options.
# See https://docs.aws.amazon.com/IAM/latest/UserGuide/reference-arns.html for details.
partition = None
if hasattr(instance, "_endpoint") and hasattr(instance._endpoint, "host"):
_db_host = instance._endpoint.host
partition = "aws"
if "amazonaws.cn" in _db_host:
partition = "aws-cn"
elif "amazonaws-us-gov.com" in _db_host:
partition = "aws-us-gov"

if partition and region and account_id and _target:
agent_attrs["cloud.resource_id"] = (
f"arn:{partition}:dynamodb:{region}:{account_id:012d}:table/{_target}"
)
_db_host = getattr(getattr(instance, "_endpoint", None), "host", None)
resource_id = generate_dynamodb_arn(_db_host, region, account_id, _target)
if resource_id:
agent_attrs["cloud.resource_id"] = resource_id

except Exception:
_logger.debug("Failed to capture AWS DynamoDB info.", exc_info=True)
Expand Down
18 changes: 7 additions & 11 deletions newrelic/hooks/hybridagent_opentelemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
def wrap__load_runtime_context(wrapped, instance, args, kwargs):
application = application_instance(activate=False)
settings = global_settings() if not application else application.settings

if not settings.opentelemetry.enabled:
return wrapped(*args, **kwargs)

Expand All @@ -52,16 +51,14 @@ def wrap__load_runtime_context(wrapped, instance, args, kwargs):
def wrap_get_global_response_propagator(wrapped, instance, args, kwargs):
application = application_instance(activate=False)
settings = global_settings() if not application else application.settings

if not settings.opentelemetry.enabled:
return wrapped(*args, **kwargs)

from opentelemetry.instrumentation.propagators import set_global_response_propagator

from newrelic.api.opentelemetry import otel_context_propagator

from opentelemetry.instrumentation.propagators import set_global_response_propagator

set_global_response_propagator(otel_context_propagator)

return otel_context_propagator


Expand Down Expand Up @@ -91,12 +88,10 @@ def wrap_set_tracer_provider(wrapped, instance, args, kwargs):
application.activate()

settings = global_settings() if not application else application.settings

if not settings:
# The application may need more time to start up
time.sleep(0.5)
settings = global_settings() if not application else application.settings

if not settings or not settings.opentelemetry.enabled:
return wrapped(*args, **kwargs)

Expand Down Expand Up @@ -137,9 +132,8 @@ def wrap_get_tracer_provider(wrapped, instance, args, kwargs):

if _TRACER_PROVIDER is None:
from newrelic.api.opentelemetry import TracerProvider

hybrid_agent_tracer_provider = TracerProvider("hybrid_agent_tracer_provider")
_TRACER_PROVIDER = hybrid_agent_tracer_provider
_TRACER_PROVIDER = TracerProvider()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just making sure we don't still need to pass "hybrid_agent_tracer_provider" into here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I changed this logic slightly to be more in line with the actual TracerProvider's functionality. I'll flesh it out some more in the future, but basically, the OTel TracerProvider doesn't really take in a name.


return _TRACER_PROVIDER


Expand Down Expand Up @@ -226,6 +220,8 @@ def wrap_start_internal_or_server_span(wrapped, instance, args, kwargs):
# This is an HTTP request (WSGI, ASGI, or otherwise)
if "wsgi.version" in context_carrier:
attributes["nr.wsgi.environ"] = context_carrier
elif "asgi" in context_carrier:
attributes["nr.asgi.scope"] = context_carrier
else:
attributes["nr.http.headers"] = context_carrier
else:
Expand Down
Loading
Loading