Skip to content

Commit 24c13cc

Browse files
author
Zonglin Peng
committed
[Falcon][NMS][non-functional] Migrate BoxWithNMSLimit to Cadence namespace as a custom op from Caffe2
To add the op when lowering via `to_edge` or `to_executorch`, add `import executorch.backends.cadence.aot.ops_registrations  # noqa`. Differential Revision: [D78835800](https://our.internmc.facebook.com/intern/diff/D78835800/) ghstack-source-id: 320170643 Pull Request resolved: #15502
1 parent 11f752c commit 24c13cc

File tree

3 files changed

+120
-0
lines changed

3 files changed

+120
-0
lines changed

backends/cadence/aot/ops_registrations.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ def _validate_ref_impl_exists() -> None:
5656

5757
if op_name_clean not in ref_impls:
5858
if op_name not in _SKIP_OPS:
59+
print("*"*100)
60+
print(op_name_clean)
5961
error_impls.append(op_name)
6062

6163
if error_impls:
@@ -81,6 +83,13 @@ def register_fake(
8183
_REGISTERED_META_KERNELS.add(op_name)
8284
return _register_fake_original(op_name)
8385

86+
lib.define(
87+
"box_with_nms_limit.out(Tensor scores, Tensor boxes, Tensor batch_splits, float score_thresh, float nms, int detections_per_im, bool soft_nms_enabled, str soft_nms_method, float soft_nms_sigma, float soft_nms_min_score_thres, bool rotated, bool cls_agnostic_bbox_reg, bool input_boxes_include_bg_cls, bool output_classes_include_bg_cls, bool legacy_plus_one, Tensor[]? _caffe2_preallocated_outputs=None, *, Tensor(a!) out_scores, Tensor(b!) out_boxes, Tensor(c!) out_classes, Tensor(d!) batch_splits_out, Tensor(e!) out_keeps, Tensor(f!) out_keeps_size) -> (Tensor(a!) scores, Tensor(b!) boxes, Tensor(c!) classes, Tensor(d!) batch_splits, Tensor(e!) keeps, Tensor(f!) keeps_size)"
88+
)
89+
90+
lib.define(
91+
"box_with_nms_limit(Tensor scores, Tensor boxes, Tensor batch_splits, float score_thresh, float nms, int detections_per_im, bool soft_nms_enabled, str soft_nms_method, float soft_nms_sigma, float soft_nms_min_score_thres, bool rotated, bool cls_agnostic_bbox_reg, bool input_boxes_include_bg_cls, bool output_classes_include_bg_cls, bool legacy_plus_one, Tensor[]? _caffe2_preallocated_outputs=None) -> (Tensor scores, Tensor boxes, Tensor classes, Tensor batch_splits, Tensor keeps, Tensor keeps_size)"
92+
)
8493

8594
lib.define(
8695
"quantize_per_tensor(Tensor input, float scale, int zero_point, int quant_min, int quant_max, ScalarType dtype) -> (Tensor Z)"
@@ -2734,6 +2743,48 @@ def quantized_w8a32_gru_meta(
27342743
return hidden.new_empty((2, hidden.shape[-1]), dtype=torch.float32)
27352744

27362745

2746+
2747+
@register_fake("cadence::box_with_nms_limit")
def box_with_nms_limit_meta(
    tscores: torch.Tensor,
    tboxes: torch.Tensor,
    tbatch_splits: torch.Tensor,
    score_thres: float,
    nms_thres: float,
    detections_per_im: int,
    soft_nms_enabled: bool,
    soft_nms_method_str: str,
    soft_nms_sigma: float,
    soft_nms_min_score_thres: float,
    rotated: bool,
    cls_agnostic_bbox_reg: bool,
    input_boxes_include_bg_cls: bool,
    output_classes_include_bg_cls: bool,
    legacy_plus_one: bool,
    optional_tensor_list: Optional[list[torch.Tensor]] = None,
) -> Tuple[
    torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor
]:
    """Fake (meta) kernel for ``cadence::box_with_nms_limit``.

    Only computes output shapes/dtypes for tracing; no NMS is performed.
    Most parameters (thresholds, soft-NMS settings, class flags) are part of
    the op schema but do not influence the output shapes computed here.

    Args:
        tscores: Per-box class scores; class count is taken from dim 1.
        tboxes: Box coordinates (unused for shape inference here).
        tbatch_splits: Per-image split sizes; batch size is taken from dim 0.
        detections_per_im: Fixed number of output detections; must be positive.
        rotated: If True, boxes carry an extra angle component (5 values
            per box instead of 4).

    Returns:
        Tuple of empty tensors shaped as
        (scores, boxes, classes, batch_splits, keeps, keeps_size).

    Raises:
        ValueError: If ``detections_per_im`` is not positive.
    """
    # Rotated boxes are (cx, cy, w, h, angle); axis-aligned are (x1, y1, x2, y2).
    box_dim = 5 if rotated else 4
    # `detections_per_im` fixes the output row count. Raise explicitly rather
    # than `assert`, which is silently stripped when Python runs with -O.
    if detections_per_im <= 0:
        raise ValueError(
            f"detections_per_im must be positive, got {detections_per_im}"
        )
    batch_size = tbatch_splits.size(0)
    num_classes = tscores.size(1)
    out_scores = tscores.new_empty([detections_per_im])
    out_boxes = tscores.new_empty([detections_per_im, box_dim])
    out_classes = tscores.new_empty([detections_per_im])
    batch_splits_out = tscores.new_empty([batch_size])
    # Keep indices/counts are integral, matching the Caffe2 op's outputs.
    out_keeps = tscores.new_empty([detections_per_im], dtype=torch.int32)
    out_keeps_size = tscores.new_empty([batch_size, num_classes], dtype=torch.int32)

    return (
        out_scores,
        out_boxes,
        out_classes,
        batch_splits_out,
        out_keeps,
        out_keeps_size,
    )
2787+
27372788
# Validate that all meta kernels have reference implementations
27382789
# This is called at module import time to catch missing implementations early
27392790
_validate_ref_impl_exists()

backends/cadence/aot/ref_implementations.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
from executorch.exir.scalar_type import ScalarType
1616
from torch.library import impl, Library
1717

18+
from typing import Optional
19+
1820
m = Library("cadence", "IMPL", "CompositeExplicitAutograd")
1921
torch.ops.load_library("//executorch/kernels/quantized:custom_ops_generated_lib")
2022

@@ -2146,3 +2148,45 @@ def quantized_softmax(
21462148
out_scale,
21472149
out_zero_point,
21482150
)
2151+
2152+
2153+
@impl_tracked(m, "box_with_nms_limit")
def meta_box_with_nms_limit(
    tscores: torch.Tensor,
    tboxes: torch.Tensor,
    tbatch_splits: torch.Tensor,
    score_thres: float,
    nms_thres: float,
    detections_per_im: int,
    soft_nms_enabled: bool,
    soft_nms_method_str: str,
    soft_nms_sigma: float,
    soft_nms_min_score_thres: float,
    rotated: bool,
    cls_agnostic_bbox_reg: bool,
    input_boxes_include_bg_cls: bool,
    output_classes_include_bg_cls: bool,
    legacy_plus_one: bool,
    optional_tensor_list: Optional[list[torch.Tensor]] = None,
) -> tuple[
    torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor
]:
    """Reference implementation of ``cadence::box_with_nms_limit``.

    Delegates directly to the original Caffe2 operator
    ``torch.ops._caffe2.BoxWithNMSLimit`` with an identical argument list,
    so numerics match the source op exactly.

    NOTE(review): despite the ``meta_`` prefix, this is the eager reference
    implementation registered via ``impl_tracked``, not a fake kernel.
    """
    caffe2_args = (
        tscores,
        tboxes,
        tbatch_splits,
        score_thres,
        nms_thres,
        detections_per_im,
        soft_nms_enabled,
        soft_nms_method_str,
        soft_nms_sigma,
        soft_nms_min_score_thres,
        rotated,
        cls_agnostic_bbox_reg,
        input_boxes_include_bg_cls,
        output_classes_include_bg_cls,
        legacy_plus_one,
        optional_tensor_list,
    )
    return torch.ops._caffe2.BoxWithNMSLimit(*caffe2_args)

backends/cadence/aot/replace_ops.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,30 @@ def call_operator(
195195
)
196196

197197

198+
@register_cadence_pass(CadencePassAttribute(opt_level=0))
class ReplaceCaffe2BoxWithNMSLimitWithCadenceBoxWithNMSLimit(ExportPass):
    """Rewrite ``_caffe2.BoxWithNMSLimit`` calls to the Cadence custom op
    ``cadence.box_with_nms_limit``.

    The two ops share the same schema, so arguments and kwargs are passed
    through unchanged.
    """

    def call_operator(
        self,
        op,
        args: Tuple[Argument, ...],
        kwargs: Dict[str, Argument],
        meta: NodeMetadata,
    ) -> ProxyValue:
        # Edge-dialect ops resolve under exir_ops.edge; eager ops under torch.ops.
        namespace = exir_ops.edge if isinstance(op, EdgeOpOverload) else torch.ops
        if op == namespace._caffe2.BoxWithNMSLimit.default:
            # Swap in the Cadence op; same schema, so args carry over as-is.
            op = exir_ops.edge.cadence.box_with_nms_limit.default
        return super().call_operator(op, args, kwargs, meta)
220+
221+
198222
@register_cadence_pass(CadencePassAttribute(opt_level=0))
199223
class ReplaceSqueezeAndUnsqueezeWithViewPass(ExportPass):
200224
"""
@@ -2162,6 +2186,7 @@ class CadenceReplaceOpsInGraph:
21622186
ReplaceScalarTensorWithFullPass,
21632187
ReplaceInfArgInFullWithValuePass,
21642188
ReplaceLogicalNotBooleanWhereWithWherePass,
2189+
ReplaceCaffe2BoxWithNMSLimitWithCadenceBoxWithNMSLimit,
21652190
ReplaceAdaptiveAvgPoolWithAtenAvgPoolPass,
21662191
ReplaceAtenAvgPoolWithCadenceAvgPoolPass,
21672192
ReplaceWhereWithFullArgsWithWhereScalar,

0 commit comments

Comments
 (0)