Skip to content

Commit b1358b9

Browse files
committed
NOCOMMIT: PoC: vectorize just expm1
Here is how we can vectorize expm1, given the prior PRs in this stack. It is marked NOCOMMIT because we should actually implement vectorization for all of the `unary_ufunc_*` ops, not just this one.

ghstack-source-id: e65c2b075587f716b8a746616d3335bab69a261d
ghstack-comment-id: 2751961712
Pull Request resolved: #9586
1 parent 694dfe6 commit b1358b9

File tree

2 files changed

+53
-2
lines changed

2 files changed

+53
-2
lines changed

kernels/portable/cpu/op_expm1.cpp

+52-2
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,66 @@
77
*/
88

99
#include <executorch/kernels/portable/cpu/pattern/pattern.h>
10+
#include <executorch/kernels/portable/cpu/util/elementwise_util.h>
1011
#include <executorch/runtime/kernel/kernel_includes.h>
1112
#include <cmath>
1213

1314
namespace torch {
1415
namespace executor {
1516
namespace native {
1617

18+
// REVIEW: I'm not entirely sure what the best way to implement this
19+
// namespace is. Some options:
20+
// 1) All in one file, with or without an `IMPLEMENT_VECTORIZED_MATH_OP` macro.
21+
// 2) Include in each `unary_ufunc_*` op_foo.cpp, with or without an
22+
// `IMPLEMENT_VECTORIZED_MATH_OP` macro.
23+
//
24+
// I think my preferred option would be (2) with a macro, but I've
25+
// left the macro out for ease of reading this PoC PR.
26+
namespace math {
27+
using std::expm1;
28+
#ifdef ET_USE_PYTORCH_HEADERS
29+
template <typename T>
30+
auto expm1(at::vec::Vectorized<T> x) {
31+
// ATen knows to do this conversion because the TensorIterator for this op
32+
// (and lots of similar ones in aten/src/ATen/native/UnaryOps.cpp) is created
33+
// with build_borrowing_unary_float_op.
34+
if constexpr (!executorch::runtime::is_floating_point<T>::value) {
35+
return at::vec::convert<float>(x).expm1();
36+
} else {
37+
return x.expm1();
38+
}
39+
}
40+
#endif
41+
} // namespace math
1742
Tensor& expm1_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
18-
return internal::unary_ufunc_realhbbf16_to_floathbf16(
19-
std::expm1, ctx, in, out);
43+
ET_KERNEL_CHECK(ctx, tensor_is_floating_type(out), InvalidArgument, out);
44+
45+
// Resize for dynamic shape
46+
ET_KERNEL_CHECK_MSG(
47+
ctx,
48+
resize_tensor(out, in.sizes()) == Error::Ok,
49+
InvalidArgument,
50+
out,
51+
"Failed to resize output tensor.");
52+
53+
ET_KERNEL_CHECK(
54+
ctx, tensors_have_same_dim_order(in, out), InvalidArgument, out);
55+
56+
static constexpr const char op_name[] = "expm1.out";
57+
ET_SWITCH_REALHBBF16_TYPES(in.scalar_type(), ctx, op_name, CTYPE_IN, [&] {
58+
utils::apply_unitensor_elementwise_fn<
59+
CTYPE_IN,
60+
op_name,
61+
utils::SupportedTensorDtypes::FLOATHBF16>(
62+
[](auto x) { return math::expm1(x); },
63+
ctx,
64+
in,
65+
utils::SupportedTensorDtypes::REALHBBF16,
66+
out);
67+
});
68+
69+
return out;
2070
}
2171

2272
} // namespace native

shim_et/xplat/executorch/kernels/portable/op_registration_util.bzl

+1
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,7 @@ ATEN_OPS = (
527527
name = "op_expm1",
528528
deps = [
529529
"//executorch/kernels/portable/cpu/pattern:pattern",
530+
"//executorch/kernels/portable/cpu/util:elementwise_util",
530531
],
531532
),
532533
op_target(

0 commit comments

Comments
 (0)