-
Notifications
You must be signed in to change notification settings - Fork 734
2/n Enable 16-bit activations and 8-bit weights in Cadence Quantizer for linear #15901
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -87,7 +87,6 @@ OPERATORS = [ | |||||
| "quantized_fully_connected_asym8sxasym8s_asym8s_per_tensor_out", | ||||||
| "quantized_fully_connected_asym8uxasym8u_asym8u_per_tensor_out", | ||||||
| "quantized_layer_norm", | ||||||
| "quantized_linear_out", | ||||||
| "quantized_linear_asym8sxasym8s_asym8s_per_tensor_out", | ||||||
| "quantized_linear_asym8uxasym8u_asym8u_per_tensor_out", | ||||||
| "quantized_matmul_out", | ||||||
|
|
@@ -122,3 +121,7 @@ def define_common_targets(): | |||||
| # Define build targets for all operators registered in the tables above. | ||||||
| for op in OPERATORS: | ||||||
| define_operator(op) | ||||||
|
|
||||||
| # quantized_linear_out and quantized_linear_per_tensor_out need an additional dependency for int16 support | ||||||
| define_operator("quantized_linear_out", deps=["fbcode//on_device_ai/Assistant/Jarvis/min_runtime/operators:quantize_linear_out", "fbcode//on_device_ai/Assistant/Jarvis/min_runtime/operators:headers",]) | ||||||
|
||||||
| define_operator("quantized_linear_out", deps=["fbcode//on_device_ai/Assistant/Jarvis/min_runtime/operators:quantize_linear_out", "fbcode//on_device_ai/Assistant/Jarvis/min_runtime/operators:headers",]) | |
| define_operator("quantized_linear_out", deps=["fbcode//on_device_ai/Assistant/Jarvis/min_runtime/operators:quantize_linear_out", "fbcode//on_device_ai/Assistant/Jarvis/min_runtime/operators:headers"]) |
Copilot
AI
Nov 19, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove the trailing comma before the closing bracket on line 127. While Python allows trailing commas in lists, this is inconsistent with the style used elsewhere in the codebase and may cause issues with some linters or build tools.
| define_operator("quantized_linear_per_tensor_out", deps=["fbcode//on_device_ai/Assistant/Jarvis/min_runtime/operators:quantize_linear_out", "fbcode//on_device_ai/Assistant/Jarvis/min_runtime/operators:headers",]) | |
| define_operator("quantized_linear_per_tensor_out", deps=["fbcode//on_device_ai/Assistant/Jarvis/min_runtime/operators:quantize_linear_out", "fbcode//on_device_ai/Assistant/Jarvis/min_runtime/operators:headers"]) |
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
| @@ -0,0 +1,132 @@ | ||||
| /* | ||||
| * Copyright (c) Meta Platforms, Inc. and affiliates. | ||||
| * All rights reserved. | ||||
| * | ||||
| * This source code is licensed under the BSD-style license found in the | ||||
| * LICENSE file in the root directory of this source tree. | ||||
| */ | ||||
|
|
||||
| #include <gtest/gtest.h> | ||||
| #include <sys/times.h> | ||||
|
|
||||
| #include <executorch/kernels/test/TestUtil.h> | ||||
| #include <executorch/runtime/core/error.h> | ||||
| #include <executorch/runtime/core/exec_aten/exec_aten.h> | ||||
| #include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h> | ||||
| #include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h> | ||||
| #include <executorch/runtime/platform/runtime.h> | ||||
|
|
||||
| #include <executorch/backends/cadence/hifi/operators/operators.h> | ||||
|
|
||||
| namespace impl { | ||||
| namespace HiFi { | ||||
| namespace native { | ||||
| namespace { | ||||
|
|
||||
| using ::executorch::aten::Scalar; | ||||
| using ::executorch::aten::ScalarType; | ||||
| using ::executorch::aten::Tensor; | ||||
| using ::executorch::aten::TensorImpl; | ||||
| using ::executorch::runtime::Error; | ||||
| using ::executorch::runtime::KernelRuntimeContext; | ||||
| using ::executorch::runtime::runtime_init; | ||||
| using ::executorch::runtime::testing::TensorFactory; | ||||
| using std::optional; | ||||
| using std::string_view; | ||||
|
|
||||
| class HiFiQuantizedLinearTest : public OperatorTest { | ||||
| public: | ||||
|
||||
| public: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The
`__ET_UNUSED` marker should be removed from both the `ctx` and `offset` parameters since they are now being used in the int16 activation case (lines 284, 293). This creates an inconsistency with `quantized_linear_out` (lines 211, 220) where these markers have already been removed.