Skip to content

Commit f8c877f

Browse files
authored
Merge branch 'main' into op-select-scatter
2 parents 79cd4d5 + 4860984 commit f8c877f

33 files changed

Lines changed: 447 additions & 158 deletions

backends/arm/_passes/arm_pass_manager.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@
113113

114114
from executorch.backends.arm._passes.arm_pass import ArmPass
115115
from executorch.backends.arm.tosa.specification import (
116+
tosa_spec_in_set,
116117
TosaLoweringContext,
117118
TosaSpecification,
118119
)
@@ -309,16 +310,20 @@ def transform_to_backend_pipeline(
309310
self, exported_program: ExportedProgram, graph_module: GraphModule
310311
):
311312
"""Apply passes before transforming program to backend"""
312-
if self.tosa_spec in (
313-
TosaSpecification.create_from_string("TOSA-1.0+FP"),
314-
TosaSpecification.create_from_string("TOSA-1.0+INT"),
313+
314+
if not tosa_spec_in_set(
315+
self.tosa_spec,
316+
{
317+
TosaSpecification.create_from_string("TOSA-1.0+FP"),
318+
TosaSpecification.create_from_string("TOSA-1.0+INT"),
319+
},
315320
):
316-
return self._tosa_pipeline(exported_program, graph_module)
317-
else:
318-
raise NotImplementedError(
319-
f"No pass pipeline implemented for {self.tosa_spec}"
321+
raise RuntimeError(
322+
f"No pass pipeline found for TOSA specification: {self.tosa_spec}"
320323
)
321324

325+
return self._tosa_pipeline(exported_program, graph_module)
326+
322327
def transform_for_annotation_pipeline(self, graph_module: GraphModule):
323328
# Preprocessing passes
324329
self.add_pass(RemoveGraphAssertsPass())

backends/arm/_passes/rewrite_conv2d_pass.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ def _is_depthwise_conv2d(self, node: torch.fx.Node) -> bool:
9090
return False
9191
groups = node.args[-1]
9292
in_channels = get_first_fake_tensor(node.all_input_nodes[0]).shape[1]
93-
out_channels = get_first_fake_tensor(node.all_input_nodes[1]).shape[0]
93+
out_channels = get_first_fake_tensor(node).shape[1]
9494
return (in_channels == groups) and (out_channels % in_channels) == 0
9595

9696
def _reshape_weights(self, weight_node: torch.fx.Node, in_channels: int) -> None:
@@ -103,6 +103,7 @@ def _reshape_weights(self, weight_node: torch.fx.Node, in_channels: int) -> None
103103
raise RuntimeError(
104104
f"Weight node {weight_node.name} is not a parameter or buffer"
105105
)
106+
106107
reshaped_weight_tensor = (
107108
weight_tensor.permute(HWCM_ORDER)
108109
.reshape(
@@ -118,14 +119,19 @@ def _reshape_weights(self, weight_node: torch.fx.Node, in_channels: int) -> None
118119
param_name = self.exported_program.graph_signature.inputs_to_buffers[
119120
weight_node.name
120121
]
122+
reshaped_weight_tensor = torch.nn.Buffer(reshaped_weight_tensor)
121123
elif is_param(self.exported_program, weight_node):
122124
param_name = self.exported_program.graph_signature.inputs_to_parameters[
123125
weight_node.name
124126
]
127+
reshaped_weight_tensor = torch.nn.Parameter(
128+
reshaped_weight_tensor, requires_grad=False
129+
)
125130
else:
126131
raise RuntimeError(
127132
f"Weight node {weight_node.name} is neither a parameter nor a buffer"
128133
)
134+
129135
self.exported_program.state_dict[param_name] = reshaped_weight_tensor
130136
weight_node.meta["val"] = weight_node.meta["val"].reshape(
131137
weight_tensor.shape[2],
@@ -243,7 +249,9 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
243249

244250
if self._is_depthwise_conv2d(node):
245251
target_op = exir_ops.backend.tosa.DEPTHWISE_CONV2D.default
246-
self._reshape_weights(weight, input_fake_tensor.shape[1])
252+
# If there are any TOSA.DEPTHWISE_CONV2D nodes using the weights, we've already reshaped them.
253+
if all(user.target != target_op for user in weight.users):
254+
self._reshape_weights(weight, input_fake_tensor.shape[1])
247255
weight_fake_tensor = get_first_fake_tensor(weight)
248256
else:
249257
target_op = exir_ops.backend.tosa.CONV2D.default

backends/arm/operator_support/ethos_u55_support.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,10 +78,12 @@ def __init__(self, reporter: WhyNoPartitionReporter):
7878

7979
targeted_ops_i8_i16_i32 = [
8080
exir_ops.edge.aten.cat.default,
81+
exir_ops.edge.aten.expand_copy.default,
8182
exir_ops.edge.aten.repeat.default,
8283
exir_ops.edge.aten.constant_pad_nd.default,
8384
exir_ops.edge.aten.view.default,
8485
exir_ops.edge.aten.permute.default,
86+
exir_ops.edge.aten.permute_copy.default,
8587
]
8688

8789
target_ops_i8 = tuple(TableOps.included_ops())

backends/arm/operator_support/slice_copy_support.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,6 @@ def is_node_tosa_supported(
4141
non-unit step sizes.
4242
4343
"""
44-
if tosa_spec not in self.tosa_specs:
45-
return False
46-
4744
args = node.args
4845
if len(args) == 5 and (step := args[4]) != 1:
4946
logger.warning(f"{node.target} with step size of {step} not supported.")

backends/arm/operator_support/tosa_supported_operators.py

Lines changed: 81 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,61 @@ def register_tosa_support_check(checker: Type[SupportedTOSAOperatorCheck]):
146146
return checker
147147

148148

149+
def _is_quantized_constant(node: torch.fx.Node) -> bool:
150+
if node.target not in (
151+
exir_ops.edge.aten.full_like.default,
152+
*ComputeConstantOpsAOTPass.targeted_ops,
153+
):
154+
return False
155+
156+
users = tuple(node.users)
157+
if users and all(user.target in Q_OPS for user in users):
158+
# The node feeds directly into only quantized ops.
159+
return True
160+
161+
for user in users:
162+
if user.target == exir_ops.edge.dim_order_ops._to_dim_order_copy.default:
163+
dim_order_dtype = get_first_fake_tensor(user).dtype
164+
if dim_order_dtype.is_complex or dim_order_dtype.is_floating_point:
165+
return False
166+
else:
167+
return False
168+
169+
return len(users) > 0
170+
171+
172+
def is_quantized(node: torch.fx.Node) -> bool:
173+
"""Checks if the node is quantized.
174+
175+
A node is considered quantized if any of the following is true:
176+
- Its output dtype is not floating point or complex => integer
177+
- It is an op that produces a constant that in turn feeds only quantized users
178+
- It has been marked as quantized in the ArmAnnotationInfo custom meta.
179+
180+
Args:
181+
node (torch.fx.Node): The FX node to check.
182+
183+
Returns:
184+
bool: True if the node is quantized, False otherwise.
185+
"""
186+
187+
node_dtype = get_first_fake_tensor(node).dtype
188+
# Integer-like dtype implies the node is already quantized.
189+
if not node_dtype.is_complex and not node_dtype.is_floating_point:
190+
return True
191+
192+
# Nodes introduced during lowering that exclusively feed quantized users.
193+
if _is_quantized_constant(node):
194+
return True
195+
196+
# Finally, fall back to the explicit annotation emitted by Arm passes.
197+
custom_meta = node.meta.get("custom", {})
198+
if ArmAnnotationInfo.CUSTOM_META_KEY in custom_meta:
199+
return custom_meta[ArmAnnotationInfo.CUSTOM_META_KEY]["quantized"]
200+
201+
return False
202+
203+
149204
def get_registered_tosa_support_checks(
150205
tosa_spec: TosaSpecification,
151206
) -> list[Type[SupportedTOSAOperatorCheck]]:
@@ -194,9 +249,11 @@ def tosa_support_factory(
194249
ControlFlowOpSupported(exported_program, tosa_spec, reporter),
195250
]
196251

197-
if tosa_spec.support_integer():
252+
if tosa_spec.support_integer() and tosa_spec.support_float():
253+
positive_checks.append(TOSAProINTFPSupportList())
254+
elif tosa_spec.support_integer():
198255
positive_checks.append(TOSAProINTSupportList())
199-
if tosa_spec.support_float():
256+
elif tosa_spec.support_float():
200257
positive_checks.append(TOSAProFPSupportList())
201258
# TODO: Refactor to use TOSAProSupportLists + negative checks
202259
positive_checks += [
@@ -268,6 +325,27 @@ def is_node_supported(
268325
return node.op == "call_function" and node.target in TOSA_PRO_FP_SupportList
269326

270327

328+
class TOSAProINTFPSupportList(OperatorSupportBase):
329+
"""
330+
TOSA_PRO_INT_FP_SupportList:
331+
Ops supported in INT+FP profile via native TOSA ops, decomposition/transformation, pre-compute, or TableOp.
332+
"""
333+
334+
def is_node_supported(
335+
self, submodules: typing.Mapping[str, torch.nn.Module], node: fx.Node
336+
) -> bool:
337+
if node.op != "call_function":
338+
return False
339+
340+
# Select list based on whether the node is quantized.
341+
if is_quantized(node) or node.target in (*Q_OPS, *DQ_OPS):
342+
support_list = TOSA_PRO_INT_SupportList
343+
else:
344+
support_list = TOSA_PRO_FP_SupportList
345+
346+
return node.target in support_list
347+
348+
271349
class CheckArmQuantized(OperatorSupportBase):
272350
"""
273351
Check if the node was marked as quantized in the Arm backend.
@@ -278,60 +356,14 @@ class CheckArmQuantized(OperatorSupportBase):
278356
def __init__(self, reporter: WhyNoPartitionReporter):
279357
self.reporter = reporter
280358

281-
def _is_quantized(self, node: torch.fx.Node) -> bool:
282-
"""Checks if the node is quantized.
283-
284-
A node is considered quantized if at least one criteria is met:
285-
- Its dtype is not floating point or complex => integer
286-
- It is one of the special cases where the node has been created in to_edge, e.g.
287-
.Scalar operations that have been promoted .Tensor operations
288-
where the scalar is replaced by a full op.
289-
- It has been marked as quantized in the ArmAnnotationInfo custom meta.
290-
291-
Args:
292-
node (torch.fx.Node): The FX node to check.
293-
294-
Returns:
295-
bool: True if the node is quantized, False otherwise.
296-
"""
297-
node_dtype = get_first_fake_tensor(node).dtype
298-
if not node_dtype.is_complex and not node_dtype.is_floating_point:
299-
return True
300-
if node.target in (
301-
exir_ops.edge.aten.full_like.default,
302-
*ComputeConstantOpsAOTPass.targeted_ops,
303-
):
304-
# Special cases where nodes have been created in to_edge, e.g.
305-
# .Scalar operations that have been promoted .Tensor operations
306-
# where the scalar is replaced by a full op.
307-
if all(user.target in Q_OPS for user in node.users):
308-
return True
309-
for user in node.users:
310-
if (
311-
user.target
312-
== exir_ops.edge.dim_order_ops._to_dim_order_copy.default
313-
):
314-
dim_order_dtype = get_first_fake_tensor(user).dtype
315-
if dim_order_dtype.is_complex or dim_order_dtype.is_floating_point:
316-
return False
317-
else:
318-
return False
319-
return True
320-
return (
321-
ArmAnnotationInfo.CUSTOM_META_KEY in node.meta.get("custom", {})
322-
and ArmAnnotationInfo(
323-
node.meta["custom"][ArmAnnotationInfo.CUSTOM_META_KEY]
324-
).quantized
325-
)
326-
327359
def is_node_supported(
328360
self, submodules: typing.Mapping[str, torch.nn.Module], node: fx.Node
329361
) -> bool:
330362

331363
if node.target in (*DQ_OPS, *Q_OPS):
332364
return True
333365

334-
if not self._is_quantized(node):
366+
if not is_quantized(node):
335367
self.reporter.report_reject(
336368
node, "Node was not marked as quantized in the Arm backend."
337369
)

backends/arm/operators/op_avg_pool2d.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,10 +115,13 @@ def define_node(
115115
) -> None:
116116
validate_num_inputs(self.target, inputs, [3, 4, 5, 6, 7])
117117
validate_same_dtype(self.target, [inputs[0], output], ts)
118+
supported_dtypes = [ts.DType.INT8, ts.DType.FP32]
119+
if self.tosa_spec.support_extension("int16"):
120+
supported_dtypes.append(ts.DType.INT16)
118121
validate_valid_dtype(
119122
self.target,
120123
[inputs[0], output],
121-
[ts.DType.INT8, ts.DType.INT16, ts.DType.FP32],
124+
supported_dtypes,
122125
output.tosa_spec,
123126
)
124127

backends/arm/operators/op_cat.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
)
1515
from executorch.backends.arm.operators.operator_validation_utils import (
1616
validate_num_inputs,
17+
validate_same_dtype,
18+
validate_valid_dtype,
1719
)
1820
from executorch.backends.arm.tosa.mapping import TosaArg
1921
from torch.fx import Node
@@ -35,9 +37,19 @@ def define_node(
3537
inputs: List[TosaArg],
3638
output: TosaArg,
3739
) -> None:
40+
supported_dtypes = [ts.DType.BOOL, ts.DType.INT8, ts.DType.INT32, ts.DType.FP32]
41+
if self.tosa_spec.support_extension("int16"):
42+
supported_dtypes.append(ts.DType.INT16)
3843
validate_num_inputs(self.target, inputs, [1, 2])
44+
input_tosa_args = [TosaArg(arg, output.tosa_spec) for arg in inputs[0].special]
45+
validate_same_dtype(self.target, [*input_tosa_args, output], ts)
46+
validate_valid_dtype(
47+
self.target,
48+
[*input_tosa_args, output],
49+
supported_dtypes,
50+
output.tosa_spec,
51+
)
3952

40-
tensors = inputs[0].special
4153
dim = 0 if len(inputs) < 2 else inputs[1].number
4254
rank = len(output.shape)
4355
dim = (dim + rank) % rank
@@ -50,7 +62,7 @@ def define_node(
5062
node,
5163
tosa_graph,
5264
ts.Op.CONCAT,
53-
[tensor.name for tensor in tensors],
65+
[tensor.name for tensor in input_tosa_args],
5466
[output.name],
5567
attr,
5668
)

backends/arm/operators/op_clamp.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -87,20 +87,18 @@ def define_node(
8787
) -> None:
8888
validate_num_inputs(self.target, inputs, [2, 3])
8989
validate_same_dtype(self.target, [inputs[0], output], ts)
90+
supported_dtypes = [ts.DType.INT8, ts.DType.FP16, ts.DType.FP32]
91+
if self.tosa_spec.support_extension("int16"):
92+
supported_dtypes.append(ts.DType.INT16)
9093
validate_valid_dtype(
9194
self.target,
9295
[inputs[0], output],
93-
[
94-
ts.DType.INT8,
95-
ts.DType.INT16,
96-
ts.DType.FP16,
97-
ts.DType.FP32,
98-
],
96+
supported_dtypes,
9997
output.tosa_spec,
10098
)
10199

102100
node_input_dtype = node.meta["val"].dtype
103-
# NOTE: Quantization of the min/max arguments is handled by QuantizeClampArgumentsPass
101+
# NOTE: Quantization of the min/max arguments is handled by QuantizeOperatorArguments
104102
min_val, max_val = self._get_min_max_arguments(node, node_input_dtype)
105103

106104
attr = ts.TosaSerializerAttribute()

backends/arm/operators/op_eq.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def define_node(
4747
validate_valid_dtype(
4848
self.target,
4949
inputs,
50-
[ts.DType.INT8, ts.DType.INT16, ts.DType.INT32, ts.DType.FP32],
50+
[ts.DType.INT32, ts.DType.FP32],
5151
output.tosa_spec,
5252
)
5353
validate_valid_dtype(self.target, output, ts.DType.BOOL, output.tosa_spec)

backends/arm/operators/op_ge.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def define_node(
4747
validate_valid_dtype(
4848
self.target,
4949
inputs,
50-
[ts.DType.INT8, ts.DType.INT16, ts.DType.INT32, ts.DType.FP32],
50+
[ts.DType.INT32, ts.DType.FP32],
5151
output.tosa_spec,
5252
)
5353
validate_valid_dtype(self.target, output, ts.DType.BOOL, output.tosa_spec)

0 commit comments

Comments
 (0)