Skip to content

Commit ef021f1

Browse files
committed
Simplify activation function selection.
1 parent 110bf77 commit ef021f1

File tree

10 files changed

+37
-97
lines changed

10 files changed

+37
-97
lines changed

src/shaders/matmul_mat_fp16.wgsl

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -124,18 +124,10 @@ fn matmul(in: Input) {
124124
}
125125

126126
if all(u < vec2<u32>(ra.y, rb.y)) {
127-
#ifdef ACT_SQUARED_RELU
128-
local_sum[0] = squared_relu(local_sum[0]);
129-
local_sum[1] = squared_relu(local_sum[1]);
130-
local_sum[2] = squared_relu(local_sum[2]);
131-
local_sum[3] = squared_relu(local_sum[3]);
132-
#endif
133-
#ifdef ACT_TANH
134-
local_sum[0] = tanh(local_sum[0]);
135-
local_sum[1] = tanh(local_sum[1]);
136-
local_sum[2] = tanh(local_sum[2]);
137-
local_sum[3] = tanh(local_sum[3]);
138-
#endif
127+
local_sum[0] = ACT(local_sum[0]);
128+
local_sum[1] = ACT(local_sum[1]);
129+
local_sum[2] = ACT(local_sum[2]);
130+
local_sum[3] = ACT(local_sum[3]);
139131
#ifdef OUT_FP16
140132
output[compute_index(destination, in.uid.z, u.y + 0u, in.uid.x)] = pack4x16float(local_sum[0]);
141133
output[compute_index(destination, in.uid.z, u.y + 1u, in.uid.x)] = pack4x16float(local_sum[1]);

src/shaders/matmul_mat_int8.wgsl

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -138,18 +138,10 @@ fn matmul(in: Input) {
138138
}
139139

140140
if all(u < vec2<u32>(ra.y, rb.y)) {
141-
#ifdef ACT_SQUARED_RELU
142-
local_sum[0] = squared_relu(local_sum[0]);
143-
local_sum[1] = squared_relu(local_sum[1]);
144-
local_sum[2] = squared_relu(local_sum[2]);
145-
local_sum[3] = squared_relu(local_sum[3]);
146-
#endif
147-
#ifdef ACT_TANH
148-
local_sum[0] = tanh(local_sum[0]);
149-
local_sum[1] = tanh(local_sum[1]);
150-
local_sum[2] = tanh(local_sum[2]);
151-
local_sum[3] = tanh(local_sum[3]);
152-
#endif
141+
local_sum[0] = ACT(local_sum[0]);
142+
local_sum[1] = ACT(local_sum[1]);
143+
local_sum[2] = ACT(local_sum[2]);
144+
local_sum[3] = ACT(local_sum[3]);
153145
#ifdef OUT_FP16
154146
output[compute_index(destination, in.uid.z, u.y + 0u, in.uid.x)] = pack4x16float(local_sum[0]);
155147
output[compute_index(destination, in.uid.z, u.y + 1u, in.uid.x)] = pack4x16float(local_sum[1]);

src/shaders/matmul_mat_nf4.wgsl

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -205,18 +205,10 @@ fn matmul(in: Input) {
205205
}
206206

207207
if all(u < vec2<u32>(ra.y, rb.y)) {
208-
#ifdef ACT_SQUARED_RELU
209-
local_sum[0] = squared_relu(local_sum[0]);
210-
local_sum[1] = squared_relu(local_sum[1]);
211-
local_sum[2] = squared_relu(local_sum[2]);
212-
local_sum[3] = squared_relu(local_sum[3]);
213-
#endif
214-
#ifdef ACT_TANH
215-
local_sum[0] = tanh(local_sum[0]);
216-
local_sum[1] = tanh(local_sum[1]);
217-
local_sum[2] = tanh(local_sum[2]);
218-
local_sum[3] = tanh(local_sum[3]);
219-
#endif
208+
local_sum[0] = ACT(local_sum[0]);
209+
local_sum[1] = ACT(local_sum[1]);
210+
local_sum[2] = ACT(local_sum[2]);
211+
local_sum[3] = ACT(local_sum[3]);
220212
#ifdef OUT_FP16
221213
output[compute_index(destination, in.uid.z, u.y + 0u, in.uid.x, 4u)] = pack4x16float(local_sum[0]);
222214
output[compute_index(destination, in.uid.z, u.y + 1u, in.uid.x, 4u)] = pack4x16float(local_sum[1]);

src/shaders/matmul_vec_fp16.wgsl

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -94,13 +94,7 @@ fn matmul(@builtin(global_invocation_id) invocation_id: vec3<u32>) {
9494

9595
if index == 0u {
9696
let btc = compute_index(destination, batch, token, channel);
97-
var out = sketch[0];
98-
#ifdef ACT_SQUARED_RELU
99-
out = squared_relu(out);
100-
#endif
101-
#ifdef ACT_TANH
102-
out = tanh(out);
103-
#endif
97+
let out = ACT(sketch[0]);
10498
#ifdef OUT_FP16
10599
output[btc] = pack4x16float(out);
106100
#else

src/shaders/matmul_vec_int8.wgsl

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -102,13 +102,7 @@ fn matmul(@builtin(global_invocation_id) invocation_id: vec3<u32>) {
102102

103103
if index == 0u {
104104
let btc = compute_index(destination, batch, token, channel);
105-
var out = sketch[0];
106-
#ifdef ACT_SQUARED_RELU
107-
out = squared_relu(out);
108-
#endif
109-
#ifdef ACT_TANH
110-
out = tanh(out);
111-
#endif
105+
let out = ACT(sketch[0]);
112106
#ifdef OUT_FP16
113107
output[btc] = pack4x16float(out);
114108
#else

src/shaders/matmul_vec_nf4.wgsl

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -154,13 +154,7 @@ fn matmul(@builtin(global_invocation_id) invocation_id: vec3<u32>) {
154154

155155
if index == 0u {
156156
let btc = compute_index(destination, batch, token, channel, 2u);
157-
var out = sketch[0];
158-
#ifdef ACT_SQUARED_RELU
159-
out = squared_relu(out);
160-
#endif
161-
#ifdef ACT_TANH
162-
out = tanh(out);
163-
#endif
157+
let out = ACT(sketch[0]);
164158
#ifdef OUT_FP16
165159
output[btc] = pack4x16float(out);
166160
#else

src/shaders/subgroup/matmul_vec_fp16.wgsl

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -120,13 +120,7 @@ fn matmul(
120120

121121
if index == 0u {
122122
let btc = compute_index(destination, batch, token, channel);
123-
var out = sketch[0];
124-
#ifdef ACT_SQUARED_RELU
125-
out = squared_relu(out);
126-
#endif
127-
#ifdef ACT_TANH
128-
out = tanh(out);
129-
#endif
123+
let out = ACT(sketch[0]);
130124
#ifdef OUT_FP16
131125
output[btc] = pack4x16float(out);
132126
#else

src/shaders/subgroup/matmul_vec_int8.wgsl

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -127,13 +127,7 @@ fn matmul(
127127

128128
if index == 0u {
129129
let btc = compute_index(destination, batch, token, channel);
130-
var out = sketch[0];
131-
#ifdef ACT_SQUARED_RELU
132-
out = squared_relu(out);
133-
#endif
134-
#ifdef ACT_TANH
135-
out = tanh(out);
136-
#endif
130+
let out = ACT(sketch[0]);
137131
#ifdef OUT_FP16
138132
output[btc] = pack4x16float(out);
139133
#else

src/shaders/subgroup/matmul_vec_nf4.wgsl

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -172,13 +172,7 @@ fn matmul(
172172

173173
if index == 0u {
174174
let btc = compute_index(destination, batch, token, channel, 2u);
175-
var out = sketch[0];
176-
#ifdef ACT_SQUARED_RELU
177-
out = squared_relu(out);
178-
#endif
179-
#ifdef ACT_TANH
180-
out = tanh(out);
181-
#endif
175+
let out = ACT(sketch[0]);
182176
#ifdef OUT_FP16
183177
output[btc] = pack4x16float(out);
184178
#else

src/tensor/ops.rs

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -150,16 +150,6 @@ pub enum Activation {
150150
Tanh,
151151
}
152152

153-
impl std::fmt::Display for Activation {
154-
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
155-
match self {
156-
Activation::None => write!(f, "NONE"),
157-
Activation::SquaredRelu => write!(f, "SQUARED_RELU"),
158-
Activation::Tanh => write!(f, "TANH"),
159-
}
160-
}
161-
}
162-
163153
impl Macros {
164154
/// Define a `u32` macro `NF4_BLOCK_SIZE`.
165155
pub fn nf4(mut self, block_size: u32) -> Self {
@@ -196,6 +186,16 @@ impl Macros {
196186
}
197187
}
198188

189+
pub fn activate(mut self, name: impl Into<String>, value: Activation) -> Self {
190+
let name = name.into();
191+
match value {
192+
Activation::None => self.insert(name, "".into()),
193+
Activation::SquaredRelu => self.insert(name, "squared_relu".into()),
194+
Activation::Tanh => self.insert(name, "tanh".into()),
195+
};
196+
self
197+
}
198+
199199
/// Define the macro that specifies the input/output tensor data type.
200200
pub fn tensor<T: Float>(
201201
mut self,
@@ -654,7 +654,7 @@ impl TensorOp {
654654
.u32("BLOCK_SIZE", BLOCK_SIZE)
655655
.tensor(&input, Some("IN"))
656656
.tensor(&output, Some("OUT"))
657-
.custom(active, Some("ACT")),
657+
.activate("ACT", active),
658658
);
659659
#[cfg(feature = "subgroup-ops")]
660660
let pipeline = context.checkout_pipeline(
@@ -667,7 +667,7 @@ impl TensorOp {
667667
.u32("BLOCK_SIZE", BLOCK_SIZE)
668668
.tensor(&input, Some("IN"))
669669
.tensor(&output, Some("OUT"))
670-
.custom(active, Some("ACT")),
670+
.activate("ACT", active),
671671
);
672672
let bindings = vec![context.device.create_bind_group(&BindGroupDescriptor {
673673
label: None,
@@ -743,7 +743,7 @@ impl TensorOp {
743743
.int8(Self::INT8_BLOCK_SIZE)
744744
.tensor(&input, Some("IN"))
745745
.tensor(&output, Some("OUT"))
746-
.custom(active, Some("ACT")),
746+
.activate("ACT", active),
747747
);
748748
#[cfg(feature = "subgroup-ops")]
749749
let pipeline = context.checkout_pipeline(
@@ -757,7 +757,7 @@ impl TensorOp {
757757
.int8(Self::INT8_BLOCK_SIZE)
758758
.tensor(&input, Some("IN"))
759759
.tensor(&output, Some("OUT"))
760-
.custom(active, Some("ACT")),
760+
.activate("ACT", active),
761761
);
762762
let bindings = vec![context.device.create_bind_group(&BindGroupDescriptor {
763763
label: None,
@@ -837,7 +837,7 @@ impl TensorOp {
837837
.nf4(Self::NF4_BLOCK_SIZE)
838838
.tensor(&input, Some("IN"))
839839
.tensor(&output, Some("OUT"))
840-
.custom(active, Some("ACT")),
840+
.activate("ACT", active),
841841
);
842842
#[cfg(feature = "subgroup-ops")]
843843
let pipeline = context.checkout_pipeline(
@@ -851,7 +851,7 @@ impl TensorOp {
851851
.nf4(Self::NF4_BLOCK_SIZE)
852852
.tensor(&input, Some("IN"))
853853
.tensor(&output, Some("OUT"))
854-
.custom(active, Some("ACT")),
854+
.activate("ACT", active),
855855
);
856856
let bindings = vec![context.device.create_bind_group(&BindGroupDescriptor {
857857
label: None,
@@ -932,7 +932,7 @@ impl TensorOp {
932932
.u32("BLOCK_SIZE", BLOCK_SIZE)
933933
.tensor(&input, Some("IN"))
934934
.tensor(&output, Some("OUT"))
935-
.custom(active, Some("ACT")),
935+
.activate("ACT", active),
936936
);
937937
let bindings = vec![context.device.create_bind_group(&BindGroupDescriptor {
938938
label: None,
@@ -1013,7 +1013,7 @@ impl TensorOp {
10131013
.int8(Self::INT8_BLOCK_SIZE)
10141014
.tensor(&input, Some("IN"))
10151015
.tensor(&output, Some("OUT"))
1016-
.custom(active, Some("ACT")),
1016+
.activate("ACT", active),
10171017
);
10181018
let bindings = vec![context.device.create_bind_group(&BindGroupDescriptor {
10191019
label: None,
@@ -1098,7 +1098,7 @@ impl TensorOp {
10981098
.nf4(Self::NF4_BLOCK_SIZE)
10991099
.tensor(&input, Some("IN"))
11001100
.tensor(&output, Some("OUT"))
1101-
.custom(active, Some("ACT")),
1101+
.activate("ACT", active),
11021102
);
11031103
let bindings = vec![context.device.create_bind_group(&BindGroupDescriptor {
11041104
label: None,

0 commit comments

Comments
 (0)