1414/* For NNLIB APIs */
1515#include " xa_nnlib_kernels_api.h"
1616
17- /* Potential NNLIB function/APIs */
17+ #include < executorch/runtime/kernel/kernel_includes.h>
18+
19+ using executorch::runtime::KernelRuntimeContext;
20+ using executorch::runtime::Result;
1821
22+ /* Potential NNLIB function/APIs */
/*
 * Prototype only: xa_nn_broadcast_32_32 is declared with C linkage and its
 * definition is not part of this file.
 *
 * Parameters, as declared:
 *   p_out     - writable output buffer of WORD32 elements.
 *   out_shape - read-only array of ints describing the output shape.
 *   p_in      - input buffer of WORD32 elements (not const-qualified in this
 *               prototype).
 *   in_shape  - read-only array of ints describing the input shape.
 *   num_dims  - dimension count.
 * Returns a WORD32.
 *
 * NOTE(review): presumably broadcasts 32-bit elements from in_shape to
 * out_shape, with num_dims entries in each shape array and a status code as
 * the return value - confirm against the NNLIB documentation.
 */
1923extern " C" WORD32 xa_nn_broadcast_32_32 (
2024 WORD32* __restrict__ p_out,
2125 const int * const out_shape,
2226 WORD32* __restrict__ p_in,
2327 const int * const in_shape,
2428 int num_dims);
2529
/*
 * Prototype only: xa_nn_concat_32_32 is declared with C linkage and its
 * definition is not part of this file.
 *
 * Parameters, as declared:
 *   p_out         - writable output buffer of WORD32 elements.
 *   p_out_shape   - read-only output shape array.
 *   pp_inps       - pointer to pointers to read-only WORD32 input data.
 *   pp_inps_shape - pointer to read-only pointers to read-only shape arrays.
 *   num_out_dims  - output dimension count.
 *   num_inp       - input count.
 *   num_inp_dims  - input dimension count.
 *   axis          - axis selector.
 * Returns a WORD32.
 *
 * NOTE(review): presumably concatenates num_inp inputs along `axis`, with one
 * entry per input in pp_inps / pp_inps_shape and a status code as the return
 * value - confirm against the NNLIB documentation.
 */
30+ extern " C" WORD32 xa_nn_concat_32_32 (
31+ WORD32* __restrict__ p_out,
32+ const WORD32* const p_out_shape,
33+ const WORD32** pp_inps,
34+ const WORD32* const * pp_inps_shape,
35+ WORD32 num_out_dims,
36+ WORD32 num_inp,
37+ WORD32 num_inp_dims,
38+ WORD32 axis);
39+
2640extern " C" WORD32 xa_nn_elm_add_broadcast_4D_f32xf32_f32 (
2741 FLOAT32* __restrict__ p_out,
2842 const WORD32* const p_out_shape,
@@ -31,6 +45,26 @@ extern "C" WORD32 xa_nn_elm_add_broadcast_4D_f32xf32_f32(
3145 const FLOAT32* __restrict__ p_inp2,
3246 const WORD32* const p_inp2_shape);
3347
/*
 * Prototype only: xa_nn_elm_atan2_f32 is declared with C linkage and returns
 * void, so it reports no status to the caller.
 *
 * Parameters, as declared:
 *   z - writable FLOAT32 buffer.
 *   y - read-only FLOAT32 buffer.
 *   x - read-only FLOAT32 buffer.
 *   N - WORD32 count.
 *
 * NOTE(review): presumably computes z[i] = atan2(y[i], x[i]) for N elements -
 * confirm operand order and any alignment requirements against the NNLIB
 * documentation.
 */
48+ extern " C" void
49+ xa_nn_elm_atan2_f32 (FLOAT32* z, const FLOAT32* y, const FLOAT32* x, WORD32 N);
50+
/*
 * Prototype only: xa_nn_elm_clamp_f32xf32xf32_f32 is declared with C linkage
 * and its definition is not part of this file.
 *
 * Parameters, as declared:
 *   p_out   - writable FLOAT32 output buffer.
 *   p_inp   - read-only FLOAT32 input buffer.
 *   p_min   - read-only FLOAT32 buffer of lower bounds.
 *   p_max   - read-only FLOAT32 buffer of upper bounds.
 *   num_elm - WORD32 element count.
 * All four pointers are __restrict__-qualified, i.e. the declaration promises
 * the buffers do not alias one another.
 * Returns a WORD32.
 *
 * NOTE(review): presumably clamps each of num_elm input elements to the
 * matching [p_min[i], p_max[i]] range, with a status code as the return value -
 * confirm against the NNLIB documentation.
 */
51+ extern " C" WORD32 xa_nn_elm_clamp_f32xf32xf32_f32 (
52+ FLOAT32* __restrict__ p_out,
53+ const FLOAT32* __restrict__ p_inp,
54+ const FLOAT32* __restrict__ p_min,
55+ const FLOAT32* __restrict__ p_max,
56+ WORD32 num_elm);
57+
/*
 * Prototype only: xa_nn_elm_clamp_broadcast_4D_f32Xf32xf32_f32 is declared
 * with C linkage and its definition is not part of this file.
 *
 * Each data buffer is paired with a read-only WORD32 shape array:
 *   p_out / p_out_shape - writable FLOAT32 output and its shape.
 *   p_inp / p_inp_shape - read-only FLOAT32 input and its shape.
 *   p_min / p_min_shape - read-only FLOAT32 lower bounds and their shape.
 *   p_max / p_max_shape - read-only FLOAT32 upper bounds and their shape.
 * Returns a WORD32.
 *
 * NOTE(review): the name spells "f32Xf32" with a capital X; because this is a
 * C-linkage symbol, the spelling must match the library's exported name
 * exactly - verify.
 * NOTE(review): presumably each shape array has 4 entries (per the "4D" in the
 * name) and the inputs are broadcast to the output shape before clamping -
 * confirm against the NNLIB documentation.
 */
58+ extern " C" WORD32 xa_nn_elm_clamp_broadcast_4D_f32Xf32xf32_f32 (
59+ FLOAT32* __restrict__ p_out,
60+ const WORD32* const p_out_shape,
61+ const FLOAT32* __restrict__ p_inp,
62+ const WORD32* const p_inp_shape,
63+ const FLOAT32* __restrict__ p_min,
64+ const WORD32* const p_min_shape,
65+ const FLOAT32* __restrict__ p_max,
66+ const WORD32* const p_max_shape);
67+
3468extern " C" WORD32 xa_nn_elm_div_broadcast_4D_f32xf32_f32 (
3569 FLOAT32* __restrict__ p_out,
3670 const WORD32* const p_out_shape,
@@ -97,6 +131,20 @@ extern "C" void xa_nn_elm_pow_f32(
97131 const FLOAT32* restrict y,
98132 WORD32 N);
99133
/*
 * Prototype only: xa_nn_elm_remainder_f32xf32_f32 is declared with C linkage
 * and its definition is not part of this file.
 *
 * Parameters, as declared:
 *   p_out   - writable FLOAT32 output buffer.
 *   p_inp1  - read-only FLOAT32 first operand buffer.
 *   p_inp2  - read-only FLOAT32 second operand buffer.
 *   num_elm - WORD32 element count.
 * Returns a WORD32.
 *
 * NOTE(review): presumably writes the element-wise remainder of p_inp1 by
 * p_inp2 for num_elm elements; the sign convention of the result (fmod-style
 * vs. floor-style) and the meaning of the return value are not visible here -
 * confirm against the NNLIB documentation.
 */
134+ extern " C" WORD32 xa_nn_elm_remainder_f32xf32_f32 (
135+ FLOAT32* __restrict__ p_out,
136+ const FLOAT32* __restrict__ p_inp1,
137+ const FLOAT32* __restrict__ p_inp2,
138+ WORD32 num_elm);
139+
/*
 * Prototype only: xa_nn_elm_remainder_broadcast_4D_f32xf32_f32 is declared
 * with C linkage and its definition is not part of this file.
 *
 * Each data buffer is paired with a read-only WORD32 shape array:
 *   p_out  / p_out_shape  - writable FLOAT32 output and its shape.
 *   p_inp1 / p_inp1_shape - read-only FLOAT32 first operand and its shape.
 *   p_inp2 / p_inp2_shape - read-only FLOAT32 second operand and its shape.
 * Returns a WORD32.
 *
 * NOTE(review): presumably each shape array has 4 entries (per the "4D" in the
 * name) and the operands are broadcast to the output shape before the
 * remainder is taken - confirm against the NNLIB documentation.
 */
140+ extern " C" WORD32 xa_nn_elm_remainder_broadcast_4D_f32xf32_f32 (
141+ FLOAT32* __restrict__ p_out,
142+ const WORD32* const p_out_shape,
143+ const FLOAT32* __restrict__ p_inp1,
144+ const WORD32* const p_inp1_shape,
145+ const FLOAT32* __restrict__ p_inp2,
146+ const WORD32* const p_inp2_shape);
147+
100148extern " C" WORD32 xa_nn_elm_where_f32xf32_f32 (
101149 FLOAT32* __restrict__ p_out,
102150 const FLOAT32* __restrict__ p_inp1,
@@ -125,11 +173,22 @@ extern "C" WORD32 xa_nn_reduce_mean_4D_f32_f32(
125173 WORD32 num_axis_dims,
126174 void * __restrict__ p_scratch_in);
127175
/*
 * Prototype only: xa_nn_transpose_32_32 is declared with C linkage and its
 * definition is not part of this file.
 *
 * Parameters, as declared:
 *   p_out         - writable WORD32 output buffer.
 *   p_out_shape   - read-only output shape array.
 *   p_inp         - read-only WORD32 input buffer.
 *   p_inp_shape   - read-only input shape array.
 *   p_permute_vec - read-only WORD32 permutation vector.
 *   num_out_dims  - output dimension count.
 *   num_inp_dims  - input dimension count.
 * Returns a WORD32.
 *
 * NOTE(review): presumably output axis i takes input axis p_permute_vec[i] and
 * the return value is a status code - confirm the permutation convention
 * against the NNLIB documentation.
 */
176+ extern " C" WORD32 xa_nn_transpose_32_32 (
177+ WORD32* __restrict__ p_out,
178+ const WORD32* const p_out_shape,
179+ const WORD32* __restrict__ p_inp,
180+ const WORD32* const p_inp_shape,
181+ const WORD32* __restrict__ p_permute_vec,
182+ WORD32 num_out_dims,
183+ WORD32 num_inp_dims);
184+
128185namespace cadence {
129186namespace impl {
130187namespace HiFi {
131188namespace kernels {
132189
/*
 * Declaration only: takes a KernelRuntimeContext by non-const reference and a
 * size_t, and returns an untyped pointer. The definition is not in this file.
 *
 * NOTE(review): presumably requests `size` bytes of temporary memory through
 * `ctx`; the failure behavior (e.g. whether nullptr can be returned) and the
 * lifetime/ownership of the returned memory are not visible here - confirm
 * against the implementation.
 */
190+ void * allocate_temp_memory (KernelRuntimeContext& ctx, size_t size);
191+
/*
 * Declaration only: a memcpy declared inside the cadence::impl::HiFi::kernels
 * namespaces, so it is a separate entity from the C library's global memcpy.
 * Returns void, unlike the standard function, which returns the destination
 * pointer.
 *
 * NOTE(review): presumably copies num_bytes bytes from src to dst; whether
 * overlapping ranges are supported is not visible here - confirm against the
 * implementation.
 */
133192void memcpy (void * dst, const void * src, size_t num_bytes);
134193
135194WORD32 matmul_asym8uxasym8u_asym8u (
0 commit comments