// the default hvx_vec_f32_to_f16 with output into the local array. static void __attribute__((noinline)) hvx_vec_f32_to_f16_a(void *ptr, HVX_Vector v0, HVX_Vector v1 ...