Skip to content

Commit

Permalink
neon support
Browse files Browse the repository at this point in the history
  • Loading branch information
serge-sans-paille committed Oct 6, 2024
1 parent 29fd328 commit 959a423
Showing 1 changed file with 18 additions and 0 deletions.
18 changes: 18 additions & 0 deletions include/xsimd/arch/xsimd_neon.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1748,6 +1748,23 @@ namespace xsimd
return select(batch_bool<T, A> { b... }, true_br, false_br, neon {});
}

/*************
* transpose *
*************/
/**
 * Transposes, in place, the 4x4 float matrix whose rows are the four
 * batches starting at \c matrix_begin.
 *
 * @param matrix_begin pointer to the first row; rows 0..3 are read and
 *                     then overwritten with the transposed rows.
 * @param matrix_end   one-past-the-last row; only used to assert that the
 *                     range holds exactly batch<float, A>::size rows.
 */
template <class A>
XSIMD_INLINE void transpose(batch<float, A>* matrix_begin, batch<float, A>* matrix_end, requires_arch<neon>) noexcept
{
    assert((matrix_end - matrix_begin == batch<float, A>::size) && "correctly sized matrix");
    (void)matrix_end; // only consumed by the assert above (unused when NDEBUG is defined)

    const auto r0 = matrix_begin[0];
    const auto r1 = matrix_begin[1];
    const auto r2 = matrix_begin[2];
    const auto r3 = matrix_begin[3];

    // vtrnq_f32 is used instead of vtrn1q_f32 / vtrn2q_f32: the latter are
    // A64-only intrinsics, whereas this kernel is selected for the generic
    // neon arch, which also covers 32-bit ARMv7 targets.
    //   t01.val[0] = { r0[0], r1[0], r0[2], r1[2] }   (same lanes as vtrn1q_f32)
    //   t01.val[1] = { r0[1], r1[1], r0[3], r1[3] }   (same lanes as vtrn2q_f32)
    const auto t01 = vtrnq_f32(r0, r1);
    const auto t23 = vtrnq_f32(r2, r3);

    // Stitch the 64-bit halves together: low halves give output rows 0 and 1,
    // high halves give output rows 2 and 3.
    matrix_begin[0] = vcombine_f32(vget_low_f32(t01.val[0]), vget_low_f32(t23.val[0]));
    matrix_begin[1] = vcombine_f32(vget_low_f32(t01.val[1]), vget_low_f32(t23.val[1]));
    matrix_begin[2] = vcombine_f32(vget_high_f32(t01.val[0]), vget_high_f32(t23.val[0]));
    matrix_begin[3] = vcombine_f32(vget_high_f32(t01.val[1]), vget_high_f32(t23.val[1]));
}

/**********
* zip_lo *
**********/
Expand Down Expand Up @@ -2737,6 +2754,7 @@ namespace xsimd
return set(batch<T, A>(), A(), data[idx]...);
}
}

}

#undef WRAP_BINARY_INT_EXCLUDING_64
Expand Down

0 comments on commit 959a423

Please sign in to comment.