From 2b1e00305c319bfbf89526961790780092c627e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20GUEZO?= Date: Thu, 7 Aug 2025 08:07:17 +0200 Subject: [PATCH] feat(mat4): add x86 transpose function --- .github/workflows/cmake-multi-platform.yml | 2 +- src/main.c | 33 +++++++++++++++++---- src/math/mat4.c | 34 +++++++++++++++++++--- src/math/mat4.h | 4 +-- 4 files changed, 61 insertions(+), 12 deletions(-) diff --git a/.github/workflows/cmake-multi-platform.yml b/.github/workflows/cmake-multi-platform.yml index abfd8de..40255d1 100644 --- a/.github/workflows/cmake-multi-platform.yml +++ b/.github/workflows/cmake-multi-platform.yml @@ -73,4 +73,4 @@ jobs: working-directory: ${{ steps.strings.outputs.build-output-dir }} # Execute tests defined by the CMake configuration. Note that --build-config is needed because the default Windows generator is a multi-config generator (Visual Studio generator). # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail - run: ctest --build-config ${{ matrix.build_type }} --verbose \ No newline at end of file + run: ctest --build-config ${{ matrix.build_type }} \ No newline at end of file diff --git a/src/main.c b/src/main.c index f277e94..815c47f 100644 --- a/src/main.c +++ b/src/main.c @@ -3,11 +3,34 @@ int main(void) { - float arr[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + float arr[16] = { + 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16 + }; + Mat4f_t mat1 = mat4f_from_array(arr); - Mat4f_t mat2 = mat4f_scalar(1); - Mat4f_t mat3 = mat4f_zero(); - Mat4f_t mat4 = mat4f_identity(); - printf("%f", arr[1]); + Mat4f_t* mat_tps = mat4f_tpo_r(&mat1); + + printf("%f %f %f %f\n%f %f %f %f\n%f %f %f %f\n%f %f %f %f\n", + mat1.m[0], + mat1.m[1], + mat1.m[2], + mat1.m[3], + mat1.m[4], + mat1.m[5], + mat1.m[6], + mat1.m[7], + mat1.m[8], + mat1.m[9], + mat1.m[10], + mat1.m[11], + mat1.m[12], + mat1.m[13], + mat1.m[14], + mat1.m[15] + + ); return 0; } diff --git a/src/math/mat4.c 
b/src/math/mat4.c index 2375184..bce0c93 100644 --- a/src/math/mat4.c +++ b/src/math/mat4.c @@ -227,22 +227,48 @@ Mat4f_t mat4_mul(const Mat4f_t* m1, const Mat4f_t* m2) return mout; } -Mat4f_t* mat4_tpo_r(Mat4f_t *__restrict out) +Mat4f_t* mat4f_tpo_r(Mat4f_t *__restrict out) { Mat4f_t clone = mat4f_clone(out); - for(int i = 0; i < MAT_DIM; i++) { -#if defined (SIMD_X86) + #if defined (SIMD_X86) + __m128 res[4]; + for (int i=0; im[0 * MAT_DIM], r0); + _mm_store_ps(&out->m[1 * MAT_DIM], r1); + _mm_store_ps(&out->m[2 * MAT_DIM], r2); + _mm_store_ps(&out->m[3 * MAT_DIM], r3); #elif defined (SIMD_ARCH) #else + for(int i = 0; i < MAT_DIM; i++) { int dim_i = i * MAT_DIM; for (int j = 0; j < MAT_DIM; j++) { out->m[dim_i + j] = clone.m[(j * MAT_DIM) + i]; } -#endif } +#endif return out; +} + +Mat4f_t mat4f_tpo(const Mat4f_t *__restrict m) /* Returns a transposed copy of *m; *m itself is not written to. */ +{ + Mat4f_t res = mat4f_clone(m); + mat4f_tpo_r(&res); /* transpose the copy in place; mat4f_tpo_r writes the result back into res */ + return res; } \ No newline at end of file diff --git a/src/math/mat4.h b/src/math/mat4.h index a0327e2..11eace5 100644 --- a/src/math/mat4.h +++ b/src/math/mat4.h @@ -34,8 +34,8 @@ Mat4f_t* mat4f_scale_r(Mat4f_t *out, float scalar); Mat4f_t mat4f_mul(const Mat4f_t* m1, const Mat4f_t* m2); Mat4f_t* mat4f_mul_r(Mat4f_t* out, const Mat4f_t* m2); -Mat4f_t mat4_tpo(const Mat4f_t *__restrict m); -Mat4f_t* mat4_tpo_r(Mat4f_t *__restrict m); +Mat4f_t mat4f_tpo(const Mat4f_t *__restrict m); +Mat4f_t* mat4f_tpo_r(Mat4f_t *__restrict m); // float mat4_det(const Mat4_t* m);