From 86beae9efddc1df34670559505fe5fcd0646fd8f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20GUEZO?=
Date: Sun, 6 Jul 2025 09:54:41 +0200
Subject: [PATCH] feat(mat4): add sub and add functions

Add element-wise addition and subtraction for Mat4f_t:

- mat4f_add_r / mat4f_sub_r combine `m2` into `out` in place and return
  `out`.
- mat4f_add / mat4f_sub copy `m1` with mat4f_clone, run the in-place
  variant on the copy and return the copy by value.
- mat4f_clone (defined in the header) returns a copy of the matrix it is
  given.

The in-place loops work on groups of four floats, using SSE intrinsics
(_mm_*_ps), NEON intrinsics (vld1q_f32 / vst1q_f32) or a plain scalar
loop, selected by the preprocessor.
---
Review notes (this area is ignored by git am):

* mat4.c defined the by-value functions as mat4_add / mat4_sub while
  mat4.h declares mat4f_add / mat4f_sub, so the declared symbols were
  never defined. The definitions are renamed below to match the header.
  The blob ids on the "index" lines predate that rename.
* NOTE(review): _mm_load_ps / _mm_store_ps require 16-byte aligned
  addresses; the removed draft used the unaligned _mm_loadu_ps /
  _mm_storeu_ps. The declaration of Mat4f_t is not part of this patch,
  so confirm it is 16-byte aligned or switch to the unaligned variants.
* NOTE(review): this copy of the patch is damaged. Text is missing after
  "From:" and at the three "for(int i = 0; im[i]);" spans (loop bound,
  opening #if and first load); those spans are left as found. Line
  breaks and indentation were restored by hand, and blank context lines
  may be missing, so regenerate the patch from the commit before
  applying it.

 src/math/mat4.c | 102 +++++++++++++++++++++++++++++++-----------------
 src/math/mat4.h |  11 ++++--
 2 files changed, 74 insertions(+), 39 deletions(-)

diff --git a/src/math/mat4.c b/src/math/mat4.c
index e49fd20..606d0ff 100644
--- a/src/math/mat4.c
+++ b/src/math/mat4.c
@@ -23,13 +23,15 @@ Mat4f_t mat4f_scalar(float f)
 {
     Mat4f_t mat;
     for(int i = 0; im[i]);
+        __m128 mb = _mm_load_ps(&m2->m[i]);
 
-// Mat4_t mat4_add(const Mat4_t* m1, const Mat4_t* m2)
-// {
-// Mat4_t mat;
+        __m128 mres = _mm_add_ps(ma, mb);
+        _mm_store_ps(&out->m[i], mres);
 
-// #if defined(__x86_64__) || defined(__i386__)
-
-// for (int i = 0; i < 16; i += 4) {
-// __m128 a = _mm_loadu_ps(&m1->m[i]);
-// __m128 b = _mm_loadu_ps(&m2->m[i]);
-// __m128 c = _mm_add_ps(a, b);
-// _mm_storeu_ps(&mat.m[i], c);
-// }
-// #elif defined(__aarch64__)
-// for (int i = 0; i < 16; i += 4) {
-// float32x4_t a = vld1q_f32(&m1->m[i]);
-// float32x4_t b = vld1q_f32(&m2->m[i]);
-// float32x4_t c = vaddq_f32(a, b);
-// vst1q_f32(&mat.m[i], c);
-// }
-// #else
-// for (int i = 0; i < 16; i++) {
-// mat.m[i] = m1->m[i] + m2->m[i];
-// }
-// #endif
+#elif defined (SIMD_ARCH)
+        float32x4_t ma = vld1q_f32(&out->m[i]);
+        float32x4_t mb = vld1q_f32(&m2->m[i]);
+
+        float32x4_t mres = vaddq_f32(ma, mb);
 
-// return mat;
-// }
+        vst1q_f32(&out->m[i], mres);
+
+#else
+        for (int j = 0; j < 4; j++) {
+            out->m[i + j] += m2->m[i + j];
+        }
+#endif
+    }
+    return out;
+}
 
-// Mat4_t mat4_sub(const Mat4_t* m1, const Mat4_t* m2)
-// {
-// Mat4_t mat;
+Mat4f_t mat4f_add(const Mat4f_t* m1, const Mat4f_t* m2)
+{
+    Mat4f_t mout = mat4f_clone(m1);
+    mat4f_add_r(&mout, m2);
+    return mout;
+}
 
-// for(int i = 0; i<16; i++) {
-// mat.m[i] = m1->m[i] - m2->m[i];
-// }
+Mat4f_t* mat4f_sub_r(Mat4f_t *out, const Mat4f_t *m2)
+{
+    for(int i = 0; im[i]);
+        __m128 mb = _mm_load_ps(&m2->m[i]);
 
-// return mat;
-// }
+        __m128 mres = _mm_sub_ps(ma, mb);
+        _mm_store_ps(&out->m[i], mres);
+
+#elif defined (SIMD_ARCH)
+        float32x4_t ma = vld1q_f32(&out->m[i]);
+        float32x4_t mb = vld1q_f32(&m2->m[i]);
+
+        float32x4_t mres = vsubq_f32(ma, mb);
+
+        vst1q_f32(&out->m[i], mres);
+
+#else
+        for (int j = 0; j < 4; j++) {
+            out->m[i + j] -= m2->m[i + j];
+        }
+#endif
+    }
+    return out;
+}
+
+Mat4f_t mat4f_sub(const Mat4f_t* m1, const Mat4f_t* m2)
+{
+    Mat4f_t mout = mat4f_clone(m1);
+    mat4f_sub_r(&mout, m2);
+    return mout;
+}
 
 // Mat4_t mat4_scl(const Mat4_t* m, float scalar)
 // {
diff --git a/src/math/mat4.h b/src/math/mat4.h
index 65fc108..21fb3c3 100644
--- a/src/math/mat4.h
+++ b/src/math/mat4.h
@@ -15,11 +15,16 @@ Mat4f_t mat4f_scalar(float f);
 Mat4f_t mat4f_zero(void);
 Mat4f_t mat4f_identity(void);
 
-// Mat4_t mat4_zro(void);
+inline static Mat4f_t mat4f_clone(const Mat4f_t *__restrict out)
+{
+    return *out;
+}
 
-// Mat4_t mat4_ity(void);
+Mat4f_t mat4f_add(const Mat4f_t* m1, const Mat4f_t* m2);
+Mat4f_t* mat4f_add_r(Mat4f_t* out, const Mat4f_t* m2);
 
-// Mat4_t mat4_add(const Mat4_t* m1, const Mat4_t* m2);
+Mat4f_t mat4f_sub(const Mat4f_t* m1, const Mat4f_t* m2);
+Mat4f_t* mat4f_sub_r(Mat4f_t* out, const Mat4f_t* m2);
 
 // Mat4_t mat4_sub(const Mat4_t* m1, const Mat4_t* m2);