From 3703ab17b00d0de07e56eaadc8778e3676db0f92 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20GUEZO?=
Date: Fri, 20 Jun 2025 10:18:48 +0200
Subject: [PATCH] Cleanup: remove clone calls from basic vec operations

- comment out all functions in mat4.c
- add a shared math header
---
 src/math/common_math.h |  20 +++++
 src/math/mat4.c        | 187 +++++++++++++++++++----------------------
 src/math/vec4.c        |  30 ++-----
 src/math/vec4.h        |  15 +---
 4 files changed, 115 insertions(+), 137 deletions(-)
 create mode 100644 src/math/common_math.h

diff --git a/src/math/common_math.h b/src/math/common_math.h
new file mode 100644
index 0000000..d89a9df
--- /dev/null
+++ b/src/math/common_math.h
@@ -0,0 +1,20 @@
+// Common math library
+
+#ifndef MATH_H
+#define MATH_H
+
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+
+#if defined(__x86_64__) || defined(__i386__)
+    #define SIMD_X86
+    #include <immintrin.h>
+#elif defined(__aarch64__) || defined(__arm64__)
+    #define SIMD_ARCH
+    #include <arm_neon.h>
+#else
+    #define SIMD_NONE
+#endif
+
+#endif // MATH_H
diff --git a/src/math/mat4.c b/src/math/mat4.c
index 2d549bf..70c92e2 100644
--- a/src/math/mat4.c
+++ b/src/math/mat4.c
@@ -1,107 +1,94 @@
-#include "mat4.h"
-#include <string.h>
-#include <stdio.h>
+// #include "mat4.h"
+// #include "common_math.h"
+
+// Mat4_t mat4(const float arr[16])
+// {
+//     Mat4_t mat;
+//     memcpy(mat.m, arr, 16*sizeof(float));
+//     return mat;
+// }
 
-#if defined(__x86_64__) || defined(__i386__)
-#include <immintrin.h>  // SSE
-#elif defined(__aarch64__) || defined(__arm64__) || defined(__ARM_NEON)
-#include <arm_neon.h>   // NEON
-#else
-#warning "SIMD intrinsics not enabled for this architecture"
-#endif
+// Mat4_t mat4_zro(void)
+// {
+//     return (Mat4_t){0};
+// }
 
-Mat4_t mat4(const float arr[16])
-{
-    Mat4_t mat;
-    memcpy(mat.m, arr, 16*sizeof(float));
-    return mat;
-}
+// Mat4_t mat4_ity(void)
+// {
+//     return (Mat4_t) {{
+//         1, 0, 0, 0,
+//         0, 1, 0, 0,
+//         0, 0, 1, 0,
+//         0, 0, 0, 1,
+//     }};
+// }
 
-Mat4_t mat4_zro(void)
-{
-    return (Mat4_t){0};
-}
+// Mat4_t mat4_add(const Mat4_t* m1, const Mat4_t* m2)
+// {
+//     Mat4_t mat;
 
-Mat4_t mat4_ity(void)
-{
-    return (Mat4_t) {{
-        1, 0, 0, 0,
-        0, 1, 0, 0,
-        0, 0, 1, 0,
-        0, 0, 0, 1,
-    }};
-}
-
-Mat4_t mat4_add(const Mat4_t* m1, const Mat4_t* m2)
-{
-    Mat4_t mat;
-
-#if defined(__x86_64__) || defined(__i386__)
-    // SSE: add 4 floats in parallel
-    for (int i = 0; i < 16; i += 4) {
-        __m128 a = _mm_loadu_ps(&m1->m[i]);
-        __m128 b = _mm_loadu_ps(&m2->m[i]);
-        __m128 c = _mm_add_ps(a, b);
-        _mm_storeu_ps(&mat.m[i], c);
-    }
-#elif defined(__aarch64__)
-    printf("hello world");
-    // NEON: add 4 floats in parallel
-    for (int i = 0; i < 16; i += 4) {
-        float32x4_t a = vld1q_f32(&m1->m[i]);
-        float32x4_t b = vld1q_f32(&m2->m[i]);
-        float32x4_t c = vaddq_f32(a, b);
-        vst1q_f32(&mat.m[i], c);
-    }
-#else
-    // Plain scalar fallback
-    for (int i = 0; i < 16; i++) {
-        mat.m[i] = m1->m[i] + m2->m[i];
-    }
-#endif
-
-    return mat;
-}
-
-Mat4_t mat4_sub(const Mat4_t* m1, const Mat4_t* m2)
-{
-    Mat4_t mat;
-
-    for(int i = 0; i<16; i++) {
-        mat.m[i] = m1->m[i] - m2->m[i];
-    }
-
-    return mat;
-}
-
-Mat4_t mat4_scl(const Mat4_t* m, float scalar)
-{
-    Mat4_t mat;
-
-    for(int i = 0; i<16; i++) {
-        mat.m[i] = m->m[i] * scalar;
-    }
-
-    return mat;
-}
-
-Mat4_t mat4_mul(const Mat4_t* m1, const Mat4_t* m2)
-{
-    Mat4_t mat;
+// #if defined(__x86_64__) || defined(__i386__)
 
-    for(int i = 0; i<4; i++) {
-        int i3 = i * 3;
-        for (int j = 0; j < 4; j++) {
-            float sum = 0;
-
-            for (int k = 0; k < 3; k++) {
-                sum += m1->m[i3 + k] * m2->m[k*3 + j];
-            }
-
-            mat.m[i3 + j] = sum;
-        }
-    }
+//     for (int i = 0; i < 16; i += 4) {
+//         __m128 a = _mm_loadu_ps(&m1->m[i]);
+//         __m128 b = _mm_loadu_ps(&m2->m[i]);
+//         __m128 c = _mm_add_ps(a, b);
+//         _mm_storeu_ps(&mat.m[i], c);
+//     }
+// #elif defined(__aarch64__)
+//     for (int i = 0; i < 16; i += 4) {
+//         float32x4_t a = vld1q_f32(&m1->m[i]);
+//         float32x4_t b = vld1q_f32(&m2->m[i]);
+//         float32x4_t c = vaddq_f32(a, b);
+//         vst1q_f32(&mat.m[i], c);
+//     }
+// #else
+//     for (int i = 0; i < 16; i++) {
+//         mat.m[i] = m1->m[i] + m2->m[i];
+//     }
+// #endif
+
+//     return mat;
+// }
+
+// Mat4_t mat4_sub(const Mat4_t* m1, const Mat4_t* m2)
+// {
+//     Mat4_t mat;
+
+//     for(int i = 0; i<16; i++) {
+//         mat.m[i] = m1->m[i] - m2->m[i];
+//     }
+
+//     return mat;
+// }
+
+// Mat4_t mat4_scl(const Mat4_t* m, float scalar)
+// {
+//     Mat4_t mat;
+
+//     for(int i = 0; i<16; i++) {
+//         mat.m[i] = m->m[i] * scalar;
+//     }
+
+//     return mat;
+// }
+
+// Mat4_t mat4_mul(const Mat4_t* m1, const Mat4_t* m2)
+// {
+//     Mat4_t mat;
 
-    return mat;
-}
+//     for(int i = 0; i<4; i++) {
+//         int i3 = i * 3;
+//         for (int j = 0; j < 4; j++) {
+//             float sum = 0;
+
+//             for (int k = 0; k < 3; k++) {
+//                 sum += m1->m[i3 + k] * m2->m[k*3 + j];
+//             }
+
+//             mat.m[i3 + j] = sum;
+//         }
+//     }
+
+//     return mat;
+// }
diff --git a/src/math/vec4.c b/src/math/vec4.c
index 3f1f2ef..a6bfd2f 100644
--- a/src/math/vec4.c
+++ b/src/math/vec4.c
@@ -1,18 +1,5 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-
-#if defined(__x86_64__) || defined(__i386__)
-#define SIMD_X86
-#include <immintrin.h>
-#elif defined(__aarch64__) || defined(__arm64__)
-#define SIMD_ARCH
-#include <arm_neon.h>
-#else
-#endif
-
 #include "vec4.h"
+#include "common_math.h"
 
 Vec4f_t vec4(float x, float y, float z, float w)
 {
@@ -77,9 +64,8 @@ Vec4f_t vec4f_add_r(Vec4f_t *__restrict out, Vec4f_t a)
 
 Vec4f_t vec4f_add(Vec4f_t a, Vec4f_t b)
 {
-    Vec4f_t vec = vec4f_clone(&a);
-    vec4f_add_r(&vec, b);
-    return vec;
+    vec4f_add_r(&a, b);
+    return a;
 }
 
 Vec4f_t vec4f_sub_r(Vec4f_t *__restrict out, Vec4f_t a)
@@ -106,9 +92,8 @@ Vec4f_t vec4f_sub_r(Vec4f_t *__restrict out, Vec4f_t a)
 
 Vec4f_t vec4f_sub(Vec4f_t a, Vec4f_t b)
 {
-    Vec4f_t vec = vec4f_clone(&a);
-    vec4f_sub_r(&vec, b);
-    return vec;
+    vec4f_sub_r(&a, b);
+    return a;
 }
 
 Vec4f_t vec4f_scale_r(Vec4f_t *__restrict out, float scalar)
@@ -134,9 +119,8 @@ Vec4f_t vec4f_scale_r(Vec4f_t *__restrict out, float scalar)
 
 Vec4f_t vec4f_scale(Vec4f_t a, float scalar)
 {
-    Vec4f_t vec = vec4f_clone(&a);
-    vec4f_scale_r(&vec, scalar);
-    return vec;
+    vec4f_scale_r(&a, scalar);
+    return a;
 }
 
 //float vec4f_dot(Vec4f_t a, Vec4f_t b)
diff --git a/src/math/vec4.h b/src/math/vec4.h
index 126604b..fe54e3d 100644
--- a/src/math/vec4.h
+++ b/src/math/vec4.h
@@ -1,20 +1,7 @@
 #ifndef VECTOR4_H
 #define VECTOR4_H
 
-#include <math.h>
-#include <stdio.h>
-#include <string.h>
-
-#if defined(__x86_64__) || defined(__i386__)
-#define SIMD_X86
-#include <immintrin.h>
-#elif defined(__aarch64__) || defined(__arm64__)
-#define SIMD_ARCH
-#include <arm_neon.h>
-#else
-#endif
-
-// must be aligned by 16 Bytes (less instruction executed for SSE)
+// must be 16-byte aligned (fewer instructions executed for SSE)
 typedef union {
     struct { float x, y, z, w; };