From 8e26c5137027633e7a83aff2cf5b960d3e9a301d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20GUEZO?= Date: Tue, 10 Jun 2025 22:57:31 +0200 Subject: [PATCH] feat: rework math files - add SSE functions - remove vec3 / mat3 --- src/main.c | 8 ++- src/math/mat3.c | 118 ---------------------------------- src/math/mat3.h | 29 --------- src/math/mat4.c | 34 +++++++++- src/math/vec3.c | 106 ------------------------------- src/math/vec3.h | 37 ----------- src/math/vec4.c | 166 ++++++++++++++++++++++++++++-------------------- src/math/vec4.h | 89 +++++++++++++++++++++----- 8 files changed, 208 insertions(+), 379 deletions(-) delete mode 100644 src/math/mat3.c delete mode 100644 src/math/mat3.h delete mode 100644 src/math/vec3.c delete mode 100644 src/math/vec3.h diff --git a/src/main.c b/src/main.c index a201ef7..573831a 100644 --- a/src/main.c +++ b/src/main.c @@ -1,7 +1,11 @@ -#include "math/vec3.h" #include +#include "math/vec4.h" int main(void) { - return 0; + Vec4f_t vec = vec4(1.f, 2, 3, 4); + float vec_array[4] = {1, 2, 3, 4}; + + vec4f_add_r(&vec, vec4f_from_array(vec_array)); + printf("%f\n", vec.data[1]); } \ No newline at end of file diff --git a/src/math/mat3.c b/src/math/mat3.c deleted file mode 100644 index b6f69d2..0000000 --- a/src/math/mat3.c +++ /dev/null @@ -1,118 +0,0 @@ -#include "mat3.h" -#include - -Mat3_t mat3(const float arr[9]) -{ - Mat3_t mat; - memcpy(mat.m, arr, 9*sizeof(float)); - return mat; -} - -Mat3_t mat3_zro(void) -{ - return (Mat3_t){0}; -} - -Mat3_t mat3_ity(void) -{ - return (Mat3_t) {{ - 1, 0, 0, - 0, 1, 0, - 0, 0, 1 - }}; -} - -Mat3_t mat3_add(const Mat3_t* m1, const Mat3_t* m2) -{ - Mat3_t mat; - - for(int i = 0; i<9; i++) { - mat.m[i] = m1->m[i] + m2->m[i]; - } - - return mat; -} - -Mat3_t mat3_sub(const Mat3_t* m1, const Mat3_t* m2) -{ - Mat3_t mat; - - for(int i = 0; i<9; i++) { - mat.m[i] = m1->m[i] - m2->m[i]; - } - - return mat; -} - -Mat3_t mat3_scl(const Mat3_t* m, float scalar) -{ - Mat3_t mat; - - for(int i = 0; i<9; i++) { - mat.m[i] = m->m[i] * scalar; - } - - return mat; -} - -Mat3_t mat3_mul(const Mat3_t* m1, const Mat3_t* m2) -{ - Mat3_t mat; - - for(int i = 0; i<3; i++) { - int i3 = i * 3; - for (int j = 0; j < 3; j++) { - float sum = 0; - - for (int k = 0; k < 3; k++) { - sum += m1->m[i3 + k] * m2->m[k*3 + j]; - } - - mat.m[i3 + j] = sum; - } - } - - return mat; -} - -Mat3_t mat3_tpo(const Mat3_t* m) -{ - Mat3_t mat; - - for(int i = 0; i<3; i++) { - int i3 = i * 3; - - for (int j = 0; j < 3; j++) { - mat.m[i3 + j] = m->m[j*3 + i]; - } - } - - return mat; -} - -float mat3_det(const Mat3_t* m) -{ - return m->m[0] * (m->m[4] * m->m[8] - m->m[5] * m->m[7]) - - m->m[1] * (m->m[3] * m->m[8] - m->m[5] * m->m[6]) + - m->m[2] * (m->m[3] * m->m[7] - m->m[4] * m->m[6]); -} - -Mat3_t mat3_inv(const Mat3_t* m) { - Mat3_t inv; - float det = mat3_det(m); - if (det == 0) return mat3_ity(); - - float invDet = 1.0f / det; - - inv.m[0] = (m->m[4] * m->m[8] - m->m[5] * m->m[7]) * invDet; - inv.m[1] = -(m->m[1] * m->m[8] - m->m[2] * m->m[7]) * invDet; - inv.m[2] = (m->m[1] * m->m[5] - m->m[2] * m->m[4]) * invDet; - inv.m[3] = -(m->m[3] * m->m[8] - m->m[5] * m->m[6]) * invDet; - inv.m[4] = (m->m[0] * m->m[8] - m->m[2] * m->m[6]) * invDet; - inv.m[5] = -(m->m[0] * m->m[5] - m->m[2] * m->m[3]) * invDet; - inv.m[6] = (m->m[3] * m->m[7] - m->m[4] * m->m[6]) * invDet; - inv.m[7] = -(m->m[0] * m->m[7] - m->m[1] * m->m[6]) * invDet; - inv.m[8] = (m->m[0] * m->m[4] - m->m[1] * m->m[3]) * invDet; - - return inv; -} diff --git a/src/math/mat3.h b/src/math/mat3.h deleted file mode 100644 index 40ed167..0000000 --- a/src/math/mat3.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef MATRIX3_H -#define MATRIX3_H - -typedef struct -{ - float m[9]; -} Mat3_t; - -Mat3_t mat3(const float arr[9]); - -Mat3_t mat3_zro(void); - -Mat3_t mat3_ity(void); - -Mat3_t mat3_add(const Mat3_t* m1, const Mat3_t* m2); - -Mat3_t mat3_sub(const Mat3_t* m1, const Mat3_t* m2); - -Mat3_t mat3_scl(const Mat3_t* m, float scalar); - -Mat3_t mat3_mul(const Mat3_t* m1, const Mat3_t* m2); - -Mat3_t mat3_tpo(const Mat3_t* m); - -float mat3_det(const Mat3_t* m); - -Mat3_t mat3_inv(const Mat3_t* m); - -#endif // MATRIX3_H diff --git a/src/math/mat4.c b/src/math/mat4.c index 5124c44..377e133 100644 --- a/src/math/mat4.c +++ b/src/math/mat4.c @@ -1,5 +1,15 @@ #include "mat4.h" #include +#include + + +#if defined(__x86_64__) || defined(__i386__) +#include // SSE +#elif defined(__aarch64__) || defined(__arm64__) || defined(__ARM_NEON) +#include // NEON +#else +#warning "SIMD intrinsics not enabled for this architecture" +#endif Mat4_t mat4(const float arr[16]) { @@ -26,10 +36,30 @@ Mat4_t mat4_ity(void) Mat4_t mat4_add(const Mat4_t* m1, const Mat4_t* m2) { Mat4_t mat; - - for(int i = 0; i<16; i++) { + +#if defined(__x86_64__) || defined(__i386__) + // SSE : addition 4 floats en parallèle + for (int i = 0; i < 16; i += 4) { + __m128 a = _mm_loadu_ps(&m1->m[i]); + __m128 b = _mm_loadu_ps(&m2->m[i]); + __m128 c = _mm_add_ps(a, b); + _mm_storeu_ps(&mat.m[i], c); + } +#elif defined(__aarch64__) + printf("hello world"); + // NEON : addition 4 floats en parallèle + for (int i = 0; i < 16; i += 4) { + float32x4_t a = vld1q_f32(&m1->m[i]); + float32x4_t b = vld1q_f32(&m2->m[i]); + float32x4_t c = vaddq_f32(a, b); + vst1q_f32(&mat.m[i], c); + } +#else + // Fallback classique + for (int i = 0; i < 16; i++) { mat.m[i] = m1->m[i] + m2->m[i]; } +#endif return mat; } diff --git a/src/math/vec3.c b/src/math/vec3.c deleted file mode 100644 index 36d2724..0000000 --- a/src/math/vec3.c +++ /dev/null @@ -1,106 +0,0 @@ -#include -#include -#include - -#include "vec3.h" - -inline Vec3_t vec3(float x, float y, float z) -{ - return (Vec3_t) {x, y, z}; -} - -Vec3_t vec3_add(Vec3_t v1, Vec3_t v2) -{ - return vec3(v1.x + v2.x, v1.y + v2.y, v1.z + v2.z); -} - -Vec3_t vec3_sub(Vec3_t v1, Vec3_t v2) -{ - return vec3(v1.x - v2.x, v1.y - v2.y, v1.z - v2.z); -} - -Vec3_t vec3_scale(Vec3_t v, float scalar) -{ - return vec3(v.x * scalar, v.y * scalar, v.z * scalar); -} - -float vec3_dot(Vec3_t a, Vec3_t b) -{ - return a.x * b.x + a.y * b.y + a.z * b.z; -} - -float vec3_len(Vec3_t v) -{ - return sqrtf(v.x * v.x + v.y * v.y + v.z * v.z); -} - -Vec3_t vec3_norm(Vec3_t v) -{ - - float length = vec3_len(v); - if (length == 0.f) return vec3(0, 0, 0); - - return vec3_scale(v, 1.f / length); -} - -Vec3_t vec3_lerp(Vec3_t a, Vec3_t b, float t) -{ - t = fmaxf(0.f, fminf(t, 1.f)); - return vec3( - a.x + t * (b.x - a.x), - a.y + t * (b.y - a.y), - a.z + t * (b.z - a.z) - ); -} - -Vec3_t vec3_cross(Vec3_t a, Vec3_t b) -{ - return vec3( - a.y * b.z - a.z * b.y, - a.z * b.x - a.x * b.z, - a.x * b.y - a.y * b.x - ); -} - -float vec3_angle(Vec3_t a, Vec3_t b) -{ - float lenA = vec3_len(a); - float lenB = vec3_len(b); - - if (lenA < FLT_EPSILON || lenB < FLT_EPSILON) - return NAN; - - return acosf(fmaxf(-1.f, - fminf(vec3_dot(a, b) / (lenA * lenB), 1.f))); -} - -Vec3_t vec3_proj(Vec3_t a, Vec3_t b) -{ - return vec3_scale(b, - vec3_dot(a, b) / vec3_dot(b, b)); -} - -Vec3_t vec3_refl(Vec3_t v, Vec3_t normal) -{ - return vec3_sub(v, vec3_scale(vec3_proj(v, normal), 2.f)); -} - -float vec3_dist(Vec3_t a, Vec3_t b) -{ - return vec3_len(vec3_sub(a, b)); -} - -Vec3_t vec3_rotate(Vec3_t v, Vec3_t axis, float angle) -{ - Vec3_t normAxis = vec3_norm(axis); - - Vec3_t rlt = vec3_add( - vec3_add( - vec3_scale(v, cosf(angle)), - vec3_scale(vec3_cross(normAxis, v), sinf(angle)) - ), - vec3_scale(normAxis, vec3_dot(normAxis, v) * (1 - cosf(angle))) - ); - - return rlt; -} diff --git a/src/math/vec3.h b/src/math/vec3.h deleted file mode 100644 index b393108..0000000 --- a/src/math/vec3.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef VEC3__H -#define VEC3__H - -typedef struct -{ - float x, y, z; -} Vec3_t; - -Vec3_t vec3(float x, float y, float z); - -Vec3_t vec3_add(Vec3_t v1, Vec3_t v2); - -Vec3_t vec3_sub(Vec3_t v1, Vec3_t v2); - -Vec3_t vec3_scale(Vec3_t v, float scalar); - -float vec3_dot(Vec3_t a, Vec3_t b); - -float vec3_len(Vec3_t v); - -Vec3_t vec3_norm(Vec3_t v); - -Vec3_t vec3_lerp(Vec3_t a, Vec3_t b, float t); - -Vec3_t vec3_cross(Vec3_t a, Vec3_t b); - -float vec3_angle(Vec3_t a, Vec3_t b); - -Vec3_t vec3_proj(Vec3_t a, Vec3_t b); - -Vec3_t vec3_refl(Vec3_t v, Vec3_t normal); - -float vec3_rist(Vec3_t a, Vec3_t b); - -Vec3_t vec3_rotate(Vec3_t v, Vec3_t axis, float angle); - -#endif // VEC3__H diff --git a/src/math/vec4.c b/src/math/vec4.c index 13bfd1f..fbc1f6d 100644 --- a/src/math/vec4.c +++ b/src/math/vec4.c @@ -1,97 +1,123 @@ -#include -#include #include +#include +#include +#include + +#if defined(__x86_64__) || defined(__i386__) +#define SIMD_X86 +#include +#elif defined(__aarch64__) || defined(__arm64__) +#define SIMD_ARCH +#include +#else +#endif #include "vec4.h" -inline Vec4_t vec4(float x, float y, float z, float w) +Vec4f_t vec4f_add_r(Vec4f_t *__restrict out, Vec4f_t a) { - return (Vec4_t) {x, y, z, w}; +#if defined (SIMD_X86) + __m128 va = _mm_load_ps(a.data); + __m128 vb = _mm_load_ps(out->data); + __m128 vres = _mm_add_ps(va, vb); + _mm_store_ps(out->data, vres); + +#elif defined (SIMD_ARCH) + float32x4_t va = vld1q_f32(a.data); + float32x4_t vb = vld1q_f32(out->data); + float32x4_t vres = vaddq_f32(va, vb); + vst1q_f32(out->data, vres); +#else + for(int i = 0; i<4; i++) { + out->data[i] += a.data[i]; + } +#endif + return *out; } -Vec4_t vec4_add(Vec4_t v1, Vec4_t v2) +Vec4f_t vec4_add(Vec4f_t v1, Vec4f_t v2) { return vec4(v1.x + v2.x, v1.y + v2.y, v1.z + v2.z, v1.w + v2.w); } -Vec4_t vec4_sub(Vec4_t v1, Vec4_t v2) -{ - return vec4(v1.x - v2.x, v1.y - v2.y, v1.z - v2.z, v1.w - v2.w); -} +// Vec4_t vec4_sub(Vec4_t v1, Vec4_t v2) +// { +// return vec4(v1.x - v2.x, v1.y - v2.y, v1.z - v2.z, v1.w - v2.w); +// } -Vec4_t vec4_scale(Vec4_t v, float scalar) -{ - return vec4(v.x * scalar, v.y * scalar, v.z * scalar, v.w * scalar); -} +// Vec4_t vec4_scale(Vec4_t v, float scalar) +// { +// return vec4(v.x * scalar, v.y * scalar, v.z * scalar, v.w * scalar); +// } -float vec4_dot(Vec4_t a, Vec4_t b) -{ - return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; -} +// float vec4_dot(Vec4_t a, Vec4_t b) +// { +// return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; +// } -float vec4_len(Vec4_t v) -{ - return sqrtf(v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w); -} +// float vec4_len(Vec4_t v) +// { +// return sqrtf(v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w); +// } -Vec4_t vec4_norm(Vec4_t v) -{ - float length = vec4_len(v); - if (length == 0.f) return vec4(0, 0, 0, 0); +// Vec4_t vec4_norm(Vec4_t v) +// { +// float length = vec4_len(v); +// if (length == 0.f) return vec4(0, 0, 0, 0); - return vec4_scale(v, 1.f / length); -} +// return vec4_scale(v, 1.f / length); +// } -Vec4_t vec4_lerp(Vec4_t a, Vec4_t b, float t) -{ - t = fmaxf(0.f, fminf(t, 1.f)); +// Vec4_t vec4_lerp(Vec4_t a, Vec4_t b, float t) +// { +// t = fmaxf(0.f, fminf(t, 1.f)); - return vec4( - a.x + t * (b.x - a.x), - a.y + t * (b.y - a.y), - a.z + t * (b.z - a.z), - a.w + t * (b.w - a.w) - ); -} +// return vec4( +// a.x + t * (b.x - a.x), +// a.y + t * (b.y - a.y), +// a.z + t * (b.z - a.z), +// a.w + t * (b.w - a.w) +// ); +// } -float vec4Angle(Vec4_t a, Vec4_t b) -{ - float lenA = vec4_len(a); - float lenB = vec4_len(b); +// float vec4Angle(Vec4_t a, Vec4_t b) +// { +// float lenA = vec4_len(a); +// float lenB = vec4_len(b); - if (isnan(lenA) || isnan(lenB) - || lenA < FLT_EPSILON - || lenB < FLT_EPSILON) - return NAN; +// if (isnan(lenA) || isnan(lenB) +// || lenA < FLT_EPSILON +// || lenB < FLT_EPSILON) +// return NAN; - float dot = vec4_dot(a, b); - float cosTheta = dot / (lenA * lenB); +// float dot = vec4_dot(a, b); +// float cosTheta = dot / (lenA * lenB); - cosTheta = fmaxf(-1.f, fminf(cosTheta, 1.f)); +// cosTheta = fmaxf(-1.f, fminf(cosTheta, 1.f)); - return acosf(cosTheta); -} +// return acosf(cosTheta); +// } -Vec4_t vec4_proj(Vec4_t a, Vec4_t b) -{ - float dotA = vec4_dot(a, b); - float dotB = vec4_dot(b, b); +// Vec4_t vec4_proj(Vec4_t a, Vec4_t b) +// { +// float dotA = vec4_dot(a, b); +// float dotB = vec4_dot(b, b); - float scale = dotA / dotB; - return vec4_scale(b, scale); -} +// float scale = dotA / dotB; +// return vec4_scale(b, scale); +// } -Vec4_t vec4_refl(Vec4_t v, Vec4_t normal) -{ - Vec4_t proj = vec4_proj(v, normal); - Vec4_t scal = vec4_scale(proj, 2.f); - Vec4_t rlt = vec4_sub(v, scal); - return rlt; -} +// Vec4_t vec4_refl(Vec4_t v, Vec4_t normal) +// { +// Vec4_t proj = vec4_proj(v, normal); +// Vec4_t scal = vec4_scale(proj, 2.f); +// Vec4_t rlt = vec4_sub(v, scal); +// return rlt; +// } -float vec4_dist(Vec4_t a, Vec4_t b) -{ - Vec4_t vsub = vec4_sub(a, b); - float rlt = vec4_len(vsub); - return rlt; -} +// float vec4_dist(Vec4_t a, Vec4_t b) +// { +// Vec4_t vsub = vec4_sub(a, b); +// float rlt = vec4_len(vsub); +// return rlt; +// } diff --git a/src/math/vec4.h b/src/math/vec4.h index ef049b0..0882d40 100644 --- a/src/math/vec4.h +++ b/src/math/vec4.h @@ -1,33 +1,92 @@ #ifndef VECTOR4_H #define VECTOR4_H -typedef struct +#include +#include +#include + +#if defined(__x86_64__) || defined(__i386__) +#define SIMD_X86 +#include +#elif defined(__aarch64__) || defined(__arm64__) +#define SIMD_ARCH +#include +#else +#endif + +typedef union { - float x, y, z, w; -} Vec4_t; + struct { float x, y, z, w; }; + float data[4]; +} Vec4f_t; -Vec4_t vec4(float x, float y, float z, float w); +static inline Vec4f_t vec4f_from_array(float *__restrict val) +{ + Vec4f_t vec4; + memcpy(vec4.data, val, 4*sizeof(float)); + return vec4; +} -Vec4_t vec4_add(Vec4_t v1, Vec4_t v2); +static inline Vec4f_t vec4(float x, float y, float z, float w) +{ + return (Vec4f_t){x, y, z, w}; +} -Vec4_t vec4_sub(Vec4_t v1, Vec4_t v2); +// (f, f, f, f) +static inline Vec4f_t vec4f_scalar(float f) { + Vec4f_t vec4; -Vec4_t vec4_scale(Vec4_t v, float scalar); +// store f x 4 in register +// add all register into data +#if defined(SIMD_X86) + __m128 scalar = _mm_set1_ps(f); + _mm_storeu_ps(vec4.data, scalar); -float vec4_dot(Vec4_t a, Vec4_t b); +#elif defined(SIMD_ARCH) + float32x4_t scalar = vdupq_n_f32(f); + vst1q_f32(vec4.data, scalar); -float vec4_len(Vec4_t v); +// add one by one each value to their specific address +#else + for (int i = 0; i < 4; i++) { + vec4.data[i] = f; + } +#endif + return vec4; +} -Vec4_t vec4_norm(Vec4_t v); +// (0, 0, 0, 0) +static inline Vec4f_t Vec4f_zero(void) +{ + return vec4f_scalar(0.f); +} -Vec4_t vec4_lerp(Vec4_t a, Vec4_t b, float t); +Vec4f_t vec4f_add_r(Vec4f_t *__restrict out, Vec4f_t a); +Vec4f_t vec4f_add(Vec4f_t a, Vec4f_t b); -float vec4_angle(Vec4_t a, Vec4_t b); -Vec4_t vec4_proj(Vec4_t a, Vec4_t b); +// Vec4_t vec4(float x, float y, float z, float w); -Vec4_t vec4_refl(Vec4_t v, Vec4_t normal); +// Vec4_t vec4_add(Vec4_t v1, Vec4_t v2); -float vec4_dist(Vec4_t a, Vec4_t b); +// Vec4_t vec4_sub(Vec4_t v1, Vec4_t v2); + +// Vec4_t vec4_scale(Vec4_t v, float scalar); + +// float vec4_dot(Vec4_t a, Vec4_t b); + +// float vec4_len(Vec4_t v); + +// Vec4_t vec4_norm(Vec4_t v); + +// Vec4_t vec4_lerp(Vec4_t a, Vec4_t b, float t); + +// float vec4_angle(Vec4_t a, Vec4_t b); + +// Vec4_t vec4_proj(Vec4_t a, Vec4_t b); + +// Vec4_t vec4_refl(Vec4_t v, Vec4_t normal); + +// float vec4_dist(Vec4_t a, Vec4_t b); #endif // VECTOR4_H