feat: rework math files

- add SSE functions - remove vec3 / mat3
2026-01-25 13:34:24 +00:00 · 2025-06-10 22:57:31 +02:00
parent 35d7f0da36
commit 8e26c51370
8 changed files with 208 additions and 379 deletions
--- a/src/math/mat4.c
+++ b/src/math/mat4.c
@@ -1,5 +1,15 @@
 #include "mat4.h"
 #include <string.h>
+#include <stdio.h>
+
+
+#if defined(__x86_64__) || defined(__i386__)
+#include <xmmintrin.h>  // SSE
+#elif defined(__aarch64__) || defined(__arm64__) || defined(__ARM_NEON)
+#include <arm_neon.h>   // NEON
+#else
+#warning "SIMD intrinsics not enabled for this architecture"
+#endif

 Mat4_t mat4(const float arr[16]) 
 {
@@ -26,10 +36,30 @@ Mat4_t mat4_ity(void)
 Mat4_t mat4_add(const Mat4_t* m1, const Mat4_t* m2) 
 {
    Mat4_t mat;
-    
-    for(int i = 0; i<16; i++) {
+
+#if defined(__x86_64__) || defined(__i386__)
+    // SSE : addition 4 floats en parallèle
+    for (int i = 0; i < 16; i += 4) {
+        __m128 a = _mm_loadu_ps(&m1->m[i]);
+        __m128 b = _mm_loadu_ps(&m2->m[i]);
+        __m128 c = _mm_add_ps(a, b);
+        _mm_storeu_ps(&mat.m[i], c);
+    }
+#elif defined(__aarch64__)
+    printf("hello world");
+    // NEON : addition 4 floats en parallèle
+    for (int i = 0; i < 16; i += 4) {
+        float32x4_t a = vld1q_f32(&m1->m[i]);
+        float32x4_t b = vld1q_f32(&m2->m[i]);
+        float32x4_t c = vaddq_f32(a, b);
+        vst1q_f32(&mat.m[i], c);
+    }
+#else
+    // Fallback classique
+    for (int i = 0; i < 16; i++) {
        mat.m[i] = m1->m[i] + m2->m[i];
    }
+#endif

    return mat;
 }