From 3703ab17b00d0de07e56eaadc8778e3676db0f92 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20GUEZO?=
Date: Fri, 20 Jun 2025 10:18:48 +0200
Subject: [PATCH] Cleanup: remove clone calls from basic vec operations

- comment out all functions in mat4.c
- add a shared math header
---
 src/math/common_math.h |  20 +++++
 src/math/mat4.c        | 187 +++++++++++++++++++----------------------
 src/math/vec4.c        |  30 ++-----
 src/math/vec4.h        |  15 +---
 4 files changed, 115 insertions(+), 137 deletions(-)
 create mode 100644 src/math/common_math.h

diff --git a/src/math/common_math.h b/src/math/common_math.h
new file mode 100644
index 0000000..d89a9df
--- /dev/null
+++ b/src/math/common_math.h
@@ -0,0 +1,20 @@
+// Common math library
+
+#ifndef MATH_H
+#define MATH_H
+
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+
+#if defined(__x86_64__) || defined(__i386__)
+    #define SIMD_X86
+    #include <immintrin.h>
+#elif defined(__aarch64__) || defined(__arm64__)
+    #define SIMD_ARCH
+    #include <arm_neon.h>
+#else
+    #define SIMD_NONE
+#endif
+
+#endif // MATH_H
diff --git a/src/math/mat4.c b/src/math/mat4.c
index 2d549bf..70c92e2 100644
--- a/src/math/mat4.c
+++ b/src/math/mat4.c
@@ -1,107 +1,94 @@
-#include "mat4.h"
-#include <string.h>
-#include <stdio.h>
+// #include "mat4.h"
+// #include "common_math.h"
+
+// Mat4_t mat4(const float arr[16])
+// {
+//     Mat4_t mat;
+//     memcpy(mat.m, arr, 16*sizeof(float));
+//     return mat;
+// }
 
-#if defined(__x86_64__) || defined(__i386__)
-#include <immintrin.h>  // SSE
-#elif defined(__aarch64__) || defined(__arm64__) || defined(__ARM_NEON)
-#include <arm_neon.h>   // NEON
-#else
-#warning "SIMD intrinsics not enabled for this architecture"
-#endif
+// Mat4_t mat4_zro(void)
+// {
+//     return (Mat4_t){0};
+// }
 
-Mat4_t mat4(const float arr[16])
-{
-    Mat4_t mat;
-    memcpy(mat.m, arr, 16*sizeof(float));
-    return mat;
-}
+// Mat4_t mat4_ity(void)
+// {
+//     return (Mat4_t) {{
+//         1, 0, 0, 0,
+//         0, 1, 0, 0,
+//         0, 0, 1, 0,
+//         0, 0, 0, 1,
+//     }};
+// }
 
-Mat4_t mat4_zro(void)
-{
-    return (Mat4_t){0};
-}
+// Mat4_t mat4_add(const Mat4_t* m1, const Mat4_t* m2)
+// {
+//     Mat4_t mat;
 
-Mat4_t mat4_ity(void)
-{
-    return (Mat4_t) {{
-        1, 0, 0, 0,
-        0, 1, 0, 0,
-        0, 0, 1, 0,
-        0, 0, 0, 1,
-    }};
-}
-
-Mat4_t mat4_add(const Mat4_t* m1, const Mat4_t* m2)
-{
-    Mat4_t mat;
-
-#if defined(__x86_64__) || defined(__i386__)
-    // SSE: add 4 floats in parallel
-    for (int i = 0; i < 16; i += 4) {
-        __m128 a = _mm_loadu_ps(&m1->m[i]);
-        __m128 b = _mm_loadu_ps(&m2->m[i]);
-        __m128 c = _mm_add_ps(a, b);
-        _mm_storeu_ps(&mat.m[i], c);
-    }
-#elif defined(__aarch64__)
-    printf("hello world");
-    // NEON: add 4 floats in parallel
-    for (int i = 0; i < 16; i += 4) {
-        float32x4_t a = vld1q_f32(&m1->m[i]);
-        float32x4_t b = vld1q_f32(&m2->m[i]);
-        float32x4_t c = vaddq_f32(a, b);
-        vst1q_f32(&mat.m[i], c);
-    }
-#else
-    // Plain scalar fallback
-    for (int i = 0; i < 16; i++) {
-        mat.m[i] = m1->m[i] + m2->m[i];
-    }
-#endif
-
-    return mat;
-}
-
-Mat4_t mat4_sub(const Mat4_t* m1, const Mat4_t* m2)
-{
-    Mat4_t mat;
-
-    for(int i = 0; i<16; i++) {
-        mat.m[i] = m1->m[i] - m2->m[i];
-    }
-
-    return mat;
-}
-
-Mat4_t mat4_scl(const Mat4_t* m, float scalar)
-{
-    Mat4_t mat;
-
-    for(int i = 0; i<16; i++) {
-        mat.m[i] = m->m[i] * scalar;
-    }
-
-    return mat;
-}
-
-Mat4_t mat4_mul(const Mat4_t* m1, const Mat4_t* m2)
-{
-    Mat4_t mat;
+// #if defined(__x86_64__) || defined(__i386__)
 
-    for(int i = 0; i<4; i++) {
-        int i3 = i * 3;
-        for (int j = 0; j < 4; j++) {
-            float sum = 0;
-
-            for (int k = 0; k < 3; k++) {
-                sum += m1->m[i3 + k] * m2->m[k*3 + j];
-            }
-
-            mat.m[i3 + j] = sum;
-        }
-    }
+//     for (int i = 0; i < 16; i += 4) {
+//         __m128 a = _mm_loadu_ps(&m1->m[i]);
+//         __m128 b = _mm_loadu_ps(&m2->m[i]);
+//         __m128 c = _mm_add_ps(a, b);
+//         _mm_storeu_ps(&mat.m[i], c);
+//     }
+// #elif defined(__aarch64__)
+//     for (int i = 0; i < 16; i += 4) {
+//         float32x4_t a = vld1q_f32(&m1->m[i]);
+//         float32x4_t b = vld1q_f32(&m2->m[i]);
+//         float32x4_t c = vaddq_f32(a, b);
+//         vst1q_f32(&mat.m[i], c);
+//     }
+// #else
+//     for (int i = 0; i < 16; i++) {
+//         mat.m[i] = m1->m[i] + m2->m[i];
+//     }
+// #endif
+
+//     return mat;
+// }
+
+// Mat4_t mat4_sub(const Mat4_t* m1, const Mat4_t* m2)
+// {
+//     Mat4_t mat;
+
+//     for(int i = 0; i<16; i++) {
+//         mat.m[i] = m1->m[i] - m2->m[i];
+//     }
+
+//     return mat;
+// }
+
+// Mat4_t mat4_scl(const Mat4_t* m, float scalar)
+// {
+//     Mat4_t mat;
+
+//     for(int i = 0; i<16; i++) {
+//         mat.m[i] = m->m[i] * scalar;
+//     }
+
+//     return mat;
+// }
+
+// Mat4_t mat4_mul(const Mat4_t* m1, const Mat4_t* m2)
+// {
+//     Mat4_t mat;
 
-    return mat;
-}
+//     for(int i = 0; i<4; i++) {
+//         int i3 = i * 3;
+//         for (int j = 0; j < 4; j++) {
+//             float sum = 0;
+
+//             for (int k = 0; k < 3; k++) {
+//                 sum += m1->m[i3 + k] * m2->m[k*3 + j];
+//             }
+
+//             mat.m[i3 + j] = sum;
+//         }
+//     }
+
+//     return mat;
+// }
diff --git a/src/math/vec4.c b/src/math/vec4.c
index 3f1f2ef..a6bfd2f 100644
--- a/src/math/vec4.c
+++ b/src/math/vec4.c
@@ -1,18 +1,5 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-
-#if defined(__x86_64__) || defined(__i386__)
-#define SIMD_X86
-#include <immintrin.h>
-#elif defined(__aarch64__) || defined(__arm64__)
-#define SIMD_ARCH
-#include <arm_neon.h>
-#else
-#endif
-
 #include "vec4.h"
+#include "common_math.h"
 
 Vec4f_t vec4(float x, float y, float z, float w)
 {
@@ -77,9 +64,8 @@ Vec4f_t vec4f_add_r(Vec4f_t *__restrict out, Vec4f_t a)
 
 Vec4f_t vec4f_add(Vec4f_t a, Vec4f_t b)
 {
-    Vec4f_t vec = vec4f_clone(&a);
-    vec4f_add_r(&vec, b);
-    return vec;
+    vec4f_add_r(&a, b);
+    return a;
 }
 
 Vec4f_t vec4f_sub_r(Vec4f_t *__restrict out, Vec4f_t a)
@@ -106,9 +92,8 @@ Vec4f_t vec4f_sub_r(Vec4f_t *__restrict out, Vec4f_t a)
 
 Vec4f_t vec4f_sub(Vec4f_t a, Vec4f_t b)
 {
-    Vec4f_t vec = vec4f_clone(&a);
-    vec4f_sub_r(&vec, b);
-    return vec;
+    vec4f_sub_r(&a, b);
+    return a;
 }
 
 Vec4f_t vec4f_scale_r(Vec4f_t *__restrict out, float scalar)
@@ -134,9 +119,8 @@ Vec4f_t vec4f_scale_r(Vec4f_t *__restrict out, float scalar)
 
 Vec4f_t vec4f_scale(Vec4f_t a, float scalar)
 {
-    Vec4f_t vec = vec4f_clone(&a);
-    vec4f_scale_r(&vec, scalar);
-    return vec;
+    vec4f_scale_r(&a, scalar);
+    return a;
 }
 
 //float vec4f_dot(Vec4f_t a, Vec4f_t b)
diff --git a/src/math/vec4.h b/src/math/vec4.h
index 126604b..fe54e3d 100644
--- a/src/math/vec4.h
+++ b/src/math/vec4.h
@@ -1,20 +1,7 @@
 #ifndef VECTOR4_H
 #define VECTOR4_H
 
-#include <math.h>
-#include <stdio.h>
-#include <string.h>
-
-#if defined(__x86_64__) || defined(__i386__)
-#define SIMD_X86
-#include <immintrin.h>
-#elif defined(__aarch64__) || defined(__arm64__)
-#define SIMD_ARCH
-#include <arm_neon.h>
-#else
-#endif
-
-// must be aligned by 16 Bytes (less instruction executed for SSE)
+// must be 16-byte aligned (fewer instructions executed for SSE)
 typedef union {
     struct { float x, y, z, w; };