feat: rework math files

- add SSE functions
- remove vec3 / mat3
This commit is contained in:
2025-06-10 22:57:31 +02:00
parent 35d7f0da36
commit 8e26c51370
8 changed files with 208 additions and 379 deletions

View File

@@ -1,5 +1,15 @@
#include "mat4.h"
#include <string.h>
#include <stdio.h>
#if defined(__x86_64__) || defined(__i386__)
#include <xmmintrin.h> // SSE
#elif defined(__aarch64__) || defined(__arm64__) || defined(__ARM_NEON)
#include <arm_neon.h> // NEON
#else
#warning "SIMD intrinsics not enabled for this architecture"
#endif
Mat4_t mat4(const float arr[16])
{
@@ -26,10 +36,30 @@ Mat4_t mat4_ity(void)
/**
 * Element-wise sum of two matrices.
 *
 * Adds the 16 float elements of m1->m and m2->m pairwise and returns the
 * result by value. Neither input is modified.
 *
 * The implementation is selected at compile time:
 *   - x86 / x86_64: SSE, four floats per iteration;
 *   - AArch64:      NEON, four floats per iteration;
 *   - anything else: plain scalar loop.
 * All three paths write every one of the 16 elements of the result.
 *
 * @param m1 first operand (must not be NULL)
 * @param m2 second operand (must not be NULL)
 * @return a new matrix holding m1 + m2
 */
Mat4_t mat4_add(const Mat4_t* m1, const Mat4_t* m2)
{
    Mat4_t mat;

#if defined(__x86_64__) || defined(__i386__)
    // SSE: add four floats in parallel. The unaligned load/store variants
    // are used, so the matrix storage needs no particular alignment.
    for (int i = 0; i < 16; i += 4) {
        __m128 a = _mm_loadu_ps(&m1->m[i]);
        __m128 b = _mm_loadu_ps(&m2->m[i]);
        __m128 c = _mm_add_ps(a, b);
        _mm_storeu_ps(&mat.m[i], c);
    }
#elif defined(__aarch64__)
    // NEON: add four floats in parallel.
    for (int i = 0; i < 16; i += 4) {
        float32x4_t a = vld1q_f32(&m1->m[i]);
        float32x4_t b = vld1q_f32(&m2->m[i]);
        float32x4_t c = vaddq_f32(a, b);
        vst1q_f32(&mat.m[i], c);
    }
#else
    // Scalar fallback for targets without a SIMD path above.
    for (int i = 0; i < 16; i++) {
        mat.m[i] = m1->m[i] + m2->m[i];
    }
#endif

    return mat;
}