mirror of
https://github.com/guezoloic/t3dsr.git
synced 2026-01-25 13:34:24 +00:00
feat: rework math files
- add SSE functions - remove vec3 / mat3
This commit is contained in:
@@ -1,5 +1,15 @@
|
||||
#include "mat4.h"
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
#if defined(__x86_64__) || defined(__i386__)
|
||||
#include <xmmintrin.h> // SSE
|
||||
#elif defined(__aarch64__) || defined(__arm64__) || defined(__ARM_NEON)
|
||||
#include <arm_neon.h> // NEON
|
||||
#else
|
||||
#warning "SIMD intrinsics not enabled for this architecture"
|
||||
#endif
|
||||
|
||||
Mat4_t mat4(const float arr[16])
|
||||
{
|
||||
@@ -26,10 +36,30 @@ Mat4_t mat4_ity(void)
|
||||
Mat4_t mat4_add(const Mat4_t* m1, const Mat4_t* m2)
|
||||
{
|
||||
Mat4_t mat;
|
||||
|
||||
for(int i = 0; i<16; i++) {
|
||||
|
||||
#if defined(__x86_64__) || defined(__i386__)
|
||||
// SSE : addition 4 floats en parallèle
|
||||
for (int i = 0; i < 16; i += 4) {
|
||||
__m128 a = _mm_loadu_ps(&m1->m[i]);
|
||||
__m128 b = _mm_loadu_ps(&m2->m[i]);
|
||||
__m128 c = _mm_add_ps(a, b);
|
||||
_mm_storeu_ps(&mat.m[i], c);
|
||||
}
|
||||
#elif defined(__aarch64__)
|
||||
printf("hello world");
|
||||
// NEON : addition 4 floats en parallèle
|
||||
for (int i = 0; i < 16; i += 4) {
|
||||
float32x4_t a = vld1q_f32(&m1->m[i]);
|
||||
float32x4_t b = vld1q_f32(&m2->m[i]);
|
||||
float32x4_t c = vaddq_f32(a, b);
|
||||
vst1q_f32(&mat.m[i], c);
|
||||
}
|
||||
#else
|
||||
// Fallback classique
|
||||
for (int i = 0; i < 16; i++) {
|
||||
mat.m[i] = m1->m[i] + m2->m[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
return mat;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user