Cleanup: remove clone function from basic vec operations

- commented all functions in mat4.h
- add shared math header
This commit is contained in:
2025-06-20 10:18:48 +02:00
parent e157997d98
commit 3703ab17b0
4 changed files with 115 additions and 137 deletions

20
src/math/common_math.h Normal file
View File

@@ -0,0 +1,20 @@
// Common math library: shared standard includes and SIMD backend selection.
//
// After inclusion, one of the following macros is defined:
//   SIMD_X86  - target defines __x86_64__ or __i386__;
//               <xmmintrin.h> (SSE intrinsics) is included.
//   SIMD_ARCH - target defines __aarch64__ or __arm64__;
//               <arm_neon.h> (NEON intrinsics) is included.
//   SIMD_NONE - any other target; no intrinsics header is included and
//               callers are expected to use a scalar code path.
//
// The include guard is named after this file (COMMON_MATH_H) rather than the
// generic MATH_H so it cannot clash with the guard of another math header.
#ifndef COMMON_MATH_H
#define COMMON_MATH_H
#include <stdlib.h>
#include <string.h>
#include <math.h>
#if defined(__x86_64__) || defined(__i386__)
#define SIMD_X86
#include <xmmintrin.h>
#elif defined(__aarch64__) || defined(__arm64__)
#define SIMD_ARCH
#include <arm_neon.h>
#else
#define SIMD_NONE
#endif
#endif // COMMON_MATH_H

View File

@@ -1,107 +1,94 @@
#include "mat4.h" // #include "mat4.h"
#include <string.h> // #include "math.h"
#include <stdio.h>
// Mat4_t mat4(const float arr[16])
// {
// Mat4_t mat;
// memcpy(mat.m, arr, 16*sizeof(float));
// return mat;
// }
#if defined(__x86_64__) || defined(__i386__) // Mat4_t mat4_zro(void)
#include <xmmintrin.h> // SSE // {
#elif defined(__aarch64__) || defined(__arm64__) || defined(__ARM_NEON) // return (Mat4_t){0};
#include <arm_neon.h> // NEON // }
#else
#warning "SIMD intrinsics not enabled for this architecture"
#endif
// Builds a matrix from a flat array of 16 floats.
//
// arr: source values; the first 16 floats are copied verbatim, in order,
//      into the matrix storage. The array itself is not modified.
// Returns the new matrix by value.
Mat4_t mat4(const float arr[16])
{
    enum { MAT4_ELEMS = 16 };
    Mat4_t result;

    // Bulk copy: element i of arr lands in result.m[i].
    memcpy(result.m, arr, MAT4_ELEMS * sizeof *arr);

    return result;
}
// 0, 0, 0, 1,
// }};
// }
// Returns a zero-initialized matrix.
Mat4_t mat4_zro(void)
{
    Mat4_t zero = {0};
    return zero;
}
Mat4_t mat4_ity(void) // #if defined(__x86_64__) || defined(__i386__)
{
return (Mat4_t) {{
1, 0, 0, 0,
0, 1, 0, 0,
0, 0, 1, 0,
0, 0, 0, 1,
}};
}
// Adds two 4x4 matrices element by element.
//
// m1, m2: operands; both are only read.
// Returns a new matrix whose m[i] equals m1->m[i] + m2->m[i] for i in 0..15.
//
// Three code paths compute the same sums:
//   - x86 / x86-64: SSE, 4 floats per iteration, using unaligned loads and
//     stores so the matrices need no particular alignment;
//   - AArch64: NEON, 4 floats per iteration;
//   - anything else: plain scalar loop.
Mat4_t mat4_add(const Mat4_t* m1, const Mat4_t* m2)
{
    Mat4_t mat;
#if defined(__x86_64__) || defined(__i386__)
    // SSE: add 4 floats in parallel
    for (int i = 0; i < 16; i += 4) {
        __m128 a = _mm_loadu_ps(&m1->m[i]);
        __m128 b = _mm_loadu_ps(&m2->m[i]);
        __m128 c = _mm_add_ps(a, b);
        _mm_storeu_ps(&mat.m[i], c);
    }
#elif defined(__aarch64__)
    // NEON: add 4 floats in parallel
    for (int i = 0; i < 16; i += 4) {
        float32x4_t a = vld1q_f32(&m1->m[i]);
        float32x4_t b = vld1q_f32(&m2->m[i]);
        float32x4_t c = vaddq_f32(a, b);
        vst1q_f32(&mat.m[i], c);
    }
#else
    // Scalar fallback
    for (int i = 0; i < 16; i++) {
        mat.m[i] = m1->m[i] + m2->m[i];
    }
#endif
    return mat;
}
// Subtracts two 4x4 matrices element by element.
//
// m1: minuend, m2: subtrahend; both are only read.
// Returns a new matrix whose m[i] equals m1->m[i] - m2->m[i] for i in 0..15.
Mat4_t mat4_sub(const Mat4_t* m1, const Mat4_t* m2)
{
    Mat4_t diff;
    int idx = 0;

    while (idx < 16) {
        diff.m[idx] = m1->m[idx] - m2->m[idx];
        ++idx;
    }

    return diff;
}
// Multiplies every element of a 4x4 matrix by a scalar.
//
// m: source matrix; only read.
// scalar: factor applied to each of the 16 elements.
// Returns a new matrix whose m[i] equals m->m[i] * scalar for i in 0..15.
Mat4_t mat4_scl(const Mat4_t* m, float scalar)
{
    Mat4_t scaled;
    int idx = 0;

    while (idx < 16) {
        scaled.m[idx] = m->m[idx] * scalar;
        ++idx;
    }

    return scaled;
}
// Multiplies two 4x4 matrices: result = m1 * m2.
//
// m1: left operand, m2: right operand; both are only read.
// Returns a new matrix where, with element (row, col) stored at flat index
// row * 4 + col,
//     result(row, col) = sum over k in 0..3 of m1(row, k) * m2(k, col).
//
// The previous implementation indexed with a stride of 3 and summed only
// three terms (a 3x3 product), which overlapped rows, skipped the fourth
// term of every dot product and never wrote elements 13..15.
//
// The result is accumulated in a local matrix, so the returned value may be
// assigned over either operand by the caller.
//
// NOTE(review): row * 4 + col follows the index pattern of the original
// loop; confirm it matches the storage convention used by the rest of the
// library.
Mat4_t mat4_mul(const Mat4_t* m1, const Mat4_t* m2)
{
    enum { MAT4_DIM = 4 };
    Mat4_t mat;

    for (int row = 0; row < MAT4_DIM; row++) {
        const int row_base = row * MAT4_DIM;

        for (int col = 0; col < MAT4_DIM; col++) {
            float sum = 0;

            // Dot product of row `row` of m1 with column `col` of m2.
            for (int k = 0; k < MAT4_DIM; k++) {
                sum += m1->m[row_base + k] * m2->m[k * MAT4_DIM + col];
            }

            mat.m[row_base + col] = sum;
        }
    }

    return mat;
}
// for (int j = 0; j < 4; j++) {
// float sum = 0;
// for (int k = 0; k < 3; k++) {
// sum += m1->m[i3 + k] * m2->m[k*3 + j];
// }
// mat.m[i3 + j] = sum;
// }
// }
// return mat;
// }

View File

@@ -1,18 +1,5 @@
#include <float.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#if defined(__x86_64__) || defined(__i386__)
#define SIMD_X86
#include <xmmintrin.h>
#elif defined(__aarch64__) || defined(__arm64__)
#define SIMD_ARCH
#include <arm_neon.h>
#else
#endif
#include "vec4.h" #include "vec4.h"
#include "common_math.h"
Vec4f_t vec4(float x, float y, float z, float w) Vec4f_t vec4(float x, float y, float z, float w)
{ {
@@ -77,9 +64,8 @@ Vec4f_t vec4f_add_r(Vec4f_t *__restrict out, Vec4f_t a)
Vec4f_t vec4f_add(Vec4f_t a, Vec4f_t b) Vec4f_t vec4f_add(Vec4f_t a, Vec4f_t b)
{ {
Vec4f_t vec = vec4f_clone(&a); vec4f_add_r(&a, b);
vec4f_add_r(&vec, b); return a;
return vec;
} }
Vec4f_t vec4f_sub_r(Vec4f_t *__restrict out, Vec4f_t a) Vec4f_t vec4f_sub_r(Vec4f_t *__restrict out, Vec4f_t a)
@@ -106,9 +92,8 @@ Vec4f_t vec4f_sub_r(Vec4f_t *__restrict out, Vec4f_t a)
Vec4f_t vec4f_sub(Vec4f_t a, Vec4f_t b) Vec4f_t vec4f_sub(Vec4f_t a, Vec4f_t b)
{ {
Vec4f_t vec = vec4f_clone(&a); vec4f_sub_r(&a, b);
vec4f_sub_r(&vec, b); return a;
return vec;
} }
Vec4f_t vec4f_scale_r(Vec4f_t *__restrict out, float scalar) Vec4f_t vec4f_scale_r(Vec4f_t *__restrict out, float scalar)
@@ -134,9 +119,8 @@ Vec4f_t vec4f_scale_r(Vec4f_t *__restrict out, float scalar)
Vec4f_t vec4f_scale(Vec4f_t a, float scalar) Vec4f_t vec4f_scale(Vec4f_t a, float scalar)
{ {
Vec4f_t vec = vec4f_clone(&a); vec4f_scale_r(&a, scalar);
vec4f_scale_r(&vec, scalar); return a;
return vec;
} }
//float vec4f_dot(Vec4f_t a, Vec4f_t b) //float vec4f_dot(Vec4f_t a, Vec4f_t b)

View File

@@ -1,20 +1,7 @@
#ifndef VECTOR4_H #ifndef VECTOR4_H
#define VECTOR4_H #define VECTOR4_H
#include <stdlib.h> // must be aligned by 16 Bytes (less instruction executed for SSE)
#include <stdlib.h>
#include <string.h>
#if defined(__x86_64__) || defined(__i386__)
#define SIMD_X86
#include <xmmintrin.h>
#elif defined(__aarch64__) || defined(__arm64__)
#define SIMD_ARCH
#include <arm_neon.h>
#else
#endif
// must be aligned by 16 Bytes (less instruction executed for SSE)
typedef union typedef union
{ {
struct { float x, y, z, w; }; struct { float x, y, z, w; };