feat: rework math files

- add SSE functions
- remove vec3 / mat3
This commit is contained in:
2025-06-10 22:57:31 +02:00
parent 35d7f0da36
commit 8e26c51370
8 changed files with 208 additions and 379 deletions

View File

@@ -1,7 +1,11 @@
#include "math/vec3.h"
#include <stdio.h>
#include "math/vec4.h"
int main(void)
{
return 0;
Vec4f_t vec = vec4(1.f, 2, 3, 4);
float vec_array[4] = {1, 2, 3, 4};
vec4f_add_r(&vec, vec4f_from_array(vec_array));
printf("%f\n", vec.data[1]);
}

View File

@@ -1,118 +0,0 @@
#include "mat3.h"
#include <string.h>
Mat3_t mat3(const float arr[9])
{
Mat3_t mat;
memcpy(mat.m, arr, 9*sizeof(float));
return mat;
}
Mat3_t mat3_zro(void)
{
return (Mat3_t){0};
}
Mat3_t mat3_ity(void)
{
return (Mat3_t) {{
1, 0, 0,
0, 1, 0,
0, 0, 1
}};
}
Mat3_t mat3_add(const Mat3_t* m1, const Mat3_t* m2)
{
Mat3_t mat;
for(int i = 0; i<9; i++) {
mat.m[i] = m1->m[i] + m2->m[i];
}
return mat;
}
Mat3_t mat3_sub(const Mat3_t* m1, const Mat3_t* m2)
{
Mat3_t mat;
for(int i = 0; i<9; i++) {
mat.m[i] = m1->m[i] - m2->m[i];
}
return mat;
}
Mat3_t mat3_scl(const Mat3_t* m, float scalar)
{
Mat3_t mat;
for(int i = 0; i<9; i++) {
mat.m[i] = m->m[i] * scalar;
}
return mat;
}
Mat3_t mat3_mul(const Mat3_t* m1, const Mat3_t* m2)
{
Mat3_t mat;
for(int i = 0; i<3; i++) {
int i3 = i * 3;
for (int j = 0; j < 3; j++) {
float sum = 0;
for (int k = 0; k < 3; k++) {
sum += m1->m[i3 + k] * m2->m[k*3 + j];
}
mat.m[i3 + j] = sum;
}
}
return mat;
}
Mat3_t mat3_tpo(const Mat3_t* m)
{
Mat3_t mat;
for(int i = 0; i<3; i++) {
int i3 = i * 3;
for (int j = 0; j < 3; j++) {
mat.m[i3 + j] = m->m[j*3 + i];
}
}
return mat;
}
float mat3_det(const Mat3_t* m)
{
return m->m[0] * (m->m[4] * m->m[8] - m->m[5] * m->m[7]) -
m->m[1] * (m->m[3] * m->m[8] - m->m[5] * m->m[6]) +
m->m[2] * (m->m[3] * m->m[7] - m->m[4] * m->m[6]);
}
Mat3_t mat3_inv(const Mat3_t* m) {
Mat3_t inv;
float det = mat3_det(m);
if (det == 0) return mat3_ity();
float invDet = 1.0f / det;
inv.m[0] = (m->m[4] * m->m[8] - m->m[5] * m->m[7]) * invDet;
inv.m[1] = -(m->m[1] * m->m[8] - m->m[2] * m->m[7]) * invDet;
inv.m[2] = (m->m[1] * m->m[5] - m->m[2] * m->m[4]) * invDet;
inv.m[3] = -(m->m[3] * m->m[8] - m->m[5] * m->m[6]) * invDet;
inv.m[4] = (m->m[0] * m->m[8] - m->m[2] * m->m[6]) * invDet;
inv.m[5] = -(m->m[0] * m->m[5] - m->m[2] * m->m[3]) * invDet;
inv.m[6] = (m->m[3] * m->m[7] - m->m[4] * m->m[6]) * invDet;
inv.m[7] = -(m->m[0] * m->m[7] - m->m[1] * m->m[6]) * invDet;
inv.m[8] = (m->m[0] * m->m[4] - m->m[1] * m->m[3]) * invDet;
return inv;
}

View File

@@ -1,29 +0,0 @@
#ifndef MATRIX3_H
#define MATRIX3_H
typedef struct
{
float m[9];
} Mat3_t;
Mat3_t mat3(const float arr[9]);
Mat3_t mat3_zro(void);
Mat3_t mat3_ity(void);
Mat3_t mat3_add(const Mat3_t* m1, const Mat3_t* m2);
Mat3_t mat3_sub(const Mat3_t* m1, const Mat3_t* m2);
Mat3_t mat3_scl(const Mat3_t* m, float scalar);
Mat3_t mat3_mul(const Mat3_t* m1, const Mat3_t* m2);
Mat3_t mat3_tpo(const Mat3_t* m);
float mat3_det(const Mat3_t* m);
Mat3_t mat3_inv(const Mat3_t* m);
#endif // MATRIX3_H

View File

@@ -1,5 +1,15 @@
#include "mat4.h"
#include <string.h>
#include <stdio.h>
#if defined(__x86_64__) || defined(__i386__)
#include <xmmintrin.h> // SSE
#elif defined(__aarch64__) || defined(__arm64__) || defined(__ARM_NEON)
#include <arm_neon.h> // NEON
#else
#warning "SIMD intrinsics not enabled for this architecture"
#endif
Mat4_t mat4(const float arr[16])
{
@@ -27,9 +37,29 @@ Mat4_t mat4_add(const Mat4_t* m1, const Mat4_t* m2)
{
Mat4_t mat;
for(int i = 0; i<16; i++) {
#if defined(__x86_64__) || defined(__i386__)
// SSE : addition 4 floats en parallèle
for (int i = 0; i < 16; i += 4) {
__m128 a = _mm_loadu_ps(&m1->m[i]);
__m128 b = _mm_loadu_ps(&m2->m[i]);
__m128 c = _mm_add_ps(a, b);
_mm_storeu_ps(&mat.m[i], c);
}
#elif defined(__aarch64__)
printf("hello world");
// NEON : addition 4 floats en parallèle
for (int i = 0; i < 16; i += 4) {
float32x4_t a = vld1q_f32(&m1->m[i]);
float32x4_t b = vld1q_f32(&m2->m[i]);
float32x4_t c = vaddq_f32(a, b);
vst1q_f32(&mat.m[i], c);
}
#else
// Fallback classique
for (int i = 0; i < 16; i++) {
mat.m[i] = m1->m[i] + m2->m[i];
}
#endif
return mat;
}

View File

@@ -1,106 +0,0 @@
#include <math.h>
#include <stdlib.h>
#include <float.h>
#include "vec3.h"
inline Vec3_t vec3(float x, float y, float z)
{
return (Vec3_t) {x, y, z};
}
Vec3_t vec3_add(Vec3_t v1, Vec3_t v2)
{
return vec3(v1.x + v2.x, v1.y + v2.y, v1.z + v2.z);
}
Vec3_t vec3_sub(Vec3_t v1, Vec3_t v2)
{
return vec3(v1.x - v2.x, v1.y - v2.y, v1.z - v2.z);
}
Vec3_t vec3_scale(Vec3_t v, float scalar)
{
return vec3(v.x * scalar, v.y * scalar, v.z * scalar);
}
float vec3_dot(Vec3_t a, Vec3_t b)
{
return a.x * b.x + a.y * b.y + a.z * b.z;
}
float vec3_len(Vec3_t v)
{
return sqrtf(v.x * v.x + v.y * v.y + v.z * v.z);
}
Vec3_t vec3_norm(Vec3_t v)
{
float length = vec3_len(v);
if (length == 0.f) return vec3(0, 0, 0);
return vec3_scale(v, 1.f / length);
}
Vec3_t vec3_lerp(Vec3_t a, Vec3_t b, float t)
{
t = fmaxf(0.f, fminf(t, 1.f));
return vec3(
a.x + t * (b.x - a.x),
a.y + t * (b.y - a.y),
a.z + t * (b.z - a.z)
);
}
Vec3_t vec3_cross(Vec3_t a, Vec3_t b)
{
return vec3(
a.y * b.z - a.z * b.y,
a.z * b.x - a.x * b.z,
a.x * b.y - a.y * b.x
);
}
float vec3_angle(Vec3_t a, Vec3_t b)
{
float lenA = vec3_len(a);
float lenB = vec3_len(b);
if (lenA < FLT_EPSILON || lenB < FLT_EPSILON)
return NAN;
return acosf(fmaxf(-1.f,
fminf(vec3_dot(a, b) / (lenA * lenB), 1.f)));
}
Vec3_t vec3_proj(Vec3_t a, Vec3_t b)
{
return vec3_scale(b,
vec3_dot(a, b) / vec3_dot(b, b));
}
Vec3_t vec3_refl(Vec3_t v, Vec3_t normal)
{
return vec3_sub(v, vec3_scale(vec3_proj(v, normal), 2.f));
}
float vec3_dist(Vec3_t a, Vec3_t b)
{
return vec3_len(vec3_sub(a, b));
}
Vec3_t vec3_rotate(Vec3_t v, Vec3_t axis, float angle)
{
Vec3_t normAxis = vec3_norm(axis);
Vec3_t rlt = vec3_add(
vec3_add(
vec3_scale(v, cosf(angle)),
vec3_scale(vec3_cross(normAxis, v), sinf(angle))
),
vec3_scale(normAxis, vec3_dot(normAxis, v) * (1 - cosf(angle)))
);
return rlt;
}

View File

@@ -1,37 +0,0 @@
#ifndef VEC3__H
#define VEC3__H
typedef struct
{
float x, y, z;
} Vec3_t;
Vec3_t vec3(float x, float y, float z);
Vec3_t vec3_add(Vec3_t v1, Vec3_t v2);
Vec3_t vec3_sub(Vec3_t v1, Vec3_t v2);
Vec3_t vec3_scale(Vec3_t v, float scalar);
float vec3_dot(Vec3_t a, Vec3_t b);
float vec3_len(Vec3_t v);
Vec3_t vec3_norm(Vec3_t v);
Vec3_t vec3_lerp(Vec3_t a, Vec3_t b, float t);
Vec3_t vec3_cross(Vec3_t a, Vec3_t b);
float vec3_angle(Vec3_t a, Vec3_t b);
Vec3_t vec3_proj(Vec3_t a, Vec3_t b);
Vec3_t vec3_refl(Vec3_t v, Vec3_t normal);
float vec3_rist(Vec3_t a, Vec3_t b);
Vec3_t vec3_rotate(Vec3_t v, Vec3_t axis, float angle);
#endif // VEC3__H

View File

@@ -1,97 +1,123 @@
#include <math.h>
#include <stdlib.h>
#include <float.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#if defined(__x86_64__) || defined(__i386__)
#define SIMD_X86
#include <xmmintrin.h>
#elif defined(__aarch64__) || defined(__arm64__)
#define SIMD_ARCH
#include <arm_neon.h>
#else
#endif
#include "vec4.h"
inline Vec4_t vec4(float x, float y, float z, float w)
Vec4f_t vec4f_add_r(Vec4f_t *__restrict out, Vec4f_t a)
{
return (Vec4_t) {x, y, z, w};
#if defined (SIMD_X86)
__m128 va = _mm_load_ps(a.data);
__m128 vb = _mm_load_ps(out->data);
__m128 vres = _mm_add_ps(va, vb);
_mm_store_ps(out->data, vres);
#elif defined (SIMD_ARCH)
float32x4_t va = vld1q_f32(a.data);
float32x4_t vb = vld1q_f32(out->data);
float32x4_t vres = vaddq_f32(va, vb);
vst1q_f32(out->data, vres);
#else
for(int i = 0; i<4; i++) {
out->data[i] += a.data[i];
}
#endif
return *out;
}
Vec4_t vec4_add(Vec4_t v1, Vec4_t v2)
Vec4f_t vec4_add(Vec4f_t v1, Vec4f_t v2)
{
return vec4(v1.x + v2.x, v1.y + v2.y, v1.z + v2.z, v1.w + v2.w);
}
Vec4_t vec4_sub(Vec4_t v1, Vec4_t v2)
{
return vec4(v1.x - v2.x, v1.y - v2.y, v1.z - v2.z, v1.w - v2.w);
}
// Vec4_t vec4_sub(Vec4_t v1, Vec4_t v2)
// {
// return vec4(v1.x - v2.x, v1.y - v2.y, v1.z - v2.z, v1.w - v2.w);
// }
Vec4_t vec4_scale(Vec4_t v, float scalar)
{
return vec4(v.x * scalar, v.y * scalar, v.z * scalar, v.w * scalar);
}
// Vec4_t vec4_scale(Vec4_t v, float scalar)
// {
// return vec4(v.x * scalar, v.y * scalar, v.z * scalar, v.w * scalar);
// }
float vec4_dot(Vec4_t a, Vec4_t b)
{
return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
}
// float vec4_dot(Vec4_t a, Vec4_t b)
// {
// return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
// }
float vec4_len(Vec4_t v)
{
return sqrtf(v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w);
}
// float vec4_len(Vec4_t v)
// {
// return sqrtf(v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w);
// }
Vec4_t vec4_norm(Vec4_t v)
{
float length = vec4_len(v);
if (length == 0.f) return vec4(0, 0, 0, 0);
// Vec4_t vec4_norm(Vec4_t v)
// {
// float length = vec4_len(v);
// if (length == 0.f) return vec4(0, 0, 0, 0);
return vec4_scale(v, 1.f / length);
}
// return vec4_scale(v, 1.f / length);
// }
Vec4_t vec4_lerp(Vec4_t a, Vec4_t b, float t)
{
t = fmaxf(0.f, fminf(t, 1.f));
// Vec4_t vec4_lerp(Vec4_t a, Vec4_t b, float t)
// {
// t = fmaxf(0.f, fminf(t, 1.f));
return vec4(
a.x + t * (b.x - a.x),
a.y + t * (b.y - a.y),
a.z + t * (b.z - a.z),
a.w + t * (b.w - a.w)
);
}
// return vec4(
// a.x + t * (b.x - a.x),
// a.y + t * (b.y - a.y),
// a.z + t * (b.z - a.z),
// a.w + t * (b.w - a.w)
// );
// }
float vec4Angle(Vec4_t a, Vec4_t b)
{
float lenA = vec4_len(a);
float lenB = vec4_len(b);
// float vec4Angle(Vec4_t a, Vec4_t b)
// {
// float lenA = vec4_len(a);
// float lenB = vec4_len(b);
if (isnan(lenA) || isnan(lenB)
|| lenA < FLT_EPSILON
|| lenB < FLT_EPSILON)
return NAN;
// if (isnan(lenA) || isnan(lenB)
// || lenA < FLT_EPSILON
// || lenB < FLT_EPSILON)
// return NAN;
float dot = vec4_dot(a, b);
float cosTheta = dot / (lenA * lenB);
// float dot = vec4_dot(a, b);
// float cosTheta = dot / (lenA * lenB);
cosTheta = fmaxf(-1.f, fminf(cosTheta, 1.f));
// cosTheta = fmaxf(-1.f, fminf(cosTheta, 1.f));
return acosf(cosTheta);
}
// return acosf(cosTheta);
// }
Vec4_t vec4_proj(Vec4_t a, Vec4_t b)
{
float dotA = vec4_dot(a, b);
float dotB = vec4_dot(b, b);
// Vec4_t vec4_proj(Vec4_t a, Vec4_t b)
// {
// float dotA = vec4_dot(a, b);
// float dotB = vec4_dot(b, b);
float scale = dotA / dotB;
return vec4_scale(b, scale);
}
// float scale = dotA / dotB;
// return vec4_scale(b, scale);
// }
Vec4_t vec4_refl(Vec4_t v, Vec4_t normal)
{
Vec4_t proj = vec4_proj(v, normal);
Vec4_t scal = vec4_scale(proj, 2.f);
Vec4_t rlt = vec4_sub(v, scal);
return rlt;
}
// Vec4_t vec4_refl(Vec4_t v, Vec4_t normal)
// {
// Vec4_t proj = vec4_proj(v, normal);
// Vec4_t scal = vec4_scale(proj, 2.f);
// Vec4_t rlt = vec4_sub(v, scal);
// return rlt;
// }
float vec4_dist(Vec4_t a, Vec4_t b)
{
Vec4_t vsub = vec4_sub(a, b);
float rlt = vec4_len(vsub);
return rlt;
}
// float vec4_dist(Vec4_t a, Vec4_t b)
// {
// Vec4_t vsub = vec4_sub(a, b);
// float rlt = vec4_len(vsub);
// return rlt;
// }

View File

@@ -1,33 +1,92 @@
#ifndef VECTOR4_H
#define VECTOR4_H
typedef struct
#include <stdlib.h>
#include <stdlib.h>
#include <string.h>
#if defined(__x86_64__) || defined(__i386__)
#define SIMD_X86
#include <xmmintrin.h>
#elif defined(__aarch64__) || defined(__arm64__)
#define SIMD_ARCH
#include <arm_neon.h>
#else
#endif
typedef union
{
float x, y, z, w;
} Vec4_t;
struct { float x, y, z, w; };
float data[4];
} Vec4f_t;
Vec4_t vec4(float x, float y, float z, float w);
static inline Vec4f_t vec4f_from_array(float *__restrict val)
{
Vec4f_t vec4;
memcpy(vec4.data, val, 4*sizeof(float));
return vec4;
}
Vec4_t vec4_add(Vec4_t v1, Vec4_t v2);
static inline Vec4f_t vec4(float x, float y, float z, float w)
{
return (Vec4f_t){x, y, z, w};
}
Vec4_t vec4_sub(Vec4_t v1, Vec4_t v2);
// (f, f, f, f)
static inline Vec4f_t vec4f_scalar(float f) {
Vec4f_t vec4;
Vec4_t vec4_scale(Vec4_t v, float scalar);
// store f x 4 in register
// add all register into data
#if defined(SIMD_X86)
__m128 scalar = _mm_set1_ps(f);
_mm_storeu_ps(vec4.data, scalar);
float vec4_dot(Vec4_t a, Vec4_t b);
#elif defined(SIMD_ARCH)
float32x4_t scalar = vdupq_n_f32(f);
vst1q_f32(vec4.data, scalar);
float vec4_len(Vec4_t v);
// add one by one each value to their specific address
#else
for (int i = 0; i < 4; i++) {
vec4.data[i] = f;
}
#endif
return vec4;
}
Vec4_t vec4_norm(Vec4_t v);
// (0, 0, 0, 0)
static inline Vec4f_t Vec4f_zero(void)
{
return vec4f_scalar(0.f);
}
Vec4_t vec4_lerp(Vec4_t a, Vec4_t b, float t);
Vec4f_t vec4f_add_r(Vec4f_t *__restrict out, Vec4f_t a);
Vec4f_t vec4f_add(Vec4f_t a, Vec4f_t b);
float vec4_angle(Vec4_t a, Vec4_t b);
Vec4_t vec4_proj(Vec4_t a, Vec4_t b);
// Vec4_t vec4(float x, float y, float z, float w);
Vec4_t vec4_refl(Vec4_t v, Vec4_t normal);
// Vec4_t vec4_add(Vec4_t v1, Vec4_t v2);
float vec4_dist(Vec4_t a, Vec4_t b);
// Vec4_t vec4_sub(Vec4_t v1, Vec4_t v2);
// Vec4_t vec4_scale(Vec4_t v, float scalar);
// float vec4_dot(Vec4_t a, Vec4_t b);
// float vec4_len(Vec4_t v);
// Vec4_t vec4_norm(Vec4_t v);
// Vec4_t vec4_lerp(Vec4_t a, Vec4_t b, float t);
// float vec4_angle(Vec4_t a, Vec4_t b);
// Vec4_t vec4_proj(Vec4_t a, Vec4_t b);
// Vec4_t vec4_refl(Vec4_t v, Vec4_t normal);
// float vec4_dist(Vec4_t a, Vec4_t b);
#endif // VECTOR4_H