From 187908ccec324b7ed973f8b73783d7b434e049df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20GUEZO?=
Date: Sat, 21 Jun 2025 13:48:10 +0200
Subject: [PATCH] feat: add vec3 and vec4 functions

---
 src/main.c      |   2 +-
 src/math/vec3.c | 139 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/math/vec3.h |  40 ++++++++++++++
 src/math/vec4.c |  44 ++++++++++-----
 src/math/vec4.h |   6 +--
 5 files changed, 213 insertions(+), 18 deletions(-)
 create mode 100644 src/math/vec3.c
 create mode 100644 src/math/vec3.h

diff --git a/src/main.c b/src/main.c
index 08bab1f..fb88ce6 100644
--- a/src/main.c
+++ b/src/main.c
@@ -3,7 +3,7 @@
 
 int main(void)
 {
-	Vec4f_t vec = vec4(1.f, 2.f, 8.f, 4.f);
+	Vec4f_t vec = vec4f(1.f, 2.f, 8.f, 4.f);
 	printf("%f %f %f %f\n", vec.x, vec.y, vec.z, vec.w);
 
 	Vec4f_t vec2 = vec4f_clone(&vec);
diff --git a/src/math/vec3.c b/src/math/vec3.c
new file mode 100644
index 0000000..568bffa
--- /dev/null
+++ b/src/math/vec3.c
@@ -0,0 +1,139 @@
+//
+// vec3.c
+// main
+//
+// Created by Loïc GUEZO on 21/06/2025.
+//
+
+#include "vec3.h"
+
+#define VEC_SIZE 3
+
+Vec3f_t vec3f(float x, float y, float z)
+{
+	return (Vec3f_t){.x = x, .y = y, .z = z};
+}
+
+Vec3f_t vec3f_from_array(const float *__restrict val)
+{
+	Vec3f_t vec;
+#if defined (SIMD_X86)
+	__m128 arr = _mm_loadu_ps(val);
+	_mm_store_ps(vec.data, arr);
+#elif defined (SIMD_ARCH)
+	float32x4_t arr = vld1q_f32(val);
+	vst1q_f32(vec.data, arr);
+#else
+	for (int i = 0; i < VEC_SIZE; i++) {
+		vec.data[i] = val[i];
+	}
+#endif
+	return vec;
+}
+
+Vec3f_t vec3f_scalar(float f)
+{
+	Vec3f_t vec;
+#if defined(SIMD_X86)
+	__m128 scalar = _mm_set1_ps(f);
+	_mm_store_ps(vec.data, scalar);
+#elif defined(SIMD_ARCH)
+	float32x4_t scalar = vdupq_n_f32(f);
+	vst1q_f32(vec.data, scalar);
+#else
+	for (int i = 0; i < VEC_SIZE; i++) {
+		vec.data[i] = f;
+	}
+#endif
+	return vec;
+}
+
+Vec3f_t vec3f_zero(void)
+{
+	return vec3f_scalar(0.f);
+}
+
+Vec3f_t vec3f_add_r(Vec3f_t *__restrict out, Vec3f_t a)
+{
+#if defined (SIMD_X86)
+	__m128 va = _mm_load_ps(a.data);
+	__m128 vb = _mm_load_ps(out->data);
+	__m128 vres = _mm_add_ps(va, vb);
+	_mm_store_ps(out->data, vres);
+#elif defined (SIMD_ARCH)
+	float32x4_t va = vld1q_f32(a.data);
+	float32x4_t vb = vld1q_f32(out->data);
+	float32x4_t vres = vaddq_f32(va, vb);
+	vst1q_f32(out->data, vres);
+#else
+	for (int i = 0; i < VEC_SIZE; i++) {
+		out->data[i] += a.data[i];
+	}
+#endif
+	return *out;
+}
+
+Vec3f_t vec3f_add(Vec3f_t a, Vec3f_t b)
+{
+	return vec3f_add_r(&a, b);
+}
+
+Vec3f_t vec3f_sub_r(Vec3f_t *__restrict out, Vec3f_t a)
+{
+#if defined (SIMD_X86)
+	__m128 va = _mm_load_ps(out->data);
+	__m128 vb = _mm_load_ps(a.data);
+	__m128 vres = _mm_sub_ps(va, vb);
+	_mm_store_ps(out->data, vres);
+#elif defined (SIMD_ARCH)
+	float32x4_t va = vld1q_f32(out->data);
+	float32x4_t vb = vld1q_f32(a.data);
+	float32x4_t vres = vsubq_f32(va, vb);
+	vst1q_f32(out->data, vres);
+#else
+	for (int i = 0; i < VEC_SIZE; i++) {
+		out->data[i] -= a.data[i];
+	}
+#endif
+	return *out;
+}
+
+Vec3f_t vec3f_sub(Vec3f_t a, Vec3f_t b)
+{
+	return vec3f_sub_r(&a, b);
+}
+
+Vec3f_t vec3f_scale_r(Vec3f_t *__restrict out, float scalar)
+{
+#if defined (SIMD_X86)
+	__m128 va = _mm_load_ps(out->data);
+	__m128 vb = _mm_set1_ps(scalar);
+	__m128 vres = _mm_mul_ps(va, vb);
+	_mm_store_ps(out->data, vres);
+#elif defined (SIMD_ARCH)
+	float32x4_t va = vld1q_f32(out->data);
+	float32x4_t vb = vdupq_n_f32(scalar);
+	float32x4_t vres = vmulq_f32(va, vb);
+	vst1q_f32(out->data, vres);
+#else
+	for (int i = 0; i < VEC_SIZE; i++) {
+		out->data[i] *= scalar;
+	}
+#endif
+	return *out;
+}
+
+Vec3f_t vec3f_scale(Vec3f_t a, float scalar)
+{
+	return vec3f_scale_r(&a, scalar);
+}
+
+
+//Vec3f_t vec3f_add_r(Vec3f_t *__restrict out, Vec3f_t a);
+//Vec3f_t vec3f_add(Vec3f_t a, Vec3f_t b);
+//
+//Vec3f_t vec3f_sub_r(Vec3f_t *__restrict out, Vec3f_t a);
+//Vec3f_t vec3f_sub(Vec3f_t a, Vec3f_t b);
+//
+//Vec3f_t vec3f_scale_r(Vec3f_t *__restrict out, float scale);
+//Vec3f_t vec3f_scale(Vec3f_t a, float scale);
diff --git a/src/math/vec3.h b/src/math/vec3.h
new file mode 100644
index 0000000..1733022
--- /dev/null
+++ b/src/math/vec3.h
@@ -0,0 +1,40 @@
+//
+// vec3.h
+// main
+//
+// Created by Loïc GUEZO on 21/06/2025.
+//
+
+#ifndef vec3_h
+#define vec3_h
+
+#include "common_math.h"
+
+typedef union
+{
+	struct {float x, y, z; };
+	float data[3];
+} __attribute__((aligned(16))) Vec3f_t;
+
+Vec3f_t vec3f_from_array(const float *__restrict val);
+Vec3f_t vec3f(float x, float y, float z);
+// (f, f, f)
+Vec3f_t vec3f_scalar(float f);
+// (0, 0, 0)
+Vec3f_t vec3f_zero(void);
+
+inline static Vec3f_t vec3f_clone(const Vec3f_t *__restrict v)
+{
+	return *v;
+}
+
+Vec3f_t vec3f_add_r(Vec3f_t *__restrict out, Vec3f_t a);
+Vec3f_t vec3f_add(Vec3f_t a, Vec3f_t b);
+
+Vec3f_t vec3f_sub_r(Vec3f_t *__restrict out, Vec3f_t a);
+Vec3f_t vec3f_sub(Vec3f_t a, Vec3f_t b);
+
+Vec3f_t vec3f_scale_r(Vec3f_t *__restrict out, float scale);
+Vec3f_t vec3f_scale(Vec3f_t a, float scale);
+
+#endif /* vec3_h */
diff --git a/src/math/vec4.c b/src/math/vec4.c
index a6bfd2f..126d754 100644
--- a/src/math/vec4.c
+++ b/src/math/vec4.c
@@ -1,19 +1,31 @@
 #include "vec4.h"
 #include "common_math.h"
 
-Vec4f_t vec4(float x, float y, float z, float w)
+#define VEC_SIZE 4
+
+Vec4f_t vec4f(float x, float y, float z, float w)
 {
 	return (Vec4f_t){.x = x, .y = y, .z = z, .w = w};
 }
 
-Vec4f_t vec4f_from_array(float *__restrict val)
+Vec4f_t vec4f_from_array(const float *__restrict val)
 {
 	Vec4f_t vec;
-	memcpy(vec.data, val, 4*sizeof(float));
+#if defined (SIMD_X86)
+	__m128 arr = _mm_load_ps(val);
+	_mm_store_ps(vec.data, arr);
+#elif defined (SIMD_ARCH)
+	float32x4_t arr = vld1q_f32(val);
+	vst1q_f32(vec.data, arr);
+#else
+	for(int i = 0; i<VEC_SIZE; i++) {
+		vec.data[i] = val[i];
+	}
+#endif
 	return vec;
 }
@@ -47,7 +59,7 @@ Vec4f_t vec4f_add_r(Vec4f_t *__restrict out, Vec4f_t a)
 	__m128 va = _mm_load_ps(a.data);
 	__m128 vb = _mm_load_ps(out->data);
 	__m128 vres = _mm_add_ps(va, vb);
-	_mm_storeu_ps(out->data, vres);
+	_mm_store_ps(out->data, vres);
 
 #elif defined (SIMD_ARCH)
 	float32x4_t va = vld1q_f32(a.data);
@@ -55,7 +67,7 @@ Vec4f_t vec4f_add_r(Vec4f_t *__restrict out, Vec4f_t a)
 	float32x4_t vres = vaddq_f32(va, vb);
 	vst1q_f32(out->data, vres);
 #else
-	for(int i = 0; i<4; i++) {
+	for(int i = 0; i<VEC_SIZE; i++) {
 		out->data[i] += a.data[i];
 	}
 #endif
@@ -74,7 +86,7 @@ Vec4f_t vec4f_sub_r(Vec4f_t *__restrict out, Vec4f_t a)
 	__m128 va = _mm_load_ps(out->data);
 	__m128 vb = _mm_load_ps(a.data);
 	__m128 vres = _mm_sub_ps(va, vb);
-	_mm_storeu_ps(out->data, vres);
+	_mm_store_ps(out->data, vres);
 
 #elif defined (SIMD_ARCH)
 	float32x4_t va = vld1q_f32(a.data);
@@ -83,7 +95,7 @@ Vec4f_t vec4f_sub_r(Vec4f_t *__restrict out, Vec4f_t a)
 	vst1q_f32(out->data, vres);
 
 #else
-	for(int i = 0; i<4; i++) {
+	for(int i = 0; i<VEC_SIZE; i++) {
 		out->data[i] -= a.data[i];
 	}
 #endif
@@ -102,7 +114,7 @@ Vec4f_t vec4f_scale_r(Vec4f_t *__restrict out, float scalar)
 	__m128 va = _mm_load_ps(out->data);
 	__m128 vb = _mm_set1_ps(scalar);
 	__m128 vres = _mm_mul_ps(va, vb);
-	_mm_storeu_ps(out->data, vres);
+	_mm_store_ps(out->data, vres);
 
 #elif defined (SIMD_ARCH)
 	float32x4_t va = vld1q_f32(out->data);
@@ -125,17 +137,21 @@ Vec4f_t vec4f_scale(Vec4f_t a, float scalar)
 
 //float vec4f_dot(Vec4f_t a, Vec4f_t b)
 //{
+//	float result;
 //#if defined (SIMD_X86)
 //	__m128 va = _mm_load_ps(a.data);
 //	__m128 vb = _mm_load_ps(b.data);
 //	__m128 vres = _mm_mul_ps(va, vb);
-//	return
+
+//	__m128 shuf = 
+//	result = 0.f;
 //
 //#elif defined (SIMD_ARCH)
-//
+//	result = 0.f;
 //#else
-//
+//	result = a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
 //#endif
+//	return result;
 //}
 
 // float vec4_dot(Vec4_t a, Vec4_t b)
diff --git a/src/math/vec4.h b/src/math/vec4.h
index fe54e3d..4052179 100644
--- a/src/math/vec4.h
+++ b/src/math/vec4.h
@@ -8,14 +8,14 @@ typedef union
 	float data[4];
 }__attribute__((aligned(16))) Vec4f_t;
 
-Vec4f_t vec4f_from_array(float *__restrict val);
-Vec4f_t vec4(float x, float y, float z, float w);
+Vec4f_t vec4f_from_array(const float *__restrict val);
+Vec4f_t vec4f(float x, float y, float z, float w);
 // (f, f, f, f)
 Vec4f_t vec4f_scalar(float f);
 // (0, 0, 0, 0)
 Vec4f_t vec4f_zero(void);
 
-inline static Vec4f_t vec4f_clone(Vec4f_t *__restrict v)
+inline static Vec4f_t vec4f_clone(const Vec4f_t *__restrict v)
 {
 	return *v;
 }
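
Note on the commented-out vec4f_dot() left in vec4.c: the SIMD_X86 branch stops mid-statement at `__m128 shuf =`. The sketch below shows one possible way to finish it; it is not part of the patch. It assumes common_math.h maps SIMD_X86 and SIMD_ARCH onto the SSE and NEON headers the same way the rest of vec4.c does, and the NEON line additionally assumes an AArch64 target (vaddvq_f32). The shuffle/movehl horizontal add only needs baseline SSE, matching the intrinsics already used in the file; _mm_hadd_ps (SSE3) or _mm_dp_ps (SSE4.1) would be shorter but raise the ISA requirement.

    /* Sketch of a possible vec4f_dot() completion -- assumptions as noted above. */
    float vec4f_dot(Vec4f_t a, Vec4f_t b)
    {
    	float result;
    #if defined (SIMD_X86)
    	__m128 va   = _mm_load_ps(a.data);
    	__m128 vb   = _mm_load_ps(b.data);
    	__m128 prod = _mm_mul_ps(va, vb);        /* a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w */
    
    	/* Horizontal add with plain SSE: fold the four products into lane 0. */
    	__m128 shuf = _mm_shuffle_ps(prod, prod, _MM_SHUFFLE(2, 3, 0, 1)); /* swap within each pair */
    	__m128 sums = _mm_add_ps(prod, shuf);    /* (x+y, x+y, z+w, z+w) */
    	shuf = _mm_movehl_ps(shuf, sums);        /* bring (z+w) down to lane 0 */
    	sums = _mm_add_ss(sums, shuf);           /* lane 0 = (x+y) + (z+w) */
    	result = _mm_cvtss_f32(sums);
    #elif defined (SIMD_ARCH)
    	/* AArch64 NEON: multiply, then reduce across the vector. */
    	result = vaddvq_f32(vmulq_f32(vld1q_f32(a.data), vld1q_f32(b.data)));
    #else
    	result = a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
    #endif
    	return result;
    }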