diff --git a/CMakeLists.txt b/CMakeLists.txt
index 56fffdb..38ed24b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,8 +4,7 @@ project(t3dsr C)
 set(CMAKE_C_STANDARD 17)
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra")
 
-include_directories(src)
-
 file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS src/*.c)
+file(GLOB_RECURSE HEADERS CONFIGURE_DEPENDS src/*.h)
 
-add_executable(main ${SOURCES})
\ No newline at end of file
+add_executable(main ${SOURCES} ${HEADERS})
\ No newline at end of file
diff --git a/src/main.c b/src/main.c
index 573831a..08bab1f 100644
--- a/src/main.c
+++ b/src/main.c
@@ -2,10 +2,19 @@
 #include "math/vec4.h"
 
 int main(void)
-{
-    Vec4f_t vec = vec4(1.f, 2, 3, 4);
-    float vec_array[4] = {1, 2, 3, 4};
-
-    vec4f_add_r(&vec, vec4f_from_array(vec_array));
-    printf("%f\n", vec.data[1]);
-}
\ No newline at end of file
+{
+    Vec4f_t vec = vec4(1.f, 2.f, 8.f, 4.f);
+    printf("%f %f %f %f\n", vec.x, vec.y, vec.z, vec.w);
+
+    Vec4f_t clone = vec4f_clone(&vec);
+    printf("%f %f %f %f\n", clone.x, clone.y, clone.z, clone.w);
+
+    Vec4f_t sum = vec4f_add(vec, clone);
+    printf("%f %f %f %f\n", sum.x, sum.y, sum.z, sum.w);
+
+    Vec4f_t diff = vec4f_sub(vec, sum);
+    printf("%f %f %f %f\n", diff.x, diff.y, diff.z, diff.w);
+
+    Vec4f_t scaled = vec4f_scale(diff, 5.f);
+    printf("%f %f %f %f\n", scaled.x, scaled.y, scaled.z, scaled.w);
+}
diff --git a/src/math/mat4.c b/src/math/mat4.c
index 377e133..2d549bf 100644
--- a/src/math/mat4.c
+++ b/src/math/mat4.c
@@ -88,20 +88,20 @@ Mat4_t mat4_scl(const Mat4_t* m, float scalar)
 
 
 Mat4_t mat4_mul(const Mat4_t* m1, const Mat4_t* m2)
 {
-    Mat4_t mat;
-    
+    Mat4_t mat;
+
     for(int i = 0; i<4; i++) {
-        int i3 = i * 3;
+        int i4 = i * 4;  // row stride of a 4x4 matrix is 4, not 3
         for (int j = 0; j < 4; j++) {
             float sum = 0;
-            for (int k = 0; k < 3; k++) {
-                sum += m1->m[i3 + k] * m2->m[k*3 + j];
+            for (int k = 0; k < 4; k++) {  // accumulate all four products
+                sum += m1->m[i4 + k] * m2->m[k*4 + j];
             }
-            
-            mat.m[i3 + j] = sum;
+
+            mat.m[i4 + j] = sum;
         }
     }
-    
+
     return mat;
-}
\ No newline at end of file
+}
diff --git a/src/math/vec4.c b/src/math/vec4.c
index fbc1f6d..1726ec4 100644
--- a/src/math/vec4.c
+++ b/src/math/vec4.c
@@ -14,6 +14,55 @@
 
 #include "vec4.h"
+#include <string.h>  // memcpy
 
+Vec4f_t vec4(float x, float y, float z, float w)
+{
+    return (Vec4f_t){.x = x, .y = y, .z = z, .w = w};
+}
+
+Vec4f_t vec4f_from_array(float *__restrict val)
+{
+    Vec4f_t vec;
+    memcpy(vec.data, val, 4*sizeof(float));
+    return vec;
+}
+
+// clone an entire Vec4f_t
+Vec4f_t vec4f_clone(Vec4f_t *__restrict v)
+{
+    Vec4f_t vec;
+    memcpy(&vec, v, sizeof(Vec4f_t));
+    return vec;
+}
+
+Vec4f_t vec4f_scalar(float f)
+{
+    Vec4f_t vec;
+
+// broadcast f into all four lanes of a register,
+// then store the register into data
+#if defined(SIMD_X86)
+    __m128 scalar = _mm_set1_ps(f);
+    _mm_storeu_ps(vec.data, scalar);
+
+#elif defined(SIMD_ARCH)
+    float32x4_t scalar = vdupq_n_f32(f);
+    vst1q_f32(vec.data, scalar);
+
+// scalar fallback: set each component one by one
+#else
+    for (int i = 0; i < 4; i++) {
+        vec.data[i] = f;
+    }
+#endif
+    return vec;
+}
+
+Vec4f_t vec4f_zero(void)
+{
+    return vec4f_scalar(0.f);
+}
+
 Vec4f_t vec4f_add_r(Vec4f_t *__restrict out, Vec4f_t a)
 {
 #if defined (SIMD_X86)
@@ -35,20 +84,72 @@ Vec4f_t vec4f_add_r(Vec4f_t *__restrict out, Vec4f_t a)
     return *out;
 }
 
-Vec4f_t vec4_add(Vec4f_t v1, Vec4f_t v2)
+Vec4f_t vec4f_add(Vec4f_t a, Vec4f_t b)
 {
-    return vec4(v1.x + v2.x, v1.y + v2.y, v1.z + v2.z, v1.w + v2.w);
+    Vec4f_t vec = vec4f_clone(&a);
+    vec4f_add_r(&vec, b);
+    return vec;
 }
 
-// Vec4_t vec4_sub(Vec4_t v1, Vec4_t v2)
-// {
-//     return vec4(v1.x - v2.x, v1.y - v2.y, v1.z - v2.z, v1.w - v2.w);
-// }
+// in-place subtract: *out = *out - a
+Vec4f_t vec4f_sub_r(Vec4f_t *__restrict out, Vec4f_t a)
+{
+#if defined(SIMD_X86)
+    __m128 va = _mm_loadu_ps(out->data);  // minuend: the current value of *out
+    __m128 vb = _mm_loadu_ps(a.data);     // subtrahend
+    __m128 vres = _mm_sub_ps(va, vb);
+    _mm_storeu_ps(out->data, vres);
 
-// Vec4_t vec4_scale(Vec4_t v, float scalar)
-// {
-//     return vec4(v.x * scalar, v.y * scalar, v.z * scalar, v.w * scalar);
-// }
+#elif defined(SIMD_ARCH)
+    float32x4_t va = vld1q_f32(out->data);
+    float32x4_t vb = vld1q_f32(a.data);
+    float32x4_t vres = vsubq_f32(va, vb);
+    vst1q_f32(out->data, vres);
+
+#else
+    for (int i = 0; i < 4; i++) {
+        out->data[i] -= a.data[i];
+    }
+#endif
+    return *out;
+}
+
+Vec4f_t vec4f_sub(Vec4f_t a, Vec4f_t b)
+{
+    Vec4f_t vec = vec4f_clone(&a);
+    vec4f_sub_r(&vec, b);
+    return vec;
+}
+
+// in-place scale: *out = *out * scalar
+Vec4f_t vec4f_scale_r(Vec4f_t *__restrict out, float scalar)
+{
+#if defined(SIMD_X86)
+    __m128 va = _mm_loadu_ps(out->data);
+    __m128 vb = _mm_set1_ps(scalar);
+    __m128 vres = _mm_mul_ps(va, vb);
+    _mm_storeu_ps(out->data, vres);
+
+#elif defined(SIMD_ARCH)
+    float32x4_t va = vld1q_f32(out->data);
+    float32x4_t vb = vdupq_n_f32(scalar);
+    float32x4_t vres = vmulq_f32(va, vb);
+    vst1q_f32(out->data, vres);
+
+#else
+    for (int i = 0; i < 4; i++) {
+        out->data[i] *= scalar;
+    }
+#endif
+    return *out;
+}
+
+Vec4f_t vec4f_scale(Vec4f_t a, float scalar)
+{
+    Vec4f_t vec = vec4f_clone(&a);
+    vec4f_scale_r(&vec, scalar);
+    return vec;
+}
 
 // float vec4_dot(Vec4_t a, Vec4_t b)
 // {
diff --git a/src/math/vec4.h b/src/math/vec4.h
index 0882d40..34b59a2 100644
--- a/src/math/vec4.h
+++ b/src/math/vec4.h
@@ -20,58 +20,22 @@ typedef union
     float data[4];
 } Vec4f_t;
 
-static inline Vec4f_t vec4f_from_array(float *__restrict val)
-{
-    Vec4f_t vec4;
-    memcpy(vec4.data, val, 4*sizeof(float));
-    return vec4;
-}
-
-static inline Vec4f_t vec4(float x, float y, float z, float w)
-{
-    return (Vec4f_t){x, y, z, w};
-}
-
+Vec4f_t vec4f_from_array(float *__restrict val);
+Vec4f_t vec4(float x, float y, float z, float w);
 // (f, f, f, f)
-static inline Vec4f_t vec4f_scalar(float f) {
-    Vec4f_t vec4;
-
-// store f x 4 in register
-// add all register into data
-#if defined(SIMD_X86)
-    __m128 scalar = _mm_set1_ps(f);
-    _mm_storeu_ps(vec4.data, scalar);
-
-#elif defined(SIMD_ARCH)
-    float32x4_t scalar = vdupq_n_f32(f);
-    vst1q_f32(vec4.data, scalar);
-
-// add one by one each value to their specific address
-#else
-    for (int i = 0; i < 4; i++) {
-        vec4.data[i] = f;
-    }
-#endif
-    return vec4;
-}
-
+Vec4f_t vec4f_scalar(float f);
 // (0, 0, 0, 0)
-static inline Vec4f_t Vec4f_zero(void)
-{
-    return vec4f_scalar(0.f);
-}
+Vec4f_t vec4f_zero(void);
+Vec4f_t vec4f_clone(Vec4f_t *__restrict v);
 
 Vec4f_t vec4f_add_r(Vec4f_t *__restrict out, Vec4f_t a);
 Vec4f_t vec4f_add(Vec4f_t a, Vec4f_t b);
+Vec4f_t vec4f_sub_r(Vec4f_t *__restrict out, Vec4f_t a);
+Vec4f_t vec4f_sub(Vec4f_t a, Vec4f_t b);
 
-// Vec4_t vec4(float x, float y, float z, float w);
-
-// Vec4_t vec4_add(Vec4_t v1, Vec4_t v2);
-
-// Vec4_t vec4_sub(Vec4_t v1, Vec4_t v2);
-
-// Vec4_t vec4_scale(Vec4_t v, float scalar);
+Vec4f_t vec4f_scale_r(Vec4f_t *__restrict out, float scalar);
+Vec4f_t vec4f_scale(Vec4f_t a, float scalar);
 
 // float vec4_dot(Vec4_t a, Vec4_t b);
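
A note on the 4x4 indexing in mat4_mul above: for a 4x4 matrix stored row-major in a flat float[16], element (i, j) lives at m[i*4 + j], so both the row stride and the inner loop bound must be 4. The following standalone sketch is a plain scalar reference for that indexing; mat4_mul_ref, the local Mat4Ref_t definition, and the identity test are illustrative assumptions, not part of the patch.

#include <stdio.h>

/* Assumed layout: row-major 4x4, element (i, j) at m[i*4 + j]. */
typedef struct { float m[16]; } Mat4Ref_t;

static Mat4Ref_t mat4_mul_ref(const Mat4Ref_t *m1, const Mat4Ref_t *m2)
{
    Mat4Ref_t out;
    for (int i = 0; i < 4; i++) {
        for (int j = 0; j < 4; j++) {
            float sum = 0.f;
            for (int k = 0; k < 4; k++) {  /* all four terms, stride 4 */
                sum += m1->m[i*4 + k] * m2->m[k*4 + j];
            }
            out.m[i*4 + j] = sum;
        }
    }
    return out;
}

int main(void)
{
    /* identity * A must reproduce A exactly */
    Mat4Ref_t id = {{1, 0, 0, 0,  0, 1, 0, 0,  0, 0, 1, 0,  0, 0, 0, 1}};
    Mat4Ref_t a;
    for (int i = 0; i < 16; i++) a.m[i] = (float)(i + 1);

    Mat4Ref_t r = mat4_mul_ref(&id, &a);
    for (int i = 0; i < 16; i++) {
        if (r.m[i] != a.m[i]) { printf("mismatch at %d\n", i); return 1; }
    }
    printf("identity check passed\n");
    return 0;
}

With a stride of 3 and only three inner-loop terms this identity check fails, so it is a quick way to validate the hunk.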
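
The unaligned _mm_loadu_ps/_mm_storeu_ps forms are used in vec4f_sub_r and vec4f_scale_r because the aligned variants require 16-byte addresses, while a plain union of four floats only guarantees 4-byte alignment; a stack-allocated or by-value Vec4f_t can fault under _mm_load_ps. If aligned accesses are ever wanted, one option is to over-align the union itself. A minimal sketch, assuming C11; Vec4fAligned_t is a hypothetical variant, not the repo's Vec4f_t:

#include <assert.h>
#include <stdalign.h>
#include <stdio.h>

typedef union
{
    struct { float x, y, z, w; };
    alignas(16) float data[4];  /* member alignment propagates to the union */
} Vec4fAligned_t;

static_assert(alignof(Vec4fAligned_t) == 16, "expected 16-byte alignment");

int main(void)
{
    Vec4fAligned_t v = {.x = 1.f, .y = 2.f, .z = 3.f, .w = 4.f};
    printf("alignof = %zu, x = %f\n", alignof(Vec4fAligned_t), v.x);
    return 0;
}

The trade-off is that every vector would then occupy a 16-byte-aligned slot, which also affects arrays and the ABI of by-value parameters; keeping the unaligned load/store intrinsics is the simpler, portable choice.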