Move the shared math configuration into src/math/mconfig.h.

common_math.h is renamed to mconfig.h and now also defines ALIGN16 and
RESTRICT, replacing the per-header ALIGN16 copies in vec3.h and vec4.h.
The vec3/vec4 prototypes use RESTRICT instead of the bare `restrict`
keyword, and the NEON path of vec4f_sub_r now computes out - a.

Review notes:
- RESTRICT was defined the wrong way round: MSVC accepts __restrict but
  not __restrict__, so the two spellings are swapped in this version.
- Line breaks and blank context lines were re-derived from the hunk line
  counts; indentation width inside hunks is assumed to be four spaces.
- The header names on the bare `#include` context lines were not present
  in the patch text this was restored from; they are left as found.
- The blob ids on the mconfig.h index line predate the RESTRICT change.

diff --git a/src/math/common_math.h b/src/math/mconfig.h
similarity index 55%
rename from src/math/common_math.h
rename to src/math/mconfig.h
index d89a9df..0e5abb6 100644
--- a/src/math/common_math.h
+++ b/src/math/mconfig.h
@@ -1,7 +1,7 @@
 // Common math library
 
-#ifndef MATH_H
-#define MATH_H
+#ifndef MCONFIG_H
+#define MCONFIG_H
 
 #include
 #include
@@ -10,11 +10,26 @@
 #if defined(__x86_64__) || defined(__i386__)
 #define SIMD_X86
 #include
+
 #elif defined(__aarch64__) || defined(__arm64__)
 #define SIMD_ARCH
 #include
+
 #else
 #define SIMD_NONE
 #endif
 
-#endif // MATH_H
+
+// Compiler-specific spellings shared by the vector headers:
+//   ALIGN16  - requests 16-byte alignment for the annotated type
+//   RESTRICT - no-alias pointer qualifier used in the vec3/vec4 prototypes
+// MSVC only accepts __restrict; GCC and Clang accept __restrict__.
+#ifdef _MSC_VER
+    #define ALIGN16 __declspec(align(16))
+    #define RESTRICT __restrict
+#else
+    #define ALIGN16 __attribute__((aligned(16)))
+    #define RESTRICT __restrict__
+#endif
+
+#endif // MCONFIG_H
diff --git a/src/math/vec3.c b/src/math/vec3.c
index 83fd09a..4dbc9eb 100644
--- a/src/math/vec3.c
+++ b/src/math/vec3.c
@@ -6,7 +6,7 @@
 //
 
 #include "vec3.h"
-#include "common_math.h"
+#include "mconfig.h"
 
 #define VEC_SIZE 3
 
diff --git a/src/math/vec3.h b/src/math/vec3.h
index df03e46..4529c62 100644
--- a/src/math/vec3.h
+++ b/src/math/vec3.h
@@ -8,11 +8,7 @@
 #ifndef vec3_h
 #define vec3_h
 
-#ifdef _MSC_VER
-#define ALIGN16 __declspec(align(16))
-#else
-#define ALIGN16 __attribute__((aligned(16)))
-#endif
+#include "mconfig.h"
 
 typedef union
 {
@@ -20,25 +16,25 @@ typedef union
     float data[4];
 } ALIGN16 Vec3f_t;
 
-Vec3f_t vec3f_from_array(const float *restrict val);
+Vec3f_t vec3f_from_array(const float *RESTRICT val);
 Vec3f_t vec3f(float x, float y, float z);
 // (f, f, f)
 Vec3f_t vec3f_scalar(float f);
 // (0, 0, 0)
 Vec3f_t vec3f_zero(void);
 
-inline static Vec3f_t vec3f_clone(const Vec3f_t *restrict v)
+inline static Vec3f_t vec3f_clone(const Vec3f_t *RESTRICT v)
 {
     return *v;
 }
 
-Vec3f_t vec3f_add_r(Vec3f_t *restrict out, Vec3f_t a);
+Vec3f_t vec3f_add_r(Vec3f_t *RESTRICT out, Vec3f_t a);
 Vec3f_t vec3f_add(Vec3f_t a, Vec3f_t b);
 
-Vec3f_t vec3f_sub_r(Vec3f_t *restrict out, Vec3f_t a);
+Vec3f_t vec3f_sub_r(Vec3f_t *RESTRICT out, Vec3f_t a);
 Vec3f_t vec3f_sub(Vec3f_t a, Vec3f_t b);
 
-Vec3f_t vec3f_scale_r(Vec3f_t *restrict out, float scale);
+Vec3f_t vec3f_scale_r(Vec3f_t *RESTRICT out, float scale);
 Vec3f_t vec3f_scale(Vec3f_t a, float scale);
 
 #endif /* vec3_h */
diff --git a/src/math/vec4.c b/src/math/vec4.c
index a254132..59c267a 100644
--- a/src/math/vec4.c
+++ b/src/math/vec4.c
@@ -1,5 +1,4 @@
 #include "vec4.h"
-#include "common_math.h"
 
 #define VEC_SIZE 4
 
@@ -89,8 +88,8 @@ Vec4f_t vec4f_sub_r(Vec4f_t *restrict out, Vec4f_t a)
     _mm_store_ps(out->data, vres);
 
 #elif defined (SIMD_ARCH)
-    float32x4_t va = vld1q_f32(a.data);
-    float32x4_t vb = vld1q_f32(out->data);
+    float32x4_t va = vld1q_f32(out->data);
+    float32x4_t vb = vld1q_f32(a.data);
     float32x4_t vres = vsubq_f32(va, vb);
     vst1q_f32(out->data, vres);
 
diff --git a/src/math/vec4.h b/src/math/vec4.h
index db94d78..71f2a92 100644
--- a/src/math/vec4.h
+++ b/src/math/vec4.h
@@ -1,12 +1,7 @@
 #ifndef VECTOR4_H
 #define VECTOR4_H
 
-#include "vec3.h"
-#ifdef _MSC_VER
-#define ALIGN16 __declspec(align(16))
-#else
-#define ALIGN16 __attribute__((aligned(16)))
-#endif
+#include "mconfig.h"
 
 // must be aligned by 16 Bytes (less instruction executed for SSE)
 typedef union
@@ -15,24 +10,25 @@ typedef union
     float data[4];
 } ALIGN16 Vec4f_t;
 
-Vec4f_t vec4f_from_array(const float *restrict val);
+Vec4f_t vec4f_from_array(const float *RESTRICT val);
 Vec4f_t vec4f(float x, float y, float z, float w);
 // (f, f, f, f)
 Vec4f_t vec4f_scalar(float f);
 // (0, 0, 0, 0)
 Vec4f_t vec4f_zero(void);
 
-inline static Vec4f_t vec4f_clone(const Vec4f_t *restrict v)
+inline static Vec4f_t vec4f_clone(const Vec4f_t *RESTRICT v)
 {
     return *v;
 }
-Vec4f_t vec4f_add_r(Vec4f_t *restrict out, Vec4f_t a);
+
+Vec4f_t vec4f_add_r(Vec4f_t *RESTRICT out, Vec4f_t a);
 Vec4f_t vec4f_add(Vec4f_t a, Vec4f_t b);
 
-Vec4f_t vec4f_sub_r(Vec4f_t *restrict out, Vec4f_t a);
+Vec4f_t vec4f_sub_r(Vec4f_t *RESTRICT out, Vec4f_t a);
 Vec4f_t vec4f_sub(Vec4f_t a, Vec4f_t b);
 
-Vec4f_t vec4f_scale_r(Vec4f_t *restrict out, float scale);
+Vec4f_t vec4f_scale_r(Vec4f_t *RESTRICT out, float scale);
 Vec4f_t vec4f_scale(Vec4f_t a, float scale);
 
 float vec4_dot(Vec4f_t a, Vec4f_t b);