Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,44 @@ if(BUILD_LTO)
endif()
endif()

#
# SIMD SUPPORT (independent of OpenMP)
#

# USE_SIMD is the user-facing switch for the SIMD code paths (ON by default).
# Configure with -DUSE_SIMD=OFF to skip all detection below; in that case no
# -m<isa> flag is appended and no EIDOS_HAS_* definition is added.
option(USE_SIMD "Enable SIMD optimizations (SSE4.2/AVX2)" ON)

if(NOT USE_SIMD)
  message(STATUS "SIMD: Disabled by user")
elseif(WIN32)
  # Windows/MSVC detection not yet implemented
  message(STATUS "SIMD: Windows SIMD detection not yet implemented, using scalar fallback")
else()
  include(CheckCXXCompilerFlag)

  # Probe which instruction-set flags the C++ compiler accepts.
  # NOTE(review): these probes only show that the compiler accepts the flag;
  # they do not show that the machine running the binary has the instructions.
  check_cxx_compiler_flag("-mavx2" COMPILER_SUPPORTS_AVX2)
  check_cxx_compiler_flag("-msse4.2" COMPILER_SUPPORTS_SSE42)
  check_cxx_compiler_flag("-mfma" COMPILER_SUPPORTS_FMA)

  if(COMPILER_SUPPORTS_AVX2)
    # Preferred tier: AVX2, with FMA layered on top when it is also accepted.
    message(STATUS "SIMD: AVX2 support detected")
    add_compile_definitions(EIDOS_HAS_AVX2=1)
    string(APPEND CMAKE_CXX_FLAGS " -mavx2")
    if(COMPILER_SUPPORTS_FMA)
      message(STATUS "SIMD: FMA support detected")
      add_compile_definitions(EIDOS_HAS_FMA=1)
      string(APPEND CMAKE_CXX_FLAGS " -mfma")
    endif()
  elseif(COMPILER_SUPPORTS_SSE42)
    # Second tier: SSE4.2 only; chosen solely when AVX2 was not accepted.
    message(STATUS "SIMD: SSE4.2 support detected (no AVX2)")
    add_compile_definitions(EIDOS_HAS_SSE42=1)
    string(APPEND CMAKE_CXX_FLAGS " -msse4.2")
  else()
    # Neither flag was accepted: no definitions and no extra flags are added.
    message(STATUS "SIMD: No SIMD support detected, using scalar fallback")
  endif()
endif()

# GSL - adding /usr/local/include so all targets that use GSL_INCLUDES get omp.h
set(TARGET_NAME_GSL gsl)
file(GLOB_RECURSE GSL_SOURCES ${PROJECT_SOURCE_DIR}/gsl/*.c ${PROJECT_SOURCE_DIR}/gsl/*/*.c)
Expand Down
100 changes: 63 additions & 37 deletions eidos/eidos_functions_math.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@


#include "eidos_functions.h"
#include "eidos_simd.h"

#include <utility>
#include <string>
Expand Down Expand Up @@ -87,15 +88,19 @@ EidosValue_SP Eidos_ExecuteFunction_abs(const std::vector<EidosValue_SP> &p_argu
EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count);
double *float_result_data = float_result->data_mutable();
result_SP = EidosValue_SP(float_result);


#ifdef _OPENMP
EIDOS_THREAD_COUNT(gEidos_OMP_threads_ABS_FLOAT);
#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_ABS_FLOAT) num_threads(thread_count)
#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_ABS_FLOAT) num_threads(thread_count)
for (int value_index = 0; value_index < x_count; ++value_index)
float_result_data[value_index] = fabs(float_data[value_index]);
#else
Eidos_SIMD::abs_float64(float_data, float_result_data, x_count);
#endif
}

result_SP->CopyDimensionsFromValue(x_value);

return result_SP;
}

Expand Down Expand Up @@ -190,21 +195,25 @@ EidosValue_SP Eidos_ExecuteFunction_atan2(const std::vector<EidosValue_SP> &p_ar
// Applies ceil() to every element of the first argument and returns the
// results as a newly allocated EidosValue_Float of the same length.
//
// p_arguments[0] : the value to process; its elements are read via FloatData().
//                  (assumes the caller has already ensured it is a float value — TODO confirm)
// p_interpreter  : unused.
//
// The result has the dimensions of the argument copied onto it via
// CopyDimensionsFromValue().
EidosValue_SP Eidos_ExecuteFunction_ceil(const std::vector<EidosValue_SP> &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter)
{
	EidosValue_SP result_SP(nullptr);
	
	EidosValue *x_value = p_arguments[0].get();
	int x_count = x_value->Count();
	const double *float_data = x_value->FloatData();
	
	// The result buffer is sized but deliberately not initialized; every
	// element is written by one of the two code paths below.
	EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count);
	double *float_result_data = float_result->data_mutable();
	result_SP = EidosValue_SP(float_result);
	
#ifdef _OPENMP
	// OpenMP build: parallel loop, enabled only when x_count reaches EIDOS_OMPMIN_CEIL.
	// EIDOS_THREAD_COUNT presumably declares the thread_count used by num_threads() — confirm against its definition.
	// Exactly one directive may precede the loop; the directive must be followed directly by the for statement.
	EIDOS_THREAD_COUNT(gEidos_OMP_threads_CEIL);
#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_CEIL) num_threads(thread_count)
	for (int value_index = 0; value_index < x_count; ++value_index)
		float_result_data[value_index] = ceil(float_data[value_index]);
#else
	// Non-OpenMP build: hand the whole buffer to the SIMD helper.
	// NOTE(review): presumably an elementwise ceil with a scalar fallback — confirm in eidos_simd.h.
	Eidos_SIMD::ceil_float64(float_data, float_result_data, x_count);
#endif
	
	result_SP->CopyDimensionsFromValue(x_value);
	
	return result_SP;
}

Expand Down Expand Up @@ -359,21 +368,25 @@ EidosValue_SP Eidos_ExecuteFunction_exp(const std::vector<EidosValue_SP> &p_argu
// Applies floor() to every element of the first argument and returns the
// results as a newly allocated EidosValue_Float of the same length.
//
// p_arguments[0] : the value to process; its elements are read via FloatData().
//                  (assumes the caller has already ensured it is a float value — TODO confirm)
// p_interpreter  : unused.
//
// The result has the dimensions of the argument copied onto it via
// CopyDimensionsFromValue().
EidosValue_SP Eidos_ExecuteFunction_floor(const std::vector<EidosValue_SP> &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter)
{
	EidosValue_SP result_SP(nullptr);
	
	EidosValue *x_value = p_arguments[0].get();
	int x_count = x_value->Count();
	const double *float_data = x_value->FloatData();
	
	// The result buffer is sized but deliberately not initialized; every
	// element is written by one of the two code paths below.
	EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count);
	double *float_result_data = float_result->data_mutable();
	result_SP = EidosValue_SP(float_result);
	
#ifdef _OPENMP
	// OpenMP build: parallel loop, enabled only when x_count reaches EIDOS_OMPMIN_FLOOR.
	// EIDOS_THREAD_COUNT presumably declares the thread_count used by num_threads() — confirm against its definition.
	// Exactly one directive may precede the loop; the directive must be followed directly by the for statement.
	EIDOS_THREAD_COUNT(gEidos_OMP_threads_FLOOR);
#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_FLOOR) num_threads(thread_count)
	for (int value_index = 0; value_index < x_count; ++value_index)
		float_result_data[value_index] = floor(float_data[value_index]);
#else
	// Non-OpenMP build: hand the whole buffer to the SIMD helper.
	// NOTE(review): presumably an elementwise floor with a scalar fallback — confirm in eidos_simd.h.
	Eidos_SIMD::floor_float64(float_data, float_result_data, x_count);
#endif
	
	result_SP->CopyDimensionsFromValue(x_value);
	
	return result_SP;
}

Expand Down Expand Up @@ -788,36 +801,37 @@ EidosValue_SP Eidos_ExecuteFunction_product(const std::vector<EidosValue_SP> &p_
else if (x_type == EidosValueType::kValueFloat)
{
const double *float_data = x_value->FloatData();
double product = 1;

for (int value_index = 0; value_index < x_count; ++value_index)
product *= float_data[value_index];

double product = Eidos_SIMD::product_float64(float_data, x_count);

result_SP = EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Float(product));
}

return result_SP;
}

// (float)round(float x)
//
// Applies round() to every element of the first argument and returns the
// results as a newly allocated EidosValue_Float of the same length.
//
// p_arguments[0] : the value to process; its elements are read via FloatData().
// p_interpreter  : unused.
//
// The result has the dimensions of the argument copied onto it via
// CopyDimensionsFromValue().
EidosValue_SP Eidos_ExecuteFunction_round(const std::vector<EidosValue_SP> &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter)
{
	EidosValue_SP result_SP(nullptr);
	
	EidosValue *x_value = p_arguments[0].get();
	int x_count = x_value->Count();
	const double *float_data = x_value->FloatData();
	
	// The result buffer is sized but deliberately not initialized; every
	// element is written by one of the two code paths below.
	EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count);
	double *float_result_data = float_result->data_mutable();
	result_SP = EidosValue_SP(float_result);
	
#ifdef _OPENMP
	// OpenMP build: parallel loop, enabled only when x_count reaches EIDOS_OMPMIN_ROUND.
	// EIDOS_THREAD_COUNT presumably declares the thread_count used by num_threads() — confirm against its definition.
	// Exactly one directive may precede the loop; the directive must be followed directly by the for statement.
	EIDOS_THREAD_COUNT(gEidos_OMP_threads_ROUND);
#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_ROUND) num_threads(thread_count)
	for (int value_index = 0; value_index < x_count; ++value_index)
		float_result_data[value_index] = round(float_data[value_index]);
#else
	// Non-OpenMP build: hand the whole buffer to the SIMD helper.
	// NOTE(review): the scalar path uses round(), which rounds halfway cases away from zero;
	// confirm that round_float64 in eidos_simd.h gives the same results for x.5 inputs,
	// since hardware round-to-nearest modes typically round halfway cases to even.
	Eidos_SIMD::round_float64(float_data, float_result_data, x_count);
#endif
	
	result_SP->CopyDimensionsFromValue(x_value);
	
	return result_SP;
}

Expand Down Expand Up @@ -2426,15 +2440,19 @@ EidosValue_SP Eidos_ExecuteFunction_sqrt(const std::vector<EidosValue_SP> &p_arg
EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count);
double *float_result_data = float_result->data_mutable();
result_SP = EidosValue_SP(float_result);


#ifdef _OPENMP
EIDOS_THREAD_COUNT(gEidos_OMP_threads_SQRT_FLOAT);
#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_SQRT_FLOAT) num_threads(thread_count)
#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_SQRT_FLOAT) num_threads(thread_count)
for (int value_index = 0; value_index < x_count; ++value_index)
float_result_data[value_index] = sqrt(float_data[value_index]);
#else
Eidos_SIMD::sqrt_float64(float_data, float_result_data, x_count);
#endif
}

result_SP->CopyDimensionsFromValue(x_value);

return result_SP;
}

Expand Down Expand Up @@ -2517,12 +2535,16 @@ EidosValue_SP Eidos_ExecuteFunction_sum(const std::vector<EidosValue_SP> &p_argu
{
const double *float_data = x_value->FloatData();
double sum = 0;


#ifdef _OPENMP
EIDOS_THREAD_COUNT(gEidos_OMP_threads_SUM_FLOAT);
#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data) reduction(+: sum) if(parallel:x_count >= EIDOS_OMPMIN_SUM_FLOAT) num_threads(thread_count)
#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data) reduction(+: sum) if(parallel:x_count >= EIDOS_OMPMIN_SUM_FLOAT) num_threads(thread_count)
for (int value_index = 0; value_index < x_count; ++value_index)
sum += float_data[value_index];

#else
sum = Eidos_SIMD::sum_float64(float_data, x_count);
#endif

result_SP = EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Float(sum));
}
else if (x_type == EidosValueType::kValueLogical)
Expand Down Expand Up @@ -2587,21 +2609,25 @@ EidosValue_SP Eidos_ExecuteFunction_tan(const std::vector<EidosValue_SP> &p_argu
// Applies trunc() to every element of the first argument and returns the
// results as a newly allocated EidosValue_Float of the same length.
//
// p_arguments[0] : the value to process; its elements are read via FloatData().
//                  (assumes the caller has already ensured it is a float value — TODO confirm)
// p_interpreter  : unused.
//
// The result has the dimensions of the argument copied onto it via
// CopyDimensionsFromValue().
EidosValue_SP Eidos_ExecuteFunction_trunc(const std::vector<EidosValue_SP> &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter)
{
	EidosValue_SP result_SP(nullptr);
	
	EidosValue *x_value = p_arguments[0].get();
	int x_count = x_value->Count();
	const double *float_data = x_value->FloatData();
	
	// The result buffer is sized but deliberately not initialized; every
	// element is written by one of the two code paths below.
	EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count);
	double *float_result_data = float_result->data_mutable();
	result_SP = EidosValue_SP(float_result);
	
#ifdef _OPENMP
	// OpenMP build: parallel loop, enabled only when x_count reaches EIDOS_OMPMIN_TRUNC.
	// EIDOS_THREAD_COUNT presumably declares the thread_count used by num_threads() — confirm against its definition.
	// Exactly one directive may precede the loop; the directive must be followed directly by the for statement.
	EIDOS_THREAD_COUNT(gEidos_OMP_threads_TRUNC);
#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_TRUNC) num_threads(thread_count)
	for (int value_index = 0; value_index < x_count; ++value_index)
		float_result_data[value_index] = trunc(float_data[value_index]);
#else
	// Non-OpenMP build: hand the whole buffer to the SIMD helper.
	// NOTE(review): presumably an elementwise trunc with a scalar fallback — confirm in eidos_simd.h.
	Eidos_SIMD::trunc_float64(float_data, float_result_data, x_count);
#endif
	
	result_SP->CopyDimensionsFromValue(x_value);
	
	return result_SP;
}

Expand Down
Loading
Loading