diff --git a/code/intel/convolution/mkl_conv/Makefile b/code/intel/convolution/mkl_conv/Makefile
index f6117be..07daa1e 100644
--- a/code/intel/convolution/mkl_conv/Makefile
+++ b/code/intel/convolution/mkl_conv/Makefile
@@ -33,7 +33,7 @@ ifeq ($(MKLLIB), mkl_rt)
 EXTRALIB = -L$(MKLROOT)/lib/intel64/ -lmkl_rt \
            -Wl,-rpath,$(MKLROOT)/lib/intel64
 endif
-EXTRALIB += -liomp5 -lpthread -lm -ldl
+EXTRALIB += -L$(MKLROOT)/lib/intel64/ -liomp5 -lpthread -lm -ldl
 endif
 
 ifeq ($(CONVLIB),MKLDNN)
@@ -43,7 +43,7 @@ ifeq ($(MKLDNNROOT),)
            to the install directory.)
 endif
 EXTRACXXFLAGS = -I$(MKLDNNROOT)/include -DUSE_MKLDNN
-EXTRALIB = -L$(MKLDNNROOT)/lib -lmkldnn -Wl,-rpath,$(MKLDNNROOT)/lib
+EXTRALIB = -L$(MKLDNNROOT)/lib -lmkldnn -lmklml_intel -Wl,-rpath,$(MKLDNNROOT)/lib
 endif
 
 ifeq ($(DEBUG), 1)
diff --git a/code/intel/convolution/mkl_conv/std_conv_bench.cpp b/code/intel/convolution/mkl_conv/std_conv_bench.cpp
index 3860892..fd2229a 100644
--- a/code/intel/convolution/mkl_conv/std_conv_bench.cpp
+++ b/code/intel/convolution/mkl_conv/std_conv_bench.cpp
@@ -18,11 +18,13 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <float.h>
+#include <getopt.h>
 #include <time.h>
 #include <string>
 #include <vector>
 #include <tuple>
+#include <iostream>
 
 struct conv_problem {
     int minibatch;
@@ -51,8 +53,6 @@ struct conv_problem {
 #define INFERENCE_SERVER 1
 #define INFERENCE_DEVICE 2
 
-#define ITERS 1000
-
 // Calculates convolution output dimension using the definition from Caffe
 static inline int calc_out_dim(
         int input_dim, int filter_dim, int padd, int stride)
@@ -352,10 +352,6 @@ static void usage()
     printf(
            "Usage: [OPTIONS]\n"
            "\n"
-           "Output control:\n"
-           "   --csv-output       Produce CSV output\n"
-           "   --original-output  Produce output in the original format\n"
-           "\n"
            "Control flops calculations:\n"
            "   --no-skip-padding  Count ops with padding zeroes (default)\n"
            "   --skip-padding     Do not count ops with padding zeroes\n"
@@ -367,8 +363,21 @@ static void usage()
            "                      (AVX512_4VNNI CPUs)\n"
            "Problem set control:\n"
            "   --training         Training data set (default)\n"
-           "   --inference-server Server inference data set\n"
-           "   --inference-device Device inference data set\n"
+           "   --inference        Server inference data set\n"
+           "   --device           Device inference data set\n"
+           "Custom convolution definition:\n"
+           "   --w                Input width\n"
+           "   --h                Input height\n"
+           "   --c                Input channels\n"
+           "   --n                Minibatch size\n"
+           "   --k                Output channels (filters)\n"
+           "   --filter_w         Filter width\n"
+           "   --filter_h         Filter height\n"
+           "   --pad_w            Horizontal padding\n"
+           "   --pad_h            Vertical padding\n"
+           "   --wstride          Horizontal stride\n"
+           "   --hstride          Vertical stride\n"
+           "   --repeat           Number of times to test convolution (default: 50)\n"
            "\n"
     );
     exit(-1);
 }
 
@@ -377,36 +386,185 @@
 int main(int argc, char **argv)
 {
     bool skip_padding = false;
-    bool csv_output = false;
     int precision = PREC_F32;
-    std::vector<int> modes
-        = {FWD_CONVOLUTION, BWD_F_CONVOLUTION, BWD_D_CONVOLUTION};
+    std::vector<int> modes = {FWD_CONVOLUTION};
     int problem_set = TRAINING;
-
-    for(argc--, argv++; argc; argv++, argc--) {
-        if (*argv == std::string("--csv-output"))
-            csv_output = true;
-        else if (*argv == std::string("--original-output"))
-            csv_output = false;
-        else if (*argv == std::string("--skip-padding"))
-            skip_padding = true;
-        else if (*argv == std::string("--no-skip-padding"))
-            skip_padding = false;
-        else if (*argv == std::string("--f32"))
-            precision = PREC_F32;
-        else if (*argv == std::string("--u8s8u8"))
-            precision = PREC_U8S8U8;
-        else if (*argv == std::string("--s16s16s32"))
-            precision = PREC_S16S16S32;
-        else if (*argv == std::string("--inference-device"))
-            problem_set = INFERENCE_DEVICE;
-        else if (*argv == std::string("--inference-server"))
-            problem_set = INFERENCE_SERVER;
-        else if (*argv == std::string("--training"))
-            problem_set = TRAINING;
-        else
-            usage();
-    }
+    // DEFAULTS
+    int ITERS = 50;
+    std::vector<std::tuple<unsigned int, unsigned int, unsigned int,
+            unsigned int, unsigned int, unsigned int, unsigned int,
+            unsigned int, unsigned int, unsigned int, unsigned int>> *problems = nullptr;
+    // Parsed as int (not unsigned) so that negative values fail the range checks below
+    int w, h, c, n, k, filter_w, filter_h, pad_w, pad_h, wstride, hstride;
+    w = 151; h = 40; c = 1; n = 1; k = 32; filter_w = 20;
+    filter_h = 5; pad_w = 8; pad_h = 8; wstride = 8; hstride = 2;
+
+    // Use getopt_long here to allow either driving the benchmark with the
+    // built-in problem sets or testing a single user-defined convolution
+    static struct option long_options[] = {
+        {"training", no_argument, 0, 0}, // These run the full sets and override customization
+        {"inference", no_argument, 0, 0},
+        {"device", no_argument, 0, 0},
+        {"repeat", required_argument, 0, 0},
+        {"w", required_argument, 0, 0},
+        {"h", required_argument, 0, 0},
+        {"c", required_argument, 0, 0},
+        {"n", required_argument, 0, 0},
+        {"k", required_argument, 0, 0},
+        {"filter_w", required_argument, 0, 0},
+        {"filter_h", required_argument, 0, 0},
+        {"pad_w", required_argument, 0, 0},
+        {"pad_h", required_argument, 0, 0},
+        {"wstride", required_argument, 0, 0},
+        {"hstride", required_argument, 0, 0},
+        {"no-skip-padding", no_argument, 0, 0},
+        {"skip-padding", no_argument, 0, 0},
+        {"f32", no_argument, 0, 0},
+        {"u8s8u8", no_argument, 0, 0},
+        {"s16s16s32", no_argument, 0, 0},
+        {0, 0, 0, 0}
+    };
+
+    int opt;
+    do {
+        int option_index = 0;
+        opt = getopt_long(argc, argv, "", long_options, &option_index);
+        switch (opt) {
+            case -1:
+                break;
+            case 0:
+                switch (option_index) {
+                    case 0:
+                        if (problems == nullptr) {
+                            problems = &training_set;
+                            modes = {FWD_CONVOLUTION, BWD_F_CONVOLUTION, BWD_D_CONVOLUTION};
+                            std::cout << "Running the training benchmark set" << std::endl;
+                        }
+                        break;
+                    case 1:
+                        if (problems == nullptr) {
+                            problems = &inference_server_set;
+                            std::cout << "Running the inference server set" << std::endl;
+                        }
+                        break;
+                    case 2:
+                        if (problems == nullptr) {
+                            problems = &inference_device_set;
+                            std::cout << "Running the inference device set" << std::endl;
+                        }
+                        break;
+                    case 3:
+                        ITERS = std::atoi(optarg);
+                        if (ITERS <= 0) {
+                            std::cerr << "Invalid repeat parameter specified" << std::endl;
+                            return 0;
+                        }
+                        break;
+                    case 4:
+                        w = std::atoi(optarg);
+                        if (w <= 0) {
+                            std::cerr << "Invalid w parameter specified" << std::endl;
+                            return 0;
+                        }
+                        break;
+                    case 5:
+                        h = std::atoi(optarg);
+                        if (h <= 0) {
+                            std::cerr << "Invalid h parameter specified" << std::endl;
+                            return 0;
+                        }
+                        break;
+                    case 6:
+                        c = std::atoi(optarg);
+                        if (c <= 0) {
+                            std::cerr << "Invalid c parameter specified" << std::endl;
+                            return 0;
+                        }
+                        break;
+                    case 7:
+                        n = std::atoi(optarg);
+                        if (n <= 0) {
+                            std::cerr << "Invalid n parameter specified" << std::endl;
+                            return 0;
+                        }
+                        break;
+                    case 8:
+                        k = std::atoi(optarg);
+                        if (k <= 0) {
+                            std::cerr << "Invalid k parameter specified" << std::endl;
+                            return 0;
+                        }
+                        break;
+                    case 9:
+                        filter_w = std::atoi(optarg);
+                        if (filter_w <= 0) {
+                            std::cerr << "Invalid filter_w parameter specified" << std::endl;
+                            return 0;
+                        }
+                        break;
+                    case 10:
+                        filter_h = std::atoi(optarg);
+                        if (filter_h <= 0) {
+                            std::cerr << "Invalid filter_h parameter specified" << std::endl;
+                            return 0;
+                        }
+                        break;
+                    case 11:
+                        pad_w = std::atoi(optarg);
+                        if (pad_w < 0) {
+                            std::cerr << "Invalid pad_w parameter specified" << std::endl;
+                            return 0;
+                        }
+                        break;
+                    case 12:
+                        pad_h = std::atoi(optarg);
+                        if (pad_h < 0) {
+                            std::cerr << "Invalid pad_h parameter specified" << std::endl;
+                            return 0;
+                        }
+                        break;
+                    case 13:
+                        wstride = std::atoi(optarg);
+                        if (wstride <= 0) {
+                            std::cerr << "Invalid wstride parameter specified" << std::endl;
+                            return 0;
+                        }
+                        break;
+                    case 14:
+                        hstride = std::atoi(optarg);
+                        if (hstride <= 0) {
+                            std::cerr << "Invalid hstride parameter specified" << std::endl;
+                            return 0;
+                        }
+                        break;
+                    case 15:
+                        skip_padding = false;
+                        break;
+                    case 16:
+                        skip_padding = true;
+                        break;
+                    case 17:
+                        precision = PREC_F32;
+                        break;
+                    case 18:
+                        precision = PREC_U8S8U8;
+                        break;
+                    case 19:
+                        precision = PREC_S16S16S32;
+                        break;
+                    default:
+                        break;
+                }
+                break;
+            case '?':
+                usage();
+                return 0;
+            default:
+                usage();
+                return 0;
+        }
+    } while (opt != -1);
 
 #ifdef USE_MKL
     if (precision != PREC_F32) {
@@ -416,43 +574,31 @@ int main(int argc, char **argv)
     }
 #endif
 
-#ifdef USE_MKLDNN
-    if (precision != PREC_F32 || problem_set != TRAINING)
-        modes = {FWD_CONVOLUTION};
-#endif
+    if (problems == nullptr) {
+        problems = new std::vector<std::tuple<unsigned int, unsigned int,
+                unsigned int, unsigned int, unsigned int, unsigned int,
+                unsigned int, unsigned int, unsigned int, unsigned int,
+                unsigned int>>();
+        problems->push_back(std::make_tuple(w, h, c, n, k, filter_w,
+                filter_h, pad_w, pad_h, wstride, hstride));
+    }
 
     const char *conv_mode_strs[] = {"FWD", "BWD_F", "BWD_D"};
     const char *skip_padding_strs[]
-        = {"w/ padding in flops", "w/o padding in flops"};
-
-    const auto &problems = (problem_set == TRAINING
-            ? training_set
-            : (problem_set == INFERENCE_DEVICE
-                ? inference_device_set
-                : inference_server_set));
+            = {"w/ padding in flops", "w/o padding in flops"};
 
+    printf("OP,w,h,c,n,k,filter_w,filter_h,pad_w,pad_h,wstride,hstride,usecs,gops\n");
     for (auto m : modes) {
-        if (!csv_output)
-            printf(" %s Convolution\n", conv_mode_strs[m]);
-        for (const auto& problem : problems) {
+        for (const auto& problem : *problems) {
             conv_problem p;
             std::tie(p.w, p.h, p.ic, p.minibatch, p.oc, p.fw, p.fh,
                     p.pad_w, p.pad_h, p.stride_w, p.stride_h) = problem;
             p.iters = ITERS;
             auto r = bench_conv(p, m, precision, skip_padding);
-            if (csv_output)
-                printf("%s,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%e,%e,%e,%e\n",
-                        conv_mode_strs[m], skip_padding,
-                        p.minibatch, p.w, p.h, p.ic, p.oc, p.fw, p.fh,
-                        p.stride_w, p.stride_h, p.pad_w, p.pad_h,
-                        r.min_ms, r.max_gflops, r.avg_ms, r.avg_gflops);
-            else
-                printf("W=%d, H=%d, C=%d, N=%d, K=%d, S=%d, R=%d | "
-                        "%s %s min(ms) %.2f; max(gflop/s) %.2f;"
-                        "avg(ms) %.2f; avg(gflop/s) %.2f;\n",
-                        p.w, p.h, p.ic, p.minibatch, p.oc, p.fw, p.fh,
-                        conv_mode_strs[m], skip_padding_strs[skip_padding],
-                        r.min_ms, r.max_gflops, r.avg_ms, r.avg_gflops);
+            // Column order matches the CSV header printed above
+            printf("%s,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%f,%f\n",
+                    conv_mode_strs[m], p.w, p.h, p.ic, p.minibatch, p.oc,
+                    p.fw, p.fh, p.pad_w, p.pad_h, p.stride_w, p.stride_h,
+                    r.avg_ms*1000.0, r.avg_gflops);
             fflush(0);
         }
     }
diff --git a/code/intel/gemm/Makefile b/code/intel/gemm/Makefile
index 36e0278..b1df381 100644
--- a/code/intel/gemm/Makefile
+++ b/code/intel/gemm/Makefile
@@ -15,11 +15,16 @@
 # ******************************************************************************
 
 CC = icc
-CFLAGS = -O2 -Wall -I$(MKLROOT)/include -I../../kernels -qopenmp -std=c++11
+CFLAGS = -O2 -Wall -I$(MKLROOT)/include -I../../kernels -fopenmp -std=c++11
 
-EXTRALIB = -Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKLROOT)/lib/intel64/libmkl_intel_thread.a $(MKLROOT)/lib/intel64/libmkl_core.a -Wl,--end-group -liomp5 -lpthread -lm -ldl
+EXTRALIB = -Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKLROOT)/lib/intel64/libmkl_intel_thread.a $(MKLROOT)/lib/intel64/libmkl_core.a -Wl,--end-group -L$(MKLROOT)/lib/intel64/ -liomp5 -lpthread -lm -ldl
 
-all : sbench sbench_pack
+OPENBLAS_PATH ?= /usr/local/openblas
+
+OPENBLAS_FLAGS = -O2 -Wall -I$(OPENBLAS_PATH)/include -I../../kernels -fopenmp -std=c++11 -DUSE_OPENBLAS
+OPENBLAS_LIBS = -L$(OPENBLAS_PATH)/lib -lopenblas -lpthread -lm -ldl
+
+all : sbench sbench_pack ibench_s8u8s32 sbench_oblas
 
 ibench_s8u8s32 : ibench_s8u8s32.o
 	$(CC) $(CFLAGS) $^ $(EXTRALIB) -o $@
@@ -33,11 +38,17 @@ sbench_pack : sbench_pack.o
 sbench_pack.o : bench.cpp ../../kernels/gemm_problems.h
 	$(CC) $(CFLAGS) -DPACKED_API -c -o $@ $<
 
+sbench.o : bench.cpp ../../kernels/gemm_problems.h
+	$(CC) $(CFLAGS) -c -o $@ $<
+
 sbench : sbench.o
 	$(CC) $(CFLAGS) $^ $(EXTRALIB) -o $@
 
-sbench.o : bench.cpp ../../kernels/gemm_problems.h
-	$(CC) $(CFLAGS) -c -o $@ $<
+sbench_oblas.o : bench.cpp ../../kernels/gemm_problems.h
+	$(CC) $(OPENBLAS_FLAGS) -c -o $@ $<
+
+sbench_oblas : sbench_oblas.o
+	$(CC) $(OPENBLAS_FLAGS) $^ $(OPENBLAS_LIBS) -o $@
 
 clean :
-	rm -f *.o sbench sbench_pack ibench_s8u8s32
+	rm -f *.o sbench sbench_pack ibench_s8u8s32 sbench_oblas
diff --git a/code/intel/gemm/bench.cpp b/code/intel/gemm/bench.cpp
index bc92d39..51c791c 100644
--- a/code/intel/gemm/bench.cpp
+++ b/code/intel/gemm/bench.cpp
@@ -18,6 +18,9 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>
+#include <getopt.h>
+#include <iostream>
+#include <iomanip>
 #include <string.h>
 #include <omp.h>
 #include <time.h>
@@ -29,12 +32,17 @@
 #include <vector>
 #include <tuple>
+#include <chrono>
 
+#ifndef USE_OPENBLAS
 #include <mkl.h>
+#else
+#include <cblas.h>
+#endif
+
 #include "gemm_problems.h"
 
 #define FIX_LD(x) (((((x) + 127)/128)*128) + 16)
-#define REPEAT 10
 #define MKL_MEM_ALIGNMENT (4*1024)
 
 #ifdef IGEMM_S8U8S32
@@ -53,8 +58,13 @@
 typedef struct gemm_params {
     bool ta;
     bool tb;
+#ifdef USE_OPENBLAS
+    CBLAS_TRANSPOSE transa;
+    CBLAS_TRANSPOSE transb;
+#else
     char transa;
     char transb;
+#endif
     int m;
     int n;
     int k;
     int lda;
     int ldb;
@@ -63,7 +73,18 @@
     int ldc;
 } gemm_params_t;
 
-
+void print_usage()
+{
+    std::cout << " " << std::endl;
+    std::cout << std::left << std::setw(30) << "\tARGS" << std::endl;
+    std::cout << std::left << std::setw(30) << "\t--training|inference|device" << "\tSelect and run a built-in input set" << std::endl;
+    std::cout << std::left << std::setw(30) << "\t--m" << "\tNum rows matrix A" << std::endl;
+    std::cout << std::left << std::setw(30) << "\t--n" << "\tNum cols matrix B" << std::endl;
+    std::cout << std::left << std::setw(30) << "\t--k" << "\tNum cols matrix A, num rows matrix B" << std::endl;
+    std::cout << std::left << std::setw(30) << "\t--ta" << "\tTranspose A" << std::endl;
+    std::cout << std::left << std::setw(30) << "\t--tb" << "\tTranspose B" << std::endl;
+    std::cout << std::left << std::setw(30) << "\t--repeat" << "\tTimes to repeat each GEMM (default: 10)" << std::endl;
+}
 
 int main(int argc, char *argv[])
 {
@@ -76,25 +97,117 @@ int main(int argc, char *argv[])
     B_TYPE *B;
     C_TYPE *C, co = 0;
     float alpha = 1.0, beta = 1.0;
-    double flops, total_flops = 0., st_time, end_time, ave_time, total_time = 0.;
+    double flops, total_flops = 0., ave_time, total_time = 0.;
 #ifdef PACKED_API
     float *AP, *BP;
 #endif
+    // DEFAULT settings
+    int REPEAT = 10;
+    // Default matrix test size if we are doing a single test
+    int m, n, k;
+    m = 128; n = 128; k = 128;
+    bool ta, tb;
+    ta = false; tb = false;
+    std::vector<std::tuple<int, int, int, bool, bool>>* p_problem_set = nullptr;
 
-    int run_training_set = 1;
-    if (argc > 1) run_training_set = atoi(argv[1]);
+    // Use getopt_long here to allow either driving the benchmark with the
+    // built-in problem sets or testing a single user-specified GEMM
+    static struct option long_options[] = {
+        {"training", no_argument, 0, 0}, // These run the full sets and override customization
+        {"inference", no_argument, 0, 0},
+        {"device", no_argument, 0, 0},
+        {"repeat", required_argument, 0, 0},
+        {"m", required_argument, 0, 0},
+        {"n", required_argument, 0, 0},
+        {"k", required_argument, 0, 0},
+        {"ta", no_argument, 0, 0},
+        {"tb", no_argument, 0, 0},
+        {0, 0, 0, 0}
+    };
 
-    std::vector<std::tuple<int, int, int, bool, bool>>* p_problem_set;
-    if (run_training_set) {
-        printf("Running the training benchmark (set first program argument to 0 for inference)\n");
-        p_problem_set = &training_set;
-    } else {
-        printf("Running the inference benchmark (first program argument is 0)\n");
-        p_problem_set = &inference_server_set;
-    }
+    int c;
+    do {
+        int option_index = 0;
+        c = getopt_long(argc, argv, "", long_options, &option_index);
+        switch (c) {
+            case -1:
+                break;
+            case 0:
+                switch (option_index) {
+                    case 0:
+                        if (p_problem_set == nullptr) {
+                            p_problem_set = &training_set;
+                            std::cout << "Running the training benchmark set" << std::endl;
+                        }
+                        break;
+                    case 1:
+                        if (p_problem_set == nullptr) {
+                            p_problem_set = &inference_server_set;
+                            std::cout << "Running the inference server set" << std::endl;
+                        }
+                        break;
+                    case 2:
+                        if (p_problem_set == nullptr) {
+                            p_problem_set = &inference_device_set;
+                            std::cout << "Running the inference device set" << std::endl;
+                        }
+                        break;
+                    case 3:
+                        REPEAT = std::atoi(optarg);
+                        if (REPEAT <= 0) {
+                            std::cerr << "Invalid repeat parameter specified" << std::endl;
+                            return 0;
+                        }
+                        break;
+                    case 4:
+                        m = std::atoi(optarg);
+                        if (m <= 0) {
+                            std::cerr << "Invalid m parameter specified" << std::endl;
+                            return 0;
+                        }
+                        break;
+                    case 5:
+                        n = std::atoi(optarg);
+                        if (n <= 0) {
+                            std::cerr << "Invalid n parameter specified" << std::endl;
+                            return 0;
+                        }
+                        break;
+                    case 6:
+                        k = std::atoi(optarg);
+                        if (k <= 0) {
+                            std::cerr << "Invalid k parameter specified" << std::endl;
+                            return 0;
+                        }
+                        break;
+                    case 7:
+                        ta = true;
+                        break;
+                    case 8:
+                        tb = true;
+                        break;
+                    default:
+                        break;
+                }
+                break;
+            case '?':
+                print_usage();
+                return 0;
+            default:
+                print_usage();
+                return 0;
+        }
+    } while (c != -1);
+
+    if (p_problem_set == nullptr) {
+        p_problem_set = new std::vector<std::tuple<int, int, int, bool, bool>>();
+        p_problem_set->push_back(std::make_tuple(m, n, k, ta, tb));
+    }
 
     num_gemms = p_problem_set->size();
-    gemm_params_t* p_gemm_params = (gemm_params_t*) _mm_malloc(num_gemms*sizeof(gemm_params_t), 64);
+    gemm_params_t* p_gemm_params = (gemm_params_t*) malloc(num_gemms*sizeof(gemm_params_t));
 
     i = 0;
     for (const auto &problem : *p_problem_set) {
@@ -104,21 +217,37 @@ int main(int argc, char *argv[])
 
         if (p_gemm_params[i].ta) {
             p_gemm_params[i].lda = FIX_LD(p_gemm_params[i].k);
             sizea = p_gemm_params[i].lda * p_gemm_params[i].m;
+#ifdef USE_OPENBLAS
+            p_gemm_params[i].transa = CblasTrans;
+#else
             p_gemm_params[i].transa = 'T';
+#endif
         } else {
             p_gemm_params[i].lda = FIX_LD(p_gemm_params[i].m);
             sizea = p_gemm_params[i].lda * p_gemm_params[i].k;
+#ifdef USE_OPENBLAS
+            p_gemm_params[i].transa = CblasNoTrans;
+#else
             p_gemm_params[i].transa = 'N';
+#endif
         }
 
         if (p_gemm_params[i].tb) {
             p_gemm_params[i].ldb = FIX_LD(p_gemm_params[i].n);
             sizeb = p_gemm_params[i].ldb * p_gemm_params[i].k;
+#ifdef USE_OPENBLAS
+            p_gemm_params[i].transb = CblasTrans;
+#else
             p_gemm_params[i].transb = 'T';
+#endif
         } else {
             p_gemm_params[i].ldb = FIX_LD(p_gemm_params[i].k);
             sizeb = p_gemm_params[i].ldb * p_gemm_params[i].n;
+#ifdef USE_OPENBLAS
+            p_gemm_params[i].transb = CblasNoTrans;
+#else
             p_gemm_params[i].transb = 'N';
+#endif
         }
 
         p_gemm_params[i].ldc = FIX_LD(p_gemm_params[i].m);
@@ -136,12 +265,17 @@ int main(int argc, char *argv[])
 
     assert(i == num_gemms);
 
+#ifdef USE_OPENBLAS
+    A = (A_TYPE*) malloc(sizeof(A_TYPE)*max_sizea);
+    B = (B_TYPE*) malloc(sizeof(B_TYPE)*max_sizeb);
+    C = (C_TYPE*) malloc(sizeof(C_TYPE)*max_sizec);
+#else
+    // PACKED_API also needs the plain A/B/C buffers (sgemm_pack reads from
+    // them), so keep the MKL allocations for every non-OpenBLAS build
     A = (A_TYPE*) mkl_malloc(sizeof(A_TYPE)*max_sizea, MKL_MEM_ALIGNMENT);
     B = (B_TYPE*) mkl_malloc(sizeof(B_TYPE)*max_sizeb, MKL_MEM_ALIGNMENT);
     C = (C_TYPE*) mkl_malloc(sizeof(C_TYPE)*max_sizec, MKL_MEM_ALIGNMENT);
 #ifdef PACKED_API
     AP = sgemm_alloc("A", &max_m, &max_n, &max_k);
     BP = sgemm_alloc("B", &max_m, &max_n, &max_k);
 #endif
+#endif
 
 #ifdef IGEMM_S8U8S32
@@ -154,6 +288,9 @@ int main(int argc, char *argv[])
     for (i=0; i < num_gemms; i++) {
-        st_time = dsecnd();
+        auto st_time = std::chrono::high_resolution_clock::now();
         ...
-        end_time = dsecnd();
-        ave_time = 1E6*(end_time - st_time)/REPEAT;
+        auto end_time = std::chrono::high_resolution_clock::now();
+        ave_time = (double) std::chrono::duration_cast<std::chrono::microseconds>(end_time - st_time).count() / REPEAT;
         total_time += ave_time;
 
 #ifdef IGEMM_S8U8S32
-        printf("GEMM_S8U8S32(%c,%c,%d,%d,%d) %.1f usec %.5f GOp/sec \n",
-               p_gemm_params[i].transa, p_gemm_params[i].transb,
+        printf("GEMM_S8U8S32,%s,%s,%d,%d,%d,%.1f,%.5f\n",
+               p_gemm_params[i].ta ? "true":"false", p_gemm_params[i].tb ? "true":"false",
                p_gemm_params[i].m, p_gemm_params[i].n, p_gemm_params[i].k,
                ave_time, 1E-3*flops/ave_time);
 #else
-        printf("SGEMM(%c,%c,%d,%d,%d) %.1f usec %.5f GFlop/sec \n",
-               p_gemm_params[i].transa, p_gemm_params[i].transb,
+        printf("SGEMM,%s,%s,%d,%d,%d,%.1f,%.5f\n",
+               p_gemm_params[i].ta ? "true":"false", p_gemm_params[i].tb ? "true":"false",
                p_gemm_params[i].m, p_gemm_params[i].n, p_gemm_params[i].k,
                ave_time, 1E-3*flops/ave_time);
 #endif
     }
 
+#ifdef USE_OPENBLAS
+    free(A);
+    free(B);
+    free(C);
+#else
     mkl_free(A);
     mkl_free(B);
     mkl_free(C);
 #ifdef PACKED_API
     sgemm_free(AP);
     sgemm_free(BP);
 #endif
+#endif
 
 #ifdef IGEMM_S8U8S32
     printf("Total time %.1f usec, Overall Performance: %.5f GOp/sec \n", total_time, 1E-3*total_flops/total_time);
 #else
diff --git a/code/intel/gemm/run_mkl_igemm_ia.sh b/code/intel/gemm/run_mkl_igemm_ia.sh
index 6644f9c..1e15257 100755
--- a/code/intel/gemm/run_mkl_igemm_ia.sh
+++ b/code/intel/gemm/run_mkl_igemm_ia.sh
@@ -34,4 +34,4 @@ echo "------------------------"
 echo " GEMM_S8U8S32 - "
 echo "--------------"
 echo " "
-numactl -m 1 ./ibench_s8u8s32
+numactl -m 0 ./ibench_s8u8s32 --inference
diff --git a/code/intel/gemm/run_mkl_sgemm_ia.sh b/code/intel/gemm/run_mkl_sgemm_ia.sh
index b7d7ed7..7d16a0c 100755
--- a/code/intel/gemm/run_mkl_sgemm_ia.sh
+++ b/code/intel/gemm/run_mkl_sgemm_ia.sh
@@ -33,10 +33,10 @@ echo "------------------------"
 echo " SGEMM - "
 echo "--------------"
 echo " "
-numactl -m 1 ./sbench
+numactl -m 0 ./sbench --training
 echo " "
 echo "------------------------"
 echo " Packed SGEMM - "
 echo "--------------"
 echo " "
-numactl -m 1 ./sbench_pack
+numactl -m 0 ./sbench_pack --training
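
Usage sketch for the patched convolution benchmark (illustrative, not part of
the patch): the flag names follow the long_options table in std_conv_bench.cpp
above, but the binary name `std_conv_bench` is an assumption, since the
convolution Makefile's link target is not shown in this diff.

    # built-in sets; --training also enables the BWD_F/BWD_D modes
    ./std_conv_bench --training
    ./std_conv_bench --inference --skip-padding

    # a single user-defined convolution, averaged over 100 iterations
    ./std_conv_bench --w 151 --h 40 --c 1 --n 1 --k 32 \
                     --filter_w 20 --filter_h 5 --pad_w 8 --pad_h 8 \
                     --wstride 8 --hstride 2 --repeat 100

Output is one CSV row per problem and mode, matching the header printed before
the loop (OP,w,h,c,n,k,filter_w,filter_h,pad_w,pad_h,wstride,hstride,usecs,gops).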
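
Usage sketch for the patched GEMM benchmark: the target names come from
code/intel/gemm/Makefile above, while the OpenBLAS install prefix here is only
an example value.

    # MKL build and OpenBLAS build of the same bench.cpp
    make sbench sbench_oblas OPENBLAS_PATH=/opt/OpenBLAS

    # built-in problem sets
    numactl -m 0 ./sbench --training

    # a single GEMM, C(m x n) += A(m x k) * B(k x n), A transposed, 20 repeats
    ./sbench_oblas --m 1024 --n 512 --k 2048 --ta --repeat 20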