From 675ec93043c958abede675b6c1f415c151f62983 Mon Sep 17 00:00:00 2001 From: Kevin Dungs Date: Thu, 12 Feb 2015 10:28:24 +0100 Subject: [PATCH 1/4] Add example for why templates are superior to boolean value flags. Will also add an example for functions as parameters. --- templates/Makefile | 11 +++++++++ templates/README.md | 51 ++++++++++++++++++++++++++++++++++++++++++ templates/branching.cc | 28 +++++++++++++++++++++++ templates/timer.hpp | 19 ++++++++++++++++ 4 files changed, 109 insertions(+) create mode 100644 templates/Makefile create mode 100644 templates/README.md create mode 100644 templates/branching.cc create mode 100644 templates/timer.hpp diff --git a/templates/Makefile b/templates/Makefile new file mode 100644 index 0000000..e5521b4 --- /dev/null +++ b/templates/Makefile @@ -0,0 +1,11 @@ +CXX=clang++ +CXXFLAGS=-O3 -Wall -Werror -pedantic -std=c++11 +ASMFLAGS+=-S -mllvm --x86-asm-syntax=intel + +all: branching.s + +%.s: %.cc + ${CXX} ${CXXFLAGS} ${ASMFLAGS} $^ + +clean: + rm -f *.s diff --git a/templates/README.md b/templates/README.md new file mode 100644 index 0000000..ece3b81 --- /dev/null +++ b/templates/README.md @@ -0,0 +1,51 @@ +# Templates +Here are two examples where the use of templates is superior to what some people +might call the intuitive way. + +The clang-specific `__attribute__ ((noinline))` is used in the examples +whenever it is necessary to prevent inlining for the sake of the argument. Just +imagine the function could not be inlined in the real world because it was more +complex than the examples. (Note this is different from using the `-O0` compiler +flag.) + +The function `doNotOptimizeAway` uses dark magic to make sure the compiler does +just not eliminate our otherwise empty function calls. + +## Flags +In `branching.cc` there are two functions that supposedly do the same thing: +`funWithFlagParam` and `funWithFlagTpl`. 
The former takes a boolean as its +parameter (by value) and depending on the value of the boolean calls another +function. `funWithFlagTpl` does the exact same thing except that here the +boolean is a template parameter. +Compiling the example with the attached Makefile will give a file `branching.s` +containing the generated assembly code. + +Even without knowledge of assembly, it can easily be seen that, and why, the +templated function is superior. First of all it can be seen that there is a +single definition of `funWithFlagParam` called `__Z16funWithFlagParamb` while +there are two individual definitions for `funWithFlagTpl` called +`__Z14funWithFlagTplILb0EEvv` and `__Z14funWithFlagTplILb1EEvv`. Looking at main +we find essentially four `call`s as expected: + +```asm + call __Z16funWithFlagParamb + xor edi, edi + call __Z16funWithFlagParamb + call __Z14funWithFlagTplILb1EEvv + call __Z14funWithFlagTplILb0EEvv +``` + +The important difference in the actual functions stems from these two lines in +`__Z16funWithFlagParamb`: +```asm + test dil, dil + je LBB1_2 +``` +where a conditional jump is executed. This means whenever our flag is `false`, +the function performs an additional (and potentially expensive) jump. + +_This is not the whole story._ In reality a CPU performs speculative branching +and other optimizations, that probably deserve another example... 
+ + +## Functions as Parameters diff --git a/templates/branching.cc b/templates/branching.cc new file mode 100644 index 0000000..91b59aa --- /dev/null +++ b/templates/branching.cc @@ -0,0 +1,28 @@ +#include "timer.hpp" + +__attribute__ ((noinline)) void doSomething() { doNotOptimizeAway(42); } +__attribute__ ((noinline)) void doSomethingElse() { doNotOptimizeAway(23); } + +__attribute__ ((noinline)) void funWithFlagParam(bool flag) { + if (flag) { + doSomething(); + } else { + doSomethingElse(); + } +} + +template +__attribute__ ((noinline)) void funWithFlagTpl() { + if (FLAG) { + doSomething(); + } else { + doSomethingElse(); + } +} + +int main() { + funWithFlagParam(true); + funWithFlagParam(false); + funWithFlagTpl(); + funWithFlagTpl(); +} diff --git a/templates/timer.hpp b/templates/timer.hpp new file mode 100644 index 0000000..15dcc80 --- /dev/null +++ b/templates/timer.hpp @@ -0,0 +1,19 @@ +#include + +template +void doNotOptimizeAway(T&& datum) { + asm volatile("" : "+r" (datum)); +} +class Timer { +private: + std::chrono::time_point start_time; +public: + void start() { + start_time = std::chrono::high_resolution_clock::now(); + } + + double stop() { + auto stop_time = std::chrono::high_resolution_clock::now(); + return std::chrono::duration_cast(stop_time - start_time).count(); + } +}; From 121be8b71c08d433750ce9a8e077e4a696eb98f0 Mon Sep 17 00:00:00 2001 From: Kevin Dungs Date: Thu, 12 Feb 2015 10:37:37 +0100 Subject: [PATCH 2/4] Remove timing related stuff. Refactor doSomething methods. 
--- templates/branching.cc | 5 +---- templates/helpers.hpp | 9 +++++++++ templates/timer.hpp | 19 ------------------- 3 files changed, 10 insertions(+), 23 deletions(-) create mode 100644 templates/helpers.hpp delete mode 100644 templates/timer.hpp diff --git a/templates/branching.cc b/templates/branching.cc index 91b59aa..ce78b75 100644 --- a/templates/branching.cc +++ b/templates/branching.cc @@ -1,7 +1,4 @@ -#include "timer.hpp" - -__attribute__ ((noinline)) void doSomething() { doNotOptimizeAway(42); } -__attribute__ ((noinline)) void doSomethingElse() { doNotOptimizeAway(23); } +#include "helpers.hpp" __attribute__ ((noinline)) void funWithFlagParam(bool flag) { if (flag) { diff --git a/templates/helpers.hpp b/templates/helpers.hpp new file mode 100644 index 0000000..bd6d9fc --- /dev/null +++ b/templates/helpers.hpp @@ -0,0 +1,9 @@ +#pragma once + +template +void doNotOptimizeAway(T&& datum) { + asm volatile("" : "+r" (datum)); +} + +__attribute__ ((noinline)) void doSomething() { doNotOptimizeAway(42); } +__attribute__ ((noinline)) void doSomethingElse() { doNotOptimizeAway(23); } diff --git a/templates/timer.hpp b/templates/timer.hpp deleted file mode 100644 index 15dcc80..0000000 --- a/templates/timer.hpp +++ /dev/null @@ -1,19 +0,0 @@ -#include - -template -void doNotOptimizeAway(T&& datum) { - asm volatile("" : "+r" (datum)); -} -class Timer { -private: - std::chrono::time_point start_time; -public: - void start() { - start_time = std::chrono::high_resolution_clock::now(); - } - - double stop() { - auto stop_time = std::chrono::high_resolution_clock::now(); - return std::chrono::duration_cast(stop_time - start_time).count(); - } -}; From 349b17066a18637381bbb09420cf2ae3c16e1917 Mon Sep 17 00:00:00 2001 From: Kevin Dungs Date: Thu, 12 Feb 2015 10:43:29 +0100 Subject: [PATCH 3/4] Add example for functions. Not quite there yet. 
--- templates/Makefile | 2 +- templates/functions.cc | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 templates/functions.cc diff --git a/templates/Makefile b/templates/Makefile index e5521b4..e144f9e 100644 --- a/templates/Makefile +++ b/templates/Makefile @@ -2,7 +2,7 @@ CXX=clang++ CXXFLAGS=-O3 -Wall -Werror -pedantic -std=c++11 ASMFLAGS+=-S -mllvm --x86-asm-syntax=intel -all: branching.s +all: branching.s functions.s %.s: %.cc ${CXX} ${CXXFLAGS} ${ASMFLAGS} $^ diff --git a/templates/functions.cc b/templates/functions.cc new file mode 100644 index 0000000..aa680ce --- /dev/null +++ b/templates/functions.cc @@ -0,0 +1,15 @@ +#include "helpers.hpp" + +__attribute__ ((noinline)) void doPtr(void (*f)()) { f(); } + +template +__attribute__ ((noinline)) void doTpl(FN f) { + f(); +} + +int main() { + doPtr(doSomething); + doPtr(doSomethingElse); + doTpl(doSomething); + doTpl(doSomethingElse); +} From 789f6b8117cdd33211cf5b2687d0e50b241fc48f Mon Sep 17 00:00:00 2001 From: Kevin Dungs Date: Thu, 12 Feb 2015 10:56:16 +0100 Subject: [PATCH 4/4] Remove functions. 
That was not really what I wanted :D --- templates/Makefile | 2 +- templates/README.md | 5 +---- templates/functions.cc | 15 --------------- 3 files changed, 2 insertions(+), 20 deletions(-) delete mode 100644 templates/functions.cc diff --git a/templates/Makefile b/templates/Makefile index e144f9e..e5521b4 100644 --- a/templates/Makefile +++ b/templates/Makefile @@ -2,7 +2,7 @@ CXX=clang++ CXXFLAGS=-O3 -Wall -Werror -pedantic -std=c++11 ASMFLAGS+=-S -mllvm --x86-asm-syntax=intel -all: branching.s functions.s +all: branching.s %.s: %.cc ${CXX} ${CXXFLAGS} ${ASMFLAGS} $^ diff --git a/templates/README.md b/templates/README.md index ece3b81..b607054 100644 --- a/templates/README.md +++ b/templates/README.md @@ -1,5 +1,5 @@ # Templates -Here are two examples where the use of templates is superior to what some people +Here is an example where the use of templates is superior to what some people might call the intuitive way. The clang-specific `__attribute__ ((noinline))` is used in the examples @@ -46,6 +46,3 @@ the function performs an additional (and potentially expensive) jump. _This is not the whole story._ In reality a CPU performs speculative branching and other optimizations, that probably deserve another example... - - -## Functions as Parameters diff --git a/templates/functions.cc b/templates/functions.cc deleted file mode 100644 index aa680ce..0000000 --- a/templates/functions.cc +++ /dev/null @@ -1,15 +0,0 @@ -#include "helpers.hpp" - -__attribute__ ((noinline)) void doPtr(void (*f)()) { f(); } - -template -__attribute__ ((noinline)) void doTpl(FN f) { - f(); -} - -int main() { - doPtr(doSomething); - doPtr(doSomethingElse); - doTpl(doSomething); - doTpl(doSomethingElse); -}