diff --git a/templates/Makefile b/templates/Makefile new file mode 100644 index 0000000..e5521b4 --- /dev/null +++ b/templates/Makefile @@ -0,0 +1,11 @@ +CXX=clang++ +CXXFLAGS=-O3 -Wall -Werror -pedantic -std=c++11 +ASMFLAGS+=-S -mllvm --x86-asm-syntax=intel + +all: branching.s + +%.s: %.cc + ${CXX} ${CXXFLAGS} ${ASMFLAGS} $^ + +clean: + rm -f *.s diff --git a/templates/README.md b/templates/README.md new file mode 100644 index 0000000..b607054 --- /dev/null +++ b/templates/README.md @@ -0,0 +1,48 @@ +# Templates +Here is an example where the use of templates is superior to what some people +might call the intuitive way. + +The clang-specific `__attribute__ ((noinline))` is used in the examples +whenever it is necessary to prevent inlining for the sake of the argument. Just +imagine the function could not be inlined in the real world because it was more +complex than the examples. (Note this is different from using the `-O0` compiler +flag.) + +The function `doNotOptimizeAway` uses dark magic to make sure the compiler does +not simply eliminate our otherwise empty function calls. + +## Flags +In `branching.cc` there are two functions that supposedly do the same thing: +`funWithFlagParam` and `funWithFlagTpl`. The former takes a boolean as its +parameter (by value) and depending on the value of the boolean calls another +function. `funWithFlagTpl` does the exact same thing except that here the +boolean is a template parameter. +Compiling the example with the attached Makefile will give a file `branching.s` +containing the generated assembly code. + +Even without knowledge of assembly, it can easily be seen that, and why, the +templated function is superior. First of all it can be seen that there is a +single definition of `funWithFlagParam` called `__Z16funWithFlagParamb` while +there are two individual definitions for `funWithFlagTpl` called +`__Z14funWithFlagTplILb0EEvv` and `__Z14funWithFlagTplILb1EEvv`.
Looking at main +we find essentially four `call`s as expected: + +```asm + call __Z16funWithFlagParamb + xor edi, edi + call __Z16funWithFlagParamb + call __Z14funWithFlagTplILb1EEvv + call __Z14funWithFlagTplILb0EEvv +``` + +The important difference in the actual functions stems from these two lines in +`__Z16funWithFlagParamb`: +```asm + test dil, dil + je LBB1_2 +``` +where a conditional jump is executed. This means whenever our flag is `false`, +the function performs an additional (and potentially expensive) jump. + +_This is not the whole story._ In reality a CPU performs speculative branching +and other optimizations, that probably deserve another example... diff --git a/templates/branching.cc b/templates/branching.cc new file mode 100644 index 0000000..ce78b75 --- /dev/null +++ b/templates/branching.cc @@ -0,0 +1,25 @@ +#include "helpers.hpp" + +__attribute__ ((noinline)) void funWithFlagParam(bool flag) { + if (flag) { + doSomething(); + } else { + doSomethingElse(); + } +} + +template <bool FLAG> /* FLAG is fixed per instantiation, so <true> and <false> are two separate functions */ +__attribute__ ((noinline)) void funWithFlagTpl() { + if (FLAG) { + doSomething(); + } else { + doSomethingElse(); + } +} + +int main() { + funWithFlagParam(true); + funWithFlagParam(false); + funWithFlagTpl<true>(); + funWithFlagTpl<false>(); +} diff --git a/templates/helpers.hpp b/templates/helpers.hpp new file mode 100644 index 0000000..bd6d9fc --- /dev/null +++ b/templates/helpers.hpp @@ -0,0 +1,9 @@ +#pragma once + +template <typename T> /* empty asm with a "+r" operand: datum is declared both read and written by the asm */ +void doNotOptimizeAway(T&& datum) { + asm volatile("" : "+r" (datum)); +} + +__attribute__ ((noinline)) void doSomething() { doNotOptimizeAway(42); } +__attribute__ ((noinline)) void doSomethingElse() { doNotOptimizeAway(23); }