| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /*************************************** | ||
| 2 | Auteur : Pierre Aubert | ||
| 3 | Mail : pierre.aubert@lapp.in2p3.fr | ||
| 4 | Licence : CeCILL-C | ||
| 5 | ****************************************/ | ||
| 6 | |||
| 7 | #ifndef __MICRO_BENCHMARK_NS_IMPL_H__ | ||
| 8 | #define __MICRO_BENCHMARK_NS_IMPL_H__ | ||
| 9 | |||
| 10 | #include <cmath> | ||
| 11 | #include "micro_benchmark_ns.h" | ||
| 12 | |||
| 13 | ///Minimum time for performance test in ns | ||
| 14 | #define PHOENIX_MINIMUM_TIME_NS 1000000000.0 | ||
| 15 | |||
///Forbid the compiler to optimise away a read of the given value
/** @param value : reference to a value that must be treated as observed
 * The empty asm pretends to read value (register or memory alternative) and
 * clobbers memory, so the optimiser cannot delete the computation producing it.
 */
template <class Type>
[[gnu::always_inline]] inline void phoenix_doNotOptimize(Type const& value) {
	asm volatile("" : : "r,m"(value) : "memory");
}
| 23 | |||
///Forbid the compiler to optimise away reads and writes of the given value
/** @param value : reference to a value the compiler must assume is both read and written
 * The "+" constraint marks value as input/output; the alternative order differs
 * per compiler because clang handles the multi-alternative constraint differently.
 */
template <class Type>
[[gnu::always_inline]] inline void phoenix_doNotOptimize(Type & value) {
#if defined(__clang__)
	asm volatile("" : "+r,m"(value) : : "memory");
#else
	asm volatile("" : "+m,r"(value) : : "memory");
#endif
}
| 35 | |||
| 36 | ///Do the micro benchmarking of a given function and gives performance results in ns | ||
| 37 | /** @param[out] ellapsedTimeNs : ellapsed time in ns | ||
| 38 | * @param[out] ellapsedTimeErrorNs : error on the ellapsed time in ns | ||
| 39 | * @param nbTestPerf : number of performance test | ||
| 40 | * @param nbCallPerTest : number of calls per performance test | ||
| 41 | * @param __f : function to be called and benchmarked | ||
| 42 | * @param __args : parameter of the function to be benchmarked | ||
| 43 | */ | ||
| 44 | template<typename _Callable, typename... _Args> | ||
| 45 | 352 | void micro_benchmarkNs(double & ellapsedTimeNs, double & ellapsedTimeErrorNs, size_t nbTestPerf, size_t nbCallPerTest, | |
| 46 | _Callable&& __f, _Args&&... __args) | ||
| 47 | { | ||
| 48 | 352 | VecEllapsedTime vecTimeNs; | |
| 49 | 352 | int res = 0; | |
| 50 | ::phoenix_doNotOptimize(res); | ||
| 51 |
2/2✓ Branch 0 (18→6) taken 50320 times.
✓ Branch 1 (18→19) taken 352 times.
|
50672 | for(size_t i(0lu); i < nbTestPerf; ++i){ |
| 52 | //Stating the timer | ||
| 53 |
1/1✓ Branch 0 (6→7) taken 50320 times.
|
50320 | HiPeTime beginTime = phoenix_getTime(); |
| 54 |
2/2✓ Branch 0 (12→8) taken 1046124300 times.
✓ Branch 1 (12→13) taken 50320 times.
|
1046174620 | for(size_t j(0lu); j < nbCallPerTest; ++j){ |
| 55 | ::phoenix_doNotOptimize(res); | ||
| 56 |
1/1✓ Branch 0 (10→11) taken 1046124300 times.
|
1046124300 | __f(__args...); |
| 57 | } | ||
| 58 | //Get the time of the nbCallPerTest calls | ||
| 59 |
2/2✓ Branch 0 (13→14) taken 50320 times.
✓ Branch 2 (14→15) taken 50320 times.
|
50320 | NanoSecs elapsedTime(phoenix_getTime() - beginTime); |
| 60 | 50320 | double fullNs(elapsedTime.count()/((double)nbCallPerTest)); | |
| 61 |
1/1✓ Branch 0 (16→17) taken 50320 times.
|
50320 | vecTimeNs.push_back(fullNs); |
| 62 | } | ||
| 63 | 352 | MapOrderedTime mapOrderTime; | |
| 64 |
1/1✓ Branch 0 (20→21) taken 352 times.
|
352 | micro_benchmarkVecToMap(mapOrderTime, vecTimeNs); |
| 65 | 352 | size_t nbValueUsed(vecTimeNs.size()*0.7 + 1lu); | |
| 66 |
1/2✗ Branch 0 (23→24) not taken.
✓ Branch 1 (23→25) taken 352 times.
|
352 | if(nbValueUsed > vecTimeNs.size()){ |
| 67 | ✗ | nbValueUsed = vecTimeNs.size(); | |
| 68 | } | ||
| 69 |
1/1✓ Branch 0 (25→26) taken 352 times.
|
352 | micro_benchmarkComputeTime(ellapsedTimeNs, ellapsedTimeErrorNs, mapOrderTime, nbValueUsed); |
| 70 | 352 | } | |
| 71 | |||
| 72 | |||
| 73 | ///Do the micro benchmarking of a given function and gives performance results in ns | ||
| 74 | /** @param[out] ellapsedTimeNs : ellapsed time in ns | ||
| 75 | * @param[out] ellapsedTimeErrorNs : error on the ellapsed time in ns | ||
| 76 | * @param __f : function to be called and benchmarked | ||
| 77 | * @param __args : parameter of the function to be benchmarked | ||
| 78 | * This function tries to find automatically a relevant performance measurement | ||
| 79 | */ | ||
| 80 | template<typename _Callable, typename... _Args> | ||
| 81 | 66 | void micro_benchmarkAutoNs(double & ellapsedTimeNs, double & ellapsedTimeErrorNs, _Callable&& __f, _Args&&... __args){ | |
| 82 | 66 | size_t nbTestPerf(100lu), nbCallPerTest(10lu); | |
| 83 | //Let's try with default values | ||
| 84 | 66 | micro_benchmarkNs(ellapsedTimeNs, ellapsedTimeErrorNs, nbTestPerf, nbCallPerTest, __f, __args...); | |
| 85 | // while(std::isnan(ellapsedTimeNs)){ | ||
| 86 | // std::cout << "micro_benchmarkAutoNs : ellapsedTimeNs is NaN ! Adjusting nbTestPerf = " << nbTestPerf << std::endl; | ||
| 87 | // nbCallPerTest += 5lu; | ||
| 88 | // micro_benchmarkNs(ellapsedTimeNs, ellapsedTimeErrorNs, nbTestPerf, nbCallPerTest, __f, __args...); | ||
| 89 | // std::cout << "micro_benchmarkAutoNs = nbTestPerf = "<<nbTestPerf<<", nbCallPerTest" << nbCallPerTest << std::endl; | ||
| 90 | // } | ||
| 91 | 66 | double fullEllapsedTime(ellapsedTimeNs*((double)nbTestPerf)*((double)nbCallPerTest)); | |
| 92 | //Check if the total time is more than one second | ||
| 93 |
4/6✓ Branch 0 (8→9) taken 82 times.
✓ Branch 1 (8→11) taken 66 times.
✓ Branch 2 (9→10) taken 82 times.
✗ Branch 3 (9→11) not taken.
✓ Branch 4 (10→4) taken 82 times.
✗ Branch 5 (10→11) not taken.
|
148 | while(fullEllapsedTime < PHOENIX_MINIMUM_TIME_NS && nbCallPerTest < 1000000000lu && nbCallPerTest > 0lu){ //Let's try again if the total time is less than one second |
| 94 | //If the total time is less than one second, we try to change nbTestPerf and nbCallPerTest to make it about one second | ||
| 95 | 82 | double ratioTime((1.3*PHOENIX_MINIMUM_TIME_NS)/fullEllapsedTime); | |
| 96 |
1/2✗ Branch 0 (4→5) not taken.
✓ Branch 1 (4→6) taken 82 times.
|
82 | if(ratioTime < 1.2){ |
| 97 | ✗ | ratioTime = 1.2; | |
| 98 | } | ||
| 99 | // double ratioSqrt(std::sqrt(ratioTime) + 1.0); | ||
| 100 | //Let's modify the call | ||
| 101 | // nbTestPerf *= ratioSqrt; | ||
| 102 | // nbCallPerTest *= ratioSqrt; | ||
| 103 | |||
| 104 | 82 | nbCallPerTest *= ratioTime; | |
| 105 | |||
| 106 | //Let's try again | ||
| 107 | 82 | micro_benchmarkNs(ellapsedTimeNs, ellapsedTimeErrorNs, nbTestPerf, nbCallPerTest, __f, __args...); | |
| 108 | 82 | fullEllapsedTime = ellapsedTimeNs*((double)nbTestPerf)*((double)nbCallPerTest); | |
| 109 | //We loop until we have one second of full time measurement | ||
| 110 | } | ||
| 111 | 66 | std::cout << "micro_benchmarkAutoNs : nbCallPerTest = " << nbCallPerTest << std::endl; | |
| 112 |
2/4✓ Branch 0 (14→15) taken 66 times.
✗ Branch 1 (14→16) not taken.
✗ Branch 2 (15→16) not taken.
✓ Branch 3 (15→18) taken 66 times.
|
66 | if(nbCallPerTest > 1000000000lu || nbCallPerTest == 0lu){ |
| 113 | ✗ | std::cout << "micro_benchmarkAutoNs : Warning invalid number of calls per test should lead to unrelevant results!!!" << std::endl; | |
| 114 | } | ||
| 115 | 66 | } | |
| 116 | |||
///Do the micro benchmarking of a given function and gives performance results in ns
/** @param[out] ellapsedTimeNs : ellapsed time in ns
 * @param[out] ellapsedTimeErrorNs : error on the ellapsed time in ns
 * @param[out] timePerElement : time per element in ns
 * @param[out] timeErrorPerElement : error of time per element in ns
 * @param nbTestPerf : number of performance tests
 * @param nbCallPerTest : number of calls per performance test
 * @param nbElement : number of elements treated by the function f (must be non-zero to get finite per-element times)
 * @param f : function to be called and benchmarked
 * @param args : parameters of the function to be benchmarked
 */
template<typename Callable, typename... Args>
void micro_benchmarkNs(double & ellapsedTimeNs, double & ellapsedTimeErrorNs,
		double & timePerElement, double & timeErrorPerElement, size_t nbTestPerf, size_t nbCallPerTest, size_t nbElement,
		Callable&& f, Args&&... args)
{
	micro_benchmarkNs(ellapsedTimeNs, ellapsedTimeErrorNs, nbTestPerf, nbCallPerTest, f, args...);
	timePerElement = ellapsedTimeNs/((double)nbElement);
	timeErrorPerElement = ellapsedTimeErrorNs/((double)nbElement);
}
| 137 | |||
///Do the micro benchmarking of a given function and gives performance results in ns
/** @param[out] ellapsedTimeNs : ellapsed time in ns
 * @param[out] ellapsedTimeErrorNs : error on the ellapsed time in ns
 * @param[out] timePerElement : time per element in ns
 * @param[out] timeErrorPerElement : error of time per element in ns
 * @param nbElement : number of elements treated by the function f (must be non-zero to get finite per-element times)
 * @param f : function to be called and benchmarked
 * @param args : parameters of the function to be benchmarked
 */
template<typename Callable, typename... Args>
void micro_benchmarkAutoNs(double & ellapsedTimeNs, double & ellapsedTimeErrorNs,
		double & timePerElement, double & timeErrorPerElement, size_t nbElement,
		Callable&& f, Args&&... args)
{
	micro_benchmarkAutoNs(ellapsedTimeNs, ellapsedTimeErrorNs, f, args...);
	timePerElement = ellapsedTimeNs/((double)nbElement);
	timeErrorPerElement = ellapsedTimeErrorNs/((double)nbElement);
}
| 156 | |||
///Do the micro benchmarking of a given function and gives performance results in ns and print the result
/** @param testName : name of the performance test
 * @param nbTestPerf : number of performance tests
 * @param nbCallPerTest : number of calls per performance test
 * @param nbElement : number of elements treated by the function f
 * @param f : function to be called and benchmarked
 * @param args : parameters of the function to be benchmarked
 * Prints a human-readable summary on stdout and a tab-separated record on
 * stderr (nbElement, timePerElement, ellapsedTime, and their errors) so the
 * raw data can be redirected to a file for plotting.
 */
template<typename Callable, typename... Args>
void micro_benchmarkNsPrint(const std::string & testName, size_t nbTestPerf, size_t nbCallPerTest, size_t nbElement, Callable&& f, Args&&... args){
	double ellapsedTimeNs(0.0), ellapsedTimeErrorNs(0.0), timePerElement(0.0), timeErrorPerElement(0.0);
	micro_benchmarkNs(ellapsedTimeNs, ellapsedTimeErrorNs, timePerElement, timeErrorPerElement, nbTestPerf, nbCallPerTest, nbElement, f, args...);
	
	std::cout << testName << " : nbElement = "<<nbElement<<", timePerElement = " << timePerElement << " ns/el ± "<<timeErrorPerElement<<", elapsedTime = " << ellapsedTimeNs << " ns ± "<<ellapsedTimeErrorNs << std::endl;
	std::cerr << nbElement << "\t" << timePerElement << "\t" << ellapsedTimeNs << "\t" << timeErrorPerElement << "\t" << ellapsedTimeErrorNs << std::endl;
}
| 173 | |||
///Do the micro benchmarking of a given function and gives performance results in ns and print the result
/** @param testName : name of the performance test
 * @param nbElement : number of elements treated by the function f
 * @param f : function to be called and benchmarked
 * @param args : parameters of the function to be benchmarked
 * Same output format as micro_benchmarkNsPrint but the number of tests and
 * calls per test are tuned automatically to reach a relevant measurement time.
 */
template<typename Callable, typename... Args>
void micro_benchmarkAutoNsPrint(const std::string & testName, size_t nbElement, Callable&& f, Args&&... args){
	double ellapsedTimeNs(0.0), ellapsedTimeErrorNs(0.0), timePerElement(0.0), timeErrorPerElement(0.0);
	micro_benchmarkAutoNs(ellapsedTimeNs, ellapsedTimeErrorNs, timePerElement, timeErrorPerElement, nbElement, f, args...);
	
	std::cout << testName << " : nbElement = "<<nbElement<<", timePerElement = " << timePerElement << " ns/el ± "<<timeErrorPerElement<<", elapsedTime = " << ellapsedTimeNs << " ns ± "<<ellapsedTimeErrorNs << std::endl;
	std::cerr << nbElement << "\t" << timePerElement << "\t" << ellapsedTimeNs << "\t" << timeErrorPerElement << "\t" << ellapsedTimeErrorNs << std::endl;
}
| 188 | |||
| 189 | #endif | ||
| 190 |