diff --git a/MatrixMultiply.py b/MatrixMultiply.py new file mode 100644 index 0000000..7e2979c --- /dev/null +++ b/MatrixMultiply.py @@ -0,0 +1,52 @@ +import sys +import random +import threading +from threading import Thread + +matrixDim = int(sys.argv[1]) +randomMaxValue = 100 +#t1 = [0 for i in range(matrixDim)] + +t1 = [] + +## INITIALISING MATRICES A B C + +matrixA = [[0 for i in range(matrixDim)] for j in range(matrixDim)] +matrixB = [[0 for i in range(matrixDim)] for j in range(matrixDim)] +matrixC = [[0 for i in range(matrixDim)] for j in range(matrixDim)] + +for i in range (0,matrixDim): + for j in range (0,matrixDim): + + matrixA[i][j] = random.randint(0, randomMaxValue) + matrixB[i][j] = random.randint(0, randomMaxValue) + +## DEFINING THREAD + +def MyThread1(arg): + + print "Thread " + str(arg) + + for j in range(0,matrixDim): + for k in range (0,matrixDim): + matrixC[arg][j] += matrixA[arg][k] * matrixB[k][j] +# print "Thread " + str(arg) + " " + str(j) + +def printMat(): + for i in range (0, matrixDim): + print "\n" + for j in range (0,matrixDim): + print str(matrixC[i][j]) + " ", + + +for i in range (0,matrixDim): + t = threading.Thread(target=MyThread1, args=(i, )) + t1.append(t) + t.start() + +for i in range (0,matrixDim): + t1[i].join() + +#for temp in t: +# temp.join() + diff --git a/finalExperimentRuns/MatrixMultiplier.class b/finalExperimentRuns/MatrixMultiplier.class new file mode 100644 index 0000000..1dc5df4 Binary files /dev/null and b/finalExperimentRuns/MatrixMultiplier.class differ diff --git a/finalExperimentRuns/MatrixMultiplier.java b/finalExperimentRuns/MatrixMultiplier.java new file mode 100644 index 0000000..2200614 --- /dev/null +++ b/finalExperimentRuns/MatrixMultiplier.java @@ -0,0 +1,134 @@ +import java.util.*; + +public class MatrixMultiplier { + + static int[][] matrixA; + static int[][] matrixB; + static int[][] matrixC; + + static final int randomMaxValue = 100; + + public MatrixMultiplier (int N) { + + matrixA = new int[N][N]; + matrixB = new int[N][N]; + matrixC = new int[N][N]; + + } + + public void fillMatrixRandom() { + + + Random rn = new Random(); + + + int i = 0; + int j = 0; + + for (i=0; i branch-misses + + 0.001226038 seconds time elapsed + +Iteration 2 + + Performance counter stats for './matrixMultiply_clang 10': + + 0.263159 task-clock # 0.297 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.483 M/sec + 791,625 cycles # 3.008 GHz + 482,299 stalled-cycles-frontend # 60.93% frontend cycles idle + 319,678 stalled-cycles-backend # 40.38% backend cycles idle + 550,734 instructions # 0.70 insns per cycle + # 0.88 stalled cycles per insn + 106,158 branches # 403.399 M/sec + branch-misses + + 0.000886637 seconds time elapsed + +Iteration 3 + + Performance counter stats for './matrixMultiply_clang 10': + + 0.263879 task-clock # 0.315 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.481 M/sec + 801,437 cycles # 3.037 GHz + 489,881 stalled-cycles-frontend # 61.13% frontend cycles idle + 298,681 stalled-cycles-backend # 37.27% backend cycles idle + 552,821 instructions # 0.69 insns per cycle + # 0.89 stalled cycles per insn + 106,287 branches # 402.787 M/sec + branch-misses + + 0.000838731 seconds time elapsed + +Iteration 4 + + Performance counter stats for './matrixMultiply_clang 10': + + 0.261170 task-clock # 0.325 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.486 M/sec + 794,662 cycles # 3.043 GHz + 483,324 stalled-cycles-frontend # 60.82% frontend cycles idle + 324,726 stalled-cycles-backend # 40.86% backend cycles idle + 552,409 instructions # 0.70 insns per cycle + # 0.87 stalled cycles per insn + 106,457 branches # 407.616 M/sec + branch-misses + + 0.000802414 seconds time elapsed + +Iteration 5 + + Performance counter stats for './matrixMultiply_clang 10': + + 0.259817 task-clock # 0.203 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.489 M/sec + 789,490 cycles # 3.039 GHz + 480,273 stalled-cycles-frontend # 60.83% frontend cycles idle + 338,648 stalled-cycles-backend # 42.89% backend cycles idle + 549,001 instructions # 0.70 insns per cycle + # 0.87 stalled cycles per insn + 105,856 branches # 407.425 M/sec + branch-misses + + 0.001277214 seconds time elapsed + +Iteration 6 + + Performance counter stats for './matrixMultiply_clang 10': + + 0.259312 task-clock # 0.237 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.490 M/sec + 788,093 cycles # 3.039 GHz + 480,367 stalled-cycles-frontend # 60.95% frontend cycles idle + 337,791 stalled-cycles-backend # 42.86% backend cycles idle + 542,295 instructions # 0.69 insns per cycle + # 0.89 stalled cycles per insn + 104,332 branches # 402.342 M/sec + branch-misses + + 0.001092917 seconds time elapsed + +Iteration 7 + + Performance counter stats for './matrixMultiply_clang 10': + + 0.291974 task-clock # 0.199 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 126 page-faults # 0.432 M/sec + 202,297 cycles # 0.693 GHz + 553,682 stalled-cycles-frontend # 273.70% frontend cycles idle + 387,858 stalled-cycles-backend # 191.73% backend cycles idle + 548,059 instructions # 2.71 insns per cycle + # 1.01 stalled cycles per insn + 105,755 branches # 362.207 M/sec + 7,631 branch-misses # 7.22% of all branches [73.27%] + + 0.001464449 seconds time elapsed + +Iteration 8 + + Performance counter stats for './matrixMultiply_clang 10': + + 0.260737 task-clock # 0.142 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.487 M/sec + 791,866 cycles # 3.037 GHz + 481,917 stalled-cycles-frontend # 60.86% frontend cycles idle + 338,794 stalled-cycles-backend # 42.78% backend cycles idle + 554,114 instructions # 0.70 insns per cycle + # 0.87 stalled cycles per insn + 106,770 branches # 409.493 M/sec + branch-misses + + 0.001834688 seconds time elapsed + +Iteration 9 + + Performance counter stats for './matrixMultiply_clang 10': + + 0.265581 task-clock # 0.269 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.478 M/sec + 798,282 cycles # 3.006 GHz + 486,961 stalled-cycles-frontend # 61.00% frontend cycles idle + 343,026 stalled-cycles-backend # 42.97% backend cycles idle + 555,199 instructions # 0.70 insns per cycle + # 0.88 stalled cycles per insn + 106,951 branches # 402.706 M/sec + branch-misses + + 0.000988402 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang 10': + + 0.260504 task-clock # 0.180 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 126 page-faults # 0.484 M/sec + 792,825 cycles # 3.043 GHz + 482,255 stalled-cycles-frontend # 60.83% frontend cycles idle + 333,878 stalled-cycles-backend # 42.11% backend cycles idle + 551,967 instructions # 0.70 insns per cycle + # 0.87 stalled cycles per insn + 106,390 branches # 408.401 M/sec + branch-misses + + 0.001444313 seconds time elapsed + diff --git a/finalExperimentRuns/clang_output_100 b/finalExperimentRuns/clang_output_100 new file mode 100644 index 0000000..62ab0c3 --- /dev/null +++ b/finalExperimentRuns/clang_output_100 @@ -0,0 +1,180 @@ +Iteration 1 + + Performance counter stats for './matrixMultiply_clang 100': + + 7.053268 task-clock # 0.907 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 158 page-faults # 0.022 M/sec + 21,542,512 cycles # 3.054 GHz [43.51%] + 10,448,377 stalled-cycles-frontend # 48.50% frontend cycles idle + 1,620,562 stalled-cycles-backend # 7.52% backend cycles idle + 26,010,263 instructions # 1.21 insns per cycle + # 0.40 stalled cycles per insn + 2,497,209 branches # 354.050 M/sec + 11,897 branch-misses # 0.48% of all branches [71.92%] + + 0.007776978 seconds time elapsed + +Iteration 2 + + Performance counter stats for './matrixMultiply_clang 100': + + 7.156865 task-clock # 0.912 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 158 page-faults # 0.022 M/sec + 21,858,485 cycles # 3.054 GHz [45.75%] + 6,691,620 stalled-cycles-frontend # 30.61% frontend cycles idle + 2,072,908 stalled-cycles-backend # 9.48% backend cycles idle + 26,016,488 instructions # 1.19 insns per cycle + # 0.26 stalled cycles per insn + 2,497,845 branches # 349.014 M/sec + 17,479 branch-misses # 0.70% of all branches + + 0.007849703 seconds time elapsed + +Iteration 3 + + Performance counter stats for './matrixMultiply_clang 100': + + 7.165353 task-clock # 0.909 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 158 page-faults # 0.022 M/sec + 21,780,583 cycles # 3.040 GHz [44.38%] + 7,179,660 stalled-cycles-frontend # 32.96% frontend cycles idle + 2,012,094 stalled-cycles-backend # 9.24% backend cycles idle + 26,010,882 instructions # 1.19 insns per cycle + # 0.28 stalled cycles per insn + 2,497,319 branches # 348.527 M/sec + 15,296 branch-misses # 0.61% of all branches [98.02%] + + 0.007883982 seconds time elapsed + +Iteration 4 + + Performance counter stats for './matrixMultiply_clang 100': + + 7.098440 task-clock # 0.923 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.022 M/sec + 21,662,885 cycles # 3.052 GHz [43.88%] + 8,869,299 stalled-cycles-frontend # 40.94% frontend cycles idle + 1,888,070 stalled-cycles-backend # 8.72% backend cycles idle + 26,015,267 instructions # 1.20 insns per cycle + # 0.34 stalled cycles per insn + 2,498,064 branches # 351.917 M/sec + 11,733 branch-misses # 0.47% of all branches [84.57%] + + 0.007692507 seconds time elapsed + +Iteration 5 + + Performance counter stats for './matrixMultiply_clang 100': + + 7.074668 task-clock # 0.881 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.022 M/sec + 9,777,089 cycles # 1.382 GHz + 12,454,494 stalled-cycles-frontend # 127.38% frontend cycles idle + 406,671 stalled-cycles-backend # 4.16% backend cycles idle + 26,021,599 instructions # 2.66 insns per cycle + # 0.48 stalled cycles per insn + 2,498,735 branches # 353.195 M/sec + 12,178 branch-misses # 0.49% of all branches [54.81%] + + 0.008028569 seconds time elapsed + +Iteration 6 + + Performance counter stats for './matrixMultiply_clang 100': + + 7.085217 task-clock # 0.921 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.022 M/sec + 21,555,105 cycles # 3.042 GHz [43.77%] + 7,800,102 stalled-cycles-frontend # 36.19% frontend cycles idle + 1,788,484 stalled-cycles-backend # 8.30% backend cycles idle + 26,025,558 instructions # 1.21 insns per cycle + # 0.30 stalled cycles per insn + 2,499,437 branches # 352.768 M/sec + 11,719 branch-misses # 0.47% of all branches [92.55%] + + 0.007694468 seconds time elapsed + +Iteration 7 + + Performance counter stats for './matrixMultiply_clang 100': + + 7.112728 task-clock # 0.634 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.022 M/sec + 21,725,477 cycles # 3.054 GHz [45.19%] + 6,733,958 stalled-cycles-frontend # 31.00% frontend cycles idle + 1,849,361 stalled-cycles-backend # 8.51% backend cycles idle + 26,024,358 instructions # 1.20 insns per cycle + # 0.26 stalled cycles per insn + 2,499,211 branches # 351.372 M/sec + 17,591 branch-misses # 0.70% of all branches + + 0.011223178 seconds time elapsed + +Iteration 8 + + Performance counter stats for './matrixMultiply_clang 100': + + 7.068877 task-clock # 0.921 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.022 M/sec + 21,592,249 cycles # 3.055 GHz [43.63%] + 7,807,904 stalled-cycles-frontend # 36.16% frontend cycles idle + 1,825,484 stalled-cycles-backend # 8.45% backend cycles idle + 26,012,472 instructions # 1.20 insns per cycle + # 0.30 stalled cycles per insn + 2,497,566 branches # 353.319 M/sec + 11,719 branch-misses # 0.47% of all branches [92.66%] + + 0.007675305 seconds time elapsed + +Iteration 9 + + Performance counter stats for './matrixMultiply_clang 100': + + 7.096350 task-clock # 0.916 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.022 M/sec + 21,606,442 cycles # 3.045 GHz [43.82%] + 9,785,352 stalled-cycles-frontend # 45.29% frontend cycles idle + 1,812,444 stalled-cycles-backend # 8.39% backend cycles idle + 26,027,894 instructions # 1.20 insns per cycle + # 0.38 stalled cycles per insn + 2,499,871 branches # 352.276 M/sec + 11,876 branch-misses # 0.48% of all branches [77.30%] + + 0.007747817 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang 100': + + 7.154863 task-clock # 0.920 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 158 page-faults # 0.022 M/sec + 21,831,902 cycles # 3.051 GHz [44.34%] + 10,234,895 stalled-cycles-frontend # 46.88% frontend cycles idle + 2,001,267 stalled-cycles-backend # 9.17% backend cycles idle + 26,012,404 instructions # 1.19 insns per cycle + # 0.39 stalled cycles per insn + 2,497,750 branches # 349.098 M/sec + 11,843 branch-misses # 0.47% of all branches [74.43%] + + 0.007774080 seconds time elapsed + diff --git a/finalExperimentRuns/clang_output_1000 b/finalExperimentRuns/clang_output_1000 new file mode 100644 index 0000000..9a89831 --- /dev/null +++ b/finalExperimentRuns/clang_output_1000 @@ -0,0 +1,180 @@ +Iteration 1 + + Performance counter stats for './matrixMultiply_clang 1000': + + 8772.711871 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,862,536,545 cycles # 3.062 GHz [83.32%] + 18,725,249,733 stalled-cycles-frontend # 69.71% frontend cycles idle [83.31%] + 6,721,535,337 stalled-cycles-backend # 25.02% backend cycles idle [66.69%] + 24,147,572,347 instructions # 0.90 insns per cycle + # 0.78 stalled cycles per insn [83.35%] + 2,039,094,766 branches # 232.436 M/sec [83.35%] + 1,418,553 branch-misses # 0.07% of all branches [83.35%] + + 8.795813556 seconds time elapsed + +Iteration 2 + + Performance counter stats for './matrixMultiply_clang 1000': + + 8729.146601 task-clock # 0.997 CPUs utilized + 10 context-switches # 0.000 M/sec + 1 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 26,728,257,650 cycles # 3.062 GHz [83.32%] + 18,598,831,843 stalled-cycles-frontend # 69.58% frontend cycles idle [83.32%] + 6,692,039,477 stalled-cycles-backend # 25.04% backend cycles idle [66.68%] + 24,143,696,220 instructions # 0.90 insns per cycle + # 0.77 stalled cycles per insn [83.36%] + 2,039,089,497 branches # 233.596 M/sec [83.36%] + 1,422,028 branch-misses # 0.07% of all branches [83.33%] + + 8.751936365 seconds time elapsed + +Iteration 3 + + Performance counter stats for './matrixMultiply_clang 1000': + + 8743.005082 task-clock # 0.997 CPUs utilized + 10 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,760,005,251 cycles # 3.061 GHz [83.34%] + 18,636,540,129 stalled-cycles-frontend # 69.64% frontend cycles idle [83.34%] + 6,696,730,595 stalled-cycles-backend # 25.03% backend cycles idle [66.69%] + 24,146,489,634 instructions # 0.90 insns per cycle + # 0.77 stalled cycles per insn [83.34%] + 2,039,117,188 branches # 233.228 M/sec [83.34%] + 1,512,869 branch-misses # 0.07% of all branches [83.32%] + + 8.765745532 seconds time elapsed + +Iteration 4 + + Performance counter stats for './matrixMultiply_clang 1000': + + 8842.501415 task-clock # 0.997 CPUs utilized + 10 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 27,075,333,912 cycles # 3.062 GHz [83.30%] + 18,932,760,043 stalled-cycles-frontend # 69.93% frontend cycles idle [83.34%] + 6,827,630,830 stalled-cycles-backend # 25.22% backend cycles idle [66.70%] + 24,145,756,817 instructions # 0.89 insns per cycle + # 0.78 stalled cycles per insn [83.35%] + 2,039,239,015 branches # 230.618 M/sec [83.35%] + 1,496,833 branch-misses # 0.07% of all branches [83.33%] + + 8.865507756 seconds time elapsed + +Iteration 5 + + Performance counter stats for './matrixMultiply_clang 1000': + + 8796.936234 task-clock # 0.997 CPUs utilized + 10 context-switches # 0.000 M/sec + 1 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 26,935,212,998 cycles # 3.062 GHz [83.31%] + 18,792,486,212 stalled-cycles-frontend # 69.77% frontend cycles idle [83.31%] + 6,800,634,030 stalled-cycles-backend # 25.25% backend cycles idle [66.71%] + 24,146,395,655 instructions # 0.90 insns per cycle + # 0.78 stalled cycles per insn [83.35%] + 2,039,272,337 branches # 231.816 M/sec [83.35%] + 1,994,410 branch-misses # 0.10% of all branches [83.32%] + + 8.820084000 seconds time elapsed + +Iteration 6 + + Performance counter stats for './matrixMultiply_clang 1000': + + 8777.014189 task-clock # 0.997 CPUs utilized + 10 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,875,307,479 cycles # 3.062 GHz [83.32%] + 18,740,496,292 stalled-cycles-frontend # 69.73% frontend cycles idle [83.32%] + 6,705,290,080 stalled-cycles-backend # 24.95% backend cycles idle [66.68%] + 24,144,531,987 instructions # 0.90 insns per cycle + # 0.78 stalled cycles per insn [83.36%] + 2,039,002,045 branches # 232.312 M/sec [83.36%] + 1,448,566 branch-misses # 0.07% of all branches [83.34%] + + 8.799930149 seconds time elapsed + +Iteration 7 + + Performance counter stats for './matrixMultiply_clang 1000': + + 8796.815931 task-clock # 0.997 CPUs utilized + 10 context-switches # 0.000 M/sec + 1 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,935,093,613 cycles # 3.062 GHz [83.32%] + 18,796,887,746 stalled-cycles-frontend # 69.79% frontend cycles idle [83.31%] + 6,818,360,264 stalled-cycles-backend # 25.31% backend cycles idle [66.69%] + 24,148,095,266 instructions # 0.90 insns per cycle + # 0.78 stalled cycles per insn [83.35%] + 2,039,005,393 branches # 231.789 M/sec [83.35%] + 1,638,652 branch-misses # 0.08% of all branches [83.35%] + + 8.819932807 seconds time elapsed + +Iteration 8 + + Performance counter stats for './matrixMultiply_clang 1000': + + 8732.240674 task-clock # 0.997 CPUs utilized + 10 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,737,479,415 cycles # 3.062 GHz [83.32%] + 18,601,807,786 stalled-cycles-frontend # 69.57% frontend cycles idle [83.32%] + 6,561,397,504 stalled-cycles-backend # 24.54% backend cycles idle [66.64%] + 24,149,772,261 instructions # 0.90 insns per cycle + # 0.77 stalled cycles per insn [83.32%] + 2,038,395,170 branches # 233.433 M/sec [83.36%] + 1,485,301 branch-misses # 0.07% of all branches [83.36%] + + 8.757509097 seconds time elapsed + +Iteration 9 + + Performance counter stats for './matrixMultiply_clang 1000': + + 8706.559421 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,659,676,782 cycles # 3.062 GHz [83.32%] + 18,523,302,399 stalled-cycles-frontend # 69.48% frontend cycles idle [83.32%] + 6,570,207,292 stalled-cycles-backend # 24.64% backend cycles idle [66.67%] + 24,136,767,039 instructions # 0.91 insns per cycle + # 0.77 stalled cycles per insn [83.36%] + 2,038,880,459 branches # 234.178 M/sec [83.36%] + 1,511,548 branch-misses # 0.07% of all branches [83.36%] + + 8.729259859 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang 1000': + + 8744.544589 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 26,775,031,633 cycles # 3.062 GHz [83.30%] + 18,640,605,103 stalled-cycles-frontend # 69.62% frontend cycles idle [83.34%] + 6,628,187,063 stalled-cycles-backend # 24.76% backend cycles idle [66.69%] + 24,147,316,203 instructions # 0.90 insns per cycle + # 0.77 stalled cycles per insn [83.35%] + 2,038,977,599 branches # 233.171 M/sec [83.35%] + 1,410,532 branch-misses # 0.07% of all branches [83.34%] + + 8.767426048 seconds time elapsed + diff --git a/finalExperimentRuns/clang_output_3000 b/finalExperimentRuns/clang_output_3000 new file mode 100644 index 0000000..67b7ded --- /dev/null +++ b/finalExperimentRuns/clang_output_3000 @@ -0,0 +1,180 @@ +Iteration 1 + + Performance counter stats for './matrixMultiply_clang 3000': + + 435870.895044 task-clock # 0.997 CPUs utilized + 547 context-switches # 0.000 M/sec + 1 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 1,334,464,156,843 cycles # 3.062 GHz [83.33%] + 1,116,588,716,003 stalled-cycles-frontend # 83.67% frontend cycles idle [83.33%] + 760,549,402,793 stalled-cycles-backend # 56.99% backend cycles idle [66.67%] + 649,625,214,346 instructions # 0.49 insns per cycle + # 1.72 stalled cycles per insn [83.33%] + 54,405,175,566 branches # 124.819 M/sec [83.33%] + 31,641,759 branch-misses # 0.06% of all branches [83.33%] + + 436.970014074 seconds time elapsed + +Iteration 2 + + Performance counter stats for './matrixMultiply_clang 3000': + + 436941.138626 task-clock # 0.997 CPUs utilized + 547 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 1,337,726,968,521 cycles # 3.062 GHz [83.33%] + 1,119,881,200,210 stalled-cycles-frontend # 83.72% frontend cycles idle [83.33%] + 763,263,721,765 stalled-cycles-backend # 57.06% backend cycles idle [66.67%] + 649,639,608,656 instructions # 0.49 insns per cycle + # 1.72 stalled cycles per insn [83.33%] + 54,408,743,887 branches # 124.522 M/sec [83.33%] + 30,480,854 branch-misses # 0.06% of all branches [83.33%] + + 438.038257320 seconds time elapsed + +Iteration 3 + + Performance counter stats for './matrixMultiply_clang 3000': + + 436215.746174 task-clock # 0.997 CPUs utilized + 545 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 1,335,345,423,489 cycles # 3.061 GHz [83.33%] + 1,117,485,526,106 stalled-cycles-frontend # 83.69% frontend cycles idle [83.33%] + 759,502,326,023 stalled-cycles-backend # 56.88% backend cycles idle [66.67%] + 649,657,788,647 instructions # 0.49 insns per cycle + # 1.72 stalled cycles per insn [83.33%] + 54,408,979,455 branches # 124.730 M/sec [83.33%] + 29,850,475 branch-misses # 0.05% of all branches [83.33%] + + 437.311297435 seconds time elapsed + +Iteration 4 + + Performance counter stats for './matrixMultiply_clang 3000': + + 434660.839337 task-clock # 0.997 CPUs utilized + 544 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 1,330,746,441,063 cycles # 3.062 GHz [83.33%] + 1,112,865,667,103 stalled-cycles-frontend # 83.63% frontend cycles idle [83.33%] + 755,733,809,906 stalled-cycles-backend # 56.79% backend cycles idle [66.67%] + 649,629,467,424 instructions # 0.49 insns per cycle + # 1.71 stalled cycles per insn [83.33%] + 54,405,944,056 branches # 125.169 M/sec [83.33%] + 30,324,200 branch-misses # 0.06% of all branches [83.33%] + + 435.752964281 seconds time elapsed + +Iteration 5 + + Performance counter stats for './matrixMultiply_clang 3000': + + 438781.815155 task-clock # 0.997 CPUs utilized + 550 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 1,343,359,727,224 cycles # 3.062 GHz [83.33%] + 1,125,419,926,576 stalled-cycles-frontend # 83.78% frontend cycles idle [83.33%] + 768,084,785,121 stalled-cycles-backend # 57.18% backend cycles idle [66.67%] + 649,665,121,934 instructions # 0.48 insns per cycle + # 1.73 stalled cycles per insn [83.33%] + 54,408,662,474 branches # 123.999 M/sec [83.33%] + 30,594,105 branch-misses # 0.06% of all branches [83.33%] + + 439.883691061 seconds time elapsed + +Iteration 6 + + Performance counter stats for './matrixMultiply_clang 3000': + + 436113.735318 task-clock # 0.997 CPUs utilized + 546 context-switches # 0.000 M/sec + 1 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 1,335,204,076,506 cycles # 3.062 GHz [83.33%] + 1,117,295,887,936 stalled-cycles-frontend # 83.68% frontend cycles idle [83.33%] + 749,322,042,821 stalled-cycles-backend # 56.12% backend cycles idle [66.67%] + 649,625,709,220 instructions # 0.49 insns per cycle + # 1.72 stalled cycles per insn [83.33%] + 54,406,397,138 branches # 124.753 M/sec [83.33%] + 30,194,121 branch-misses # 0.06% of all branches [83.33%] + + 437.209951172 seconds time elapsed + +Iteration 7 + + Performance counter stats for './matrixMultiply_clang 3000': + + 436287.286674 task-clock # 0.997 CPUs utilized + 546 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 1,335,745,938,506 cycles # 3.062 GHz [83.33%] + 1,117,869,103,390 stalled-cycles-frontend # 83.69% frontend cycles idle [83.33%] + 758,032,143,399 stalled-cycles-backend # 56.75% backend cycles idle [66.67%] + 649,644,491,128 instructions # 0.49 insns per cycle + # 1.72 stalled cycles per insn [83.33%] + 54,406,028,719 branches # 124.702 M/sec [83.33%] + 30,072,867 branch-misses # 0.06% of all branches [83.33%] + + 437.394865809 seconds time elapsed + +Iteration 8 + + Performance counter stats for './matrixMultiply_clang 3000': + + 437543.330568 task-clock # 0.997 CPUs utilized + 546 context-switches # 0.000 M/sec + 2 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 1,339,450,532,302 cycles # 3.061 GHz [83.33%] + 1,121,680,166,977 stalled-cycles-frontend # 83.74% frontend cycles idle [83.33%] + 764,834,662,020 stalled-cycles-backend # 57.10% backend cycles idle [66.67%] + 649,641,184,728 instructions # 0.49 insns per cycle + # 1.73 stalled cycles per insn [83.33%] + 54,409,529,224 branches # 124.352 M/sec [83.33%] + 30,174,500 branch-misses # 0.06% of all branches [83.33%] + + 438.656887155 seconds time elapsed + +Iteration 9 + + Performance counter stats for './matrixMultiply_clang 3000': + + 437737.932628 task-clock # 0.997 CPUs utilized + 549 context-switches # 0.000 M/sec + 2 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 1,340,168,475,305 cycles # 3.062 GHz [83.33%] + 1,122,266,697,544 stalled-cycles-frontend # 83.74% frontend cycles idle [83.33%] + 762,038,651,178 stalled-cycles-backend # 56.86% backend cycles idle [66.67%] + 649,657,223,429 instructions # 0.48 insns per cycle + # 1.73 stalled cycles per insn [83.33%] + 54,407,338,177 branches # 124.292 M/sec [83.33%] + 29,532,180 branch-misses # 0.05% of all branches [83.33%] + + 438.836735033 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang 3000': + + 438463.682513 task-clock # 0.997 CPUs utilized + 548 context-switches # 0.000 M/sec + 1 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 1,342,374,344,170 cycles # 3.062 GHz [83.33%] + 1,124,481,215,389 stalled-cycles-frontend # 83.77% frontend cycles idle [83.33%] + 765,398,751,304 stalled-cycles-backend # 57.02% backend cycles idle [66.67%] + 649,726,904,426 instructions # 0.48 insns per cycle + # 1.73 stalled cycles per insn [83.33%] + 54,402,887,069 branches # 124.076 M/sec [83.33%] + 30,000,209 branch-misses # 0.06% of all branches [83.33%] + + 439.566853455 seconds time elapsed + diff --git a/finalExperimentRuns/clang_output_optimized_10 b/finalExperimentRuns/clang_output_optimized_10 new file mode 100644 index 0000000..62f3d83 --- /dev/null +++ b/finalExperimentRuns/clang_output_optimized_10 @@ -0,0 +1,180 @@ +Iteration 10 + + Performance counter stats for './matrixMultiply_clang_optimized 10': + + 0.254738 task-clock # 0.186 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.499 M/sec + 773,561 cycles # 3.037 GHz + 467,851 stalled-cycles-frontend # 60.48% frontend cycles idle + 310,294 stalled-cycles-backend # 40.11% backend cycles idle + 535,991 instructions # 0.69 insns per cycle + # 0.87 stalled cycles per insn + 104,877 branches # 411.705 M/sec + branch-misses + + 0.001369651 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang_optimized 10': + + 0.253462 task-clock # 0.312 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.501 M/sec + 770,150 cycles # 3.039 GHz + 467,124 stalled-cycles-frontend # 60.65% frontend cycles idle + 321,201 stalled-cycles-backend # 41.71% backend cycles idle + 537,198 instructions # 0.70 insns per cycle + # 0.87 stalled cycles per insn + 105,085 branches # 414.599 M/sec + branch-misses + + 0.000812752 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang_optimized 10': + + 0.252371 task-clock # 0.313 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.503 M/sec + 767,605 cycles # 3.042 GHz + 462,629 stalled-cycles-frontend # 60.27% frontend cycles idle + 328,909 stalled-cycles-backend # 42.85% backend cycles idle + 536,671 instructions # 0.70 insns per cycle + # 0.86 stalled cycles per insn + 104,982 branches # 415.983 M/sec + branch-misses + + 0.000806311 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang_optimized 10': + + 0.255736 task-clock # 0.336 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 126 page-faults # 0.493 M/sec + 762,557 cycles # 2.982 GHz + 460,565 stalled-cycles-frontend # 60.40% frontend cycles idle + 323,600 stalled-cycles-backend # 42.44% backend cycles idle + 538,377 instructions # 0.71 insns per cycle + # 0.86 stalled cycles per insn + 105,324 branches # 411.847 M/sec + branch-misses + + 0.000760748 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang_optimized 10': + + 0.256619 task-clock # 0.325 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.495 M/sec + 772,169 cycles # 3.009 GHz + 465,669 stalled-cycles-frontend # 60.31% frontend cycles idle + 306,523 stalled-cycles-backend # 39.70% backend cycles idle + 541,471 instructions # 0.70 insns per cycle + # 0.86 stalled cycles per insn + 105,836 branches # 412.425 M/sec + branch-misses + + 0.000790370 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang_optimized 10': + + 0.252237 task-clock # 0.135 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.503 M/sec + 766,577 cycles # 3.039 GHz + 463,314 stalled-cycles-frontend # 60.44% frontend cycles idle + 326,037 stalled-cycles-backend # 42.53% backend cycles idle + 538,054 instructions # 0.70 insns per cycle + # 0.86 stalled cycles per insn + 105,236 branches # 417.211 M/sec + branch-misses + + 0.001870156 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang_optimized 10': + + 0.260936 task-clock # 0.320 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.487 M/sec + 779,990 cycles # 2.989 GHz + 473,885 stalled-cycles-frontend # 60.76% frontend cycles idle + 324,297 stalled-cycles-backend # 41.58% backend cycles idle + 539,619 instructions # 0.69 insns per cycle + # 0.88 stalled cycles per insn + 105,485 branches # 404.256 M/sec + branch-misses + + 0.000815701 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang_optimized 10': + + 0.246294 task-clock # 0.338 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.516 M/sec + cycles + 443,508 stalled-cycles-frontend # 0.00% frontend cycles idle + 317,155 stalled-cycles-backend # 0.00% backend cycles idle + 542,075 instructions # 0.00 insns per cycle + # 0.82 stalled cycles per insn + 105,925 branches # 430.075 M/sec + 6,517 branch-misses # 6.15% of all branches + + 0.000728607 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang_optimized 10': + + 0.242933 task-clock # 0.293 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.523 M/sec + 727,267 cycles # 2.994 GHz + 425,098 stalled-cycles-frontend # 58.45% frontend cycles idle + 304,182 stalled-cycles-backend # 41.83% backend cycles idle + 537,280 instructions # 0.74 insns per cycle + # 0.79 stalled cycles per insn + 105,121 branches # 432.716 M/sec + branch-misses + + 0.000830440 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang_optimized 10': + + 0.256732 task-clock # 0.216 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 126 page-faults # 0.491 M/sec + 768,563 cycles # 2.994 GHz + 464,524 stalled-cycles-frontend # 60.44% frontend cycles idle + 330,887 stalled-cycles-backend # 43.05% backend cycles idle + 536,485 instructions # 0.70 insns per cycle + # 0.87 stalled cycles per insn + 104,979 branches # 408.905 M/sec + branch-misses + + 0.001186233 seconds time elapsed + diff --git a/finalExperimentRuns/clang_output_optimized_100 b/finalExperimentRuns/clang_output_optimized_100 new file mode 100644 index 0000000..0924dc0 --- /dev/null +++ b/finalExperimentRuns/clang_output_optimized_100 @@ -0,0 +1,180 @@ +Iteration 100 + + Performance counter stats for './matrixMultiply_clang_optimized 100': + + 1.962034 task-clock # 0.665 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.081 M/sec + 6,010,763 cycles # 3.064 GHz + 884,158 stalled-cycles-frontend # 14.71% frontend cycles idle + 737,567 stalled-cycles-backend # 12.27% backend cycles idle + 11,047,971 instructions # 1.84 insns per cycle + # 0.08 stalled cycles per insn + 1,468,085 branches # 748.246 M/sec + branch-misses + + 0.002949295 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_clang_optimized 100': + + 1.948140 task-clock # 0.439 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.082 M/sec + 5,966,906 cycles # 3.063 GHz + 855,303 stalled-cycles-frontend # 14.33% frontend cycles idle + 694,676 stalled-cycles-backend # 11.64% backend cycles idle + 11,045,804 instructions # 1.85 insns per cycle + # 0.08 stalled cycles per insn + 1,467,692 branches # 753.381 M/sec + branch-misses + + 0.004435255 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_clang_optimized 100': + + 1.947260 task-clock # 0.765 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 158 page-faults # 0.081 M/sec + 5,964,814 cycles # 3.063 GHz + 855,649 stalled-cycles-frontend # 14.34% frontend cycles idle + 710,245 stalled-cycles-backend # 11.91% backend cycles idle + 11,043,997 instructions # 1.85 insns per cycle + # 0.08 stalled cycles per insn + 1,467,392 branches # 753.568 M/sec + branch-misses + + 0.002546083 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_clang_optimized 100': + + 1.947560 task-clock # 0.765 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 158 page-faults # 0.081 M/sec + 5,952,625 cycles # 3.056 GHz + 850,406 stalled-cycles-frontend # 14.29% frontend cycles idle + 375,296 stalled-cycles-backend # 6.30% backend cycles idle + 11,022,704 instructions # 1.85 insns per cycle + # 0.08 stalled cycles per insn + 1,463,754 branches # 751.584 M/sec + branch-misses + + 0.002546145 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_clang_optimized 100': + + 1.943698 task-clock # 0.674 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.082 M/sec + 5,952,763 cycles # 3.063 GHz + 846,909 stalled-cycles-frontend # 14.23% frontend cycles idle + 708,785 stalled-cycles-backend # 11.91% backend cycles idle + 11,039,332 instructions # 1.85 insns per cycle + # 0.08 stalled cycles per insn + 1,466,568 branches # 754.525 M/sec + branch-misses + + 0.002885840 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_clang_optimized 100': + + 1.949368 task-clock # 0.765 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.082 M/sec + 5,970,129 cycles # 3.063 GHz + 859,909 stalled-cycles-frontend # 14.40% frontend cycles idle + 722,803 stalled-cycles-backend # 12.11% backend cycles idle + 11,045,169 instructions # 1.85 insns per cycle + # 0.08 stalled cycles per insn + 1,467,572 branches # 752.845 M/sec + branch-misses + + 0.002548488 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_clang_optimized 100': + + 2.021036 task-clock # 0.771 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 158 page-faults # 0.078 M/sec + 3,528,170 cycles # 1.746 GHz + 1,009,925 stalled-cycles-frontend # 28.62% frontend cycles idle + 726,356 stalled-cycles-backend # 20.59% backend cycles idle + 11,046,780 instructions # 3.13 insns per cycle + # 0.09 stalled cycles per insn + 1,467,943 branches # 726.332 M/sec + 14,164 branch-misses # 0.96% of all branches [42.61%] + + 0.002620696 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_clang_optimized 100': + + 1.959283 task-clock # 0.779 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 158 page-faults # 0.081 M/sec + 5,463,297 cycles # 2.788 GHz + 862,862 stalled-cycles-frontend # 15.79% frontend cycles idle + 699,735 stalled-cycles-backend # 12.81% backend cycles idle + 11,048,711 instructions # 2.02 insns per cycle + # 0.08 stalled cycles per insn + 1,468,247 branches # 749.380 M/sec + 17,826 branch-misses # 1.21% of all branches [ 8.79%] + + 0.002515733 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_clang_optimized 100': + + 1.948017 task-clock # 0.780 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 158 page-faults # 0.081 M/sec + 5,966,888 cycles # 3.063 GHz + 856,064 stalled-cycles-frontend # 14.35% frontend cycles idle + 711,299 stalled-cycles-backend # 11.92% backend cycles idle + 11,046,813 instructions # 1.85 insns per cycle + # 0.08 stalled cycles per insn + 1,467,618 branches # 753.391 M/sec + branch-misses + + 0.002498457 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_clang_optimized 100': + + 1.949114 task-clock # 0.785 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.082 M/sec + 5,952,365 cycles # 3.054 GHz + 848,125 stalled-cycles-frontend # 14.25% frontend cycles idle + 694,927 stalled-cycles-backend # 11.67% backend cycles idle + 11,033,183 instructions # 1.85 insns per cycle + # 0.08 stalled cycles per insn + 1,465,119 branches # 751.685 M/sec + branch-misses + + 0.002481376 seconds time elapsed + diff --git a/finalExperimentRuns/clang_output_optimized_1000 b/finalExperimentRuns/clang_output_optimized_1000 new file mode 100644 index 0000000..8ca645e --- /dev/null +++ b/finalExperimentRuns/clang_output_optimized_1000 @@ -0,0 +1,180 @@ +Iteration 1000 + + Performance counter stats for './matrixMultiply_clang_optimized 1000': + + 8514.301983 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 26,069,051,123 cycles # 3.062 GHz [83.32%] + 20,139,826,256 stalled-cycles-frontend # 77.26% frontend cycles idle [83.32%] + 10,074,406,315 stalled-cycles-backend # 38.65% backend cycles idle [66.68%] + 9,145,854,517 instructions # 0.35 insns per cycle + # 2.20 stalled cycles per insn [83.36%] + 1,035,460,914 branches # 121.614 M/sec [83.36%] + 1,085,246 branch-misses # 0.10% of all branches [83.34%] + + 8.536668414 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_clang_optimized 1000': + + 8533.423369 task-clock # 0.997 CPUs utilized + 10 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,129,292,006 cycles # 3.062 GHz [83.31%] + 20,207,838,356 stalled-cycles-frontend # 77.34% frontend cycles idle [83.31%] + 9,007,753,681 stalled-cycles-backend # 34.47% backend cycles idle [66.71%] + 9,149,245,795 instructions # 0.35 insns per cycle + # 2.21 stalled cycles per insn [83.35%] + 1,035,427,362 branches # 121.338 M/sec [83.35%] + 1,083,353 branch-misses # 0.10% of all branches [83.33%] + + 8.555680250 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_clang_optimized 1000': + + 8524.533087 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,102,555,736 cycles # 3.062 GHz [83.34%] + 20,174,883,322 stalled-cycles-frontend # 77.29% frontend cycles idle [83.34%] + 9,795,587,822 stalled-cycles-backend # 37.53% backend cycles idle [66.67%] + 9,149,253,149 instructions # 0.35 insns per cycle + # 2.21 stalled cycles per insn [83.34%] + 1,035,420,531 branches # 121.464 M/sec [83.34%] + 1,081,429 branch-misses # 0.10% of all branches [83.35%] + + 8.546955580 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_clang_optimized 1000': + + 8558.061972 task-clock # 0.997 CPUs utilized + 12 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,204,898,727 cycles # 3.062 GHz [83.31%] + 20,288,761,306 stalled-cycles-frontend # 77.42% frontend cycles idle [83.31%] + 9,856,648,639 stalled-cycles-backend # 37.61% backend cycles idle [66.71%] + 9,149,320,923 instructions # 0.35 insns per cycle + # 2.22 stalled cycles per insn [83.36%] + 1,035,453,678 branches # 120.992 M/sec [83.36%] + 1,081,282 branch-misses # 0.10% of all branches [83.32%] + + 8.580455767 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_clang_optimized 1000': + + 8513.464377 task-clock # 0.997 CPUs utilized + 10 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,067,845,209 cycles # 3.062 GHz [83.32%] + 20,133,798,950 stalled-cycles-frontend # 77.24% frontend cycles idle [83.32%] + 8,624,644,078 stalled-cycles-backend # 33.09% backend cycles idle [66.67%] + 9,145,773,113 instructions # 0.35 insns per cycle + # 2.20 stalled cycles per insn [83.36%] + 1,035,458,509 branches # 121.626 M/sec [83.36%] + 1,089,716 branch-misses # 0.11% of all branches [83.35%] + + 8.535682415 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_clang_optimized 1000': + + 8519.104099 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,085,519,997 cycles # 3.062 GHz [83.33%] + 20,156,984,198 stalled-cycles-frontend # 77.27% frontend cycles idle [83.33%] + 8,787,917,469 stalled-cycles-backend # 33.69% backend cycles idle [66.65%] + 9,149,566,196 instructions # 0.35 insns per cycle + # 2.20 stalled cycles per insn [83.33%] + 1,034,627,980 branches # 121.448 M/sec [83.37%] + 1,113,417 branch-misses # 0.11% of all branches [83.36%] + + 8.541426918 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_clang_optimized 1000': + + 8525.969981 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 26,106,906,116 cycles # 3.062 GHz [83.30%] + 20,176,019,515 stalled-cycles-frontend # 77.28% frontend cycles idle [83.34%] + 8,065,406,321 stalled-cycles-backend # 30.89% backend cycles idle [66.68%] + 9,149,290,275 instructions # 0.35 insns per cycle + # 2.21 stalled cycles per insn [83.34%] + 1,035,439,656 branches # 121.445 M/sec [83.34%] + 1,090,091 branch-misses # 0.11% of all branches [83.34%] + + 8.548269885 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_clang_optimized 1000': + + 8530.756566 task-clock # 0.997 CPUs utilized + 10 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,120,379,015 cycles # 3.062 GHz [83.30%] + 20,196,068,508 stalled-cycles-frontend # 77.32% frontend cycles idle [83.30%] + 9,181,947,822 stalled-cycles-backend # 35.15% backend cycles idle [66.70%] + 9,150,553,323 instructions # 0.35 insns per cycle + # 2.21 stalled cycles per insn [83.35%] + 1,035,656,501 branches # 121.403 M/sec [83.35%] + 1,099,565 branch-misses # 0.11% of all branches [83.35%] + + 8.553018234 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_clang_optimized 1000': + + 8537.180438 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,141,449,324 cycles # 3.062 GHz [83.31%] + 20,217,377,597 stalled-cycles-frontend # 77.34% frontend cycles idle [83.32%] + 7,883,092,404 stalled-cycles-backend # 30.16% backend cycles idle [66.67%] + 9,145,842,530 instructions # 0.35 insns per cycle + # 2.21 stalled cycles per insn [83.36%] + 1,035,464,658 branches # 121.289 M/sec [83.36%] + 1,094,606 branch-misses # 0.11% of all branches [83.35%] + + 8.561051194 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_clang_optimized 1000': + + 8521.967213 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,092,302,546 cycles # 3.062 GHz [83.33%] + 20,165,608,254 stalled-cycles-frontend # 77.29% frontend cycles idle [83.33%] + 9,103,949,656 stalled-cycles-backend # 34.89% backend cycles idle [66.66%] + 9,149,688,050 instructions # 0.35 insns per cycle + # 2.20 stalled cycles per insn [83.33%] + 1,035,529,349 branches # 121.513 M/sec [83.33%] + 1,095,002 branch-misses # 0.11% of all branches [83.36%] + + 8.544265295 seconds time elapsed + diff --git a/finalExperimentRuns/clang_output_optimized_3000 b/finalExperimentRuns/clang_output_optimized_3000 new file mode 100644 index 0000000..58371ab --- /dev/null +++ b/finalExperimentRuns/clang_output_optimized_3000 @@ -0,0 +1,180 @@ +Iteration 3000 + + Performance counter stats for './matrixMultiply_clang_optimized 3000': + + 312664.887494 task-clock # 0.997 CPUs utilized + 392 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 957,194,643,582 cycles # 3.061 GHz [83.33%] + 801,278,508,780 stalled-cycles-frontend # 83.71% frontend cycles idle [83.33%] + 654,614,518,739 stalled-cycles-backend # 68.39% backend cycles idle [66.67%] + 244,566,461,305 instructions # 0.26 insns per cycle + # 3.28 stalled cycles per insn [83.33%] + 27,360,317,759 branches # 87.507 M/sec [83.33%] + 13,880,850 branch-misses # 0.05% of all branches [83.33%] + + 313.450793586 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_clang_optimized 3000': + + 312283.288554 task-clock # 0.997 CPUs utilized + 391 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 955,893,620,144 cycles # 3.061 GHz [83.33%] + 799,959,307,996 stalled-cycles-frontend # 83.69% frontend cycles idle [83.33%] + 624,558,986,255 stalled-cycles-backend # 65.34% backend cycles idle [66.67%] + 244,567,518,923 instructions # 0.26 insns per cycle + # 3.27 stalled cycles per insn [83.33%] + 27,361,026,722 branches # 87.616 M/sec [83.33%] + 12,927,656 branch-misses # 0.05% of all branches [83.33%] + + 313.069529799 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_clang_optimized 3000': + + 314069.326289 task-clock # 0.997 CPUs utilized + 393 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 961,431,758,518 cycles # 3.061 GHz [83.33%] + 805,494,232,345 stalled-cycles-frontend # 83.78% frontend cycles idle [83.33%] + 630,365,211,190 stalled-cycles-backend # 65.57% backend cycles idle [66.67%] + 244,567,418,849 instructions # 0.25 insns per cycle + # 3.29 stalled cycles per insn [83.33%] + 27,359,270,785 branches # 87.112 M/sec [83.33%] + 12,925,094 branch-misses # 0.05% of all branches [83.33%] + + 314.901173672 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_clang_optimized 3000': + + 313605.668691 task-clock # 0.997 CPUs utilized + 393 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 960,099,341,079 cycles # 3.061 GHz [83.33%] + 804,209,745,312 stalled-cycles-frontend # 83.76% frontend cycles idle [83.33%] + 650,945,842,810 stalled-cycles-backend # 67.80% backend cycles idle [66.67%] + 244,552,833,312 instructions # 0.25 insns per cycle + # 3.29 stalled cycles per insn [83.33%] + 27,360,236,458 branches # 87.244 M/sec [83.33%] + 13,471,738 branch-misses # 0.05% of all branches [83.33%] + + 314.393884834 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_clang_optimized 3000': + + 313966.377674 task-clock # 0.997 CPUs utilized + 394 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 961,182,730,314 cycles # 3.061 GHz [83.33%] + 805,474,890,245 stalled-cycles-frontend # 83.80% frontend cycles idle [83.33%] + 655,175,905,580 stalled-cycles-backend # 68.16% backend cycles idle [66.67%] + 244,546,839,813 instructions # 0.25 insns per cycle + # 3.29 stalled cycles per insn [83.33%] + 27,360,307,420 branches # 87.144 M/sec [83.33%] + 13,625,821 branch-misses # 0.05% of all branches [83.33%] + + 314.755038625 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_clang_optimized 3000': + + 315025.036094 task-clock # 0.997 CPUs utilized + 395 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 964,449,384,871 cycles # 3.062 GHz [83.33%] + 808,592,472,928 stalled-cycles-frontend # 83.84% frontend cycles idle [83.33%] + 646,222,264,512 stalled-cycles-backend # 67.00% backend cycles idle [66.67%] + 244,552,197,147 instructions # 0.25 insns per cycle + # 3.31 stalled cycles per insn [83.33%] + 27,358,770,723 branches # 86.846 M/sec [83.33%] + 13,799,312 branch-misses # 0.05% of all branches [83.33%] + + 315.816146040 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_clang_optimized 3000': + + 313884.082830 task-clock # 0.997 CPUs utilized + 394 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 960,938,455,254 cycles # 3.061 GHz [83.33%] + 804,967,633,007 stalled-cycles-frontend # 83.77% frontend cycles idle [83.33%] + 658,597,647,955 stalled-cycles-backend # 68.54% backend cycles idle [66.67%] + 244,565,187,191 instructions # 0.25 insns per cycle + # 3.29 stalled cycles per insn [83.33%] + 27,361,335,010 branches # 87.170 M/sec [83.33%] + 13,868,957 branch-misses # 0.05% of all branches [83.33%] + + 314.672105390 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_clang_optimized 3000': + + 313070.788902 task-clock # 0.997 CPUs utilized + 393 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 958,299,705,663 cycles # 3.061 GHz [83.33%] + 802,791,733,825 stalled-cycles-frontend # 83.77% frontend cycles idle [83.33%] + 620,995,884,839 stalled-cycles-backend # 64.80% backend cycles idle [66.67%] + 244,575,491,330 instructions # 0.26 insns per cycle + # 3.28 stalled cycles per insn [83.33%] + 27,361,827,022 branches # 87.398 M/sec [83.33%] + 13,026,877 branch-misses # 0.05% of all branches [83.33%] + + 313.856410059 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_clang_optimized 3000': + + 315731.190936 task-clock # 0.997 CPUs utilized + 396 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 966,582,281,999 cycles # 3.061 GHz [83.33%] + 810,643,864,844 stalled-cycles-frontend # 83.87% frontend cycles idle [83.33%] + 653,645,811,041 stalled-cycles-backend # 67.62% backend cycles idle [66.67%] + 244,568,729,928 instructions # 0.25 insns per cycle + # 3.31 stalled cycles per insn [83.33%] + 27,361,596,499 branches # 86.661 M/sec [83.33%] + 13,710,796 branch-misses # 0.05% of all branches [83.33%] + + 316.523716704 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_clang_optimized 3000': + + 312952.702244 task-clock # 0.997 CPUs utilized + 393 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 957,972,776,248 cycles # 3.061 GHz [83.33%] + 802,443,920,049 stalled-cycles-frontend # 83.76% frontend cycles idle [83.33%] + 640,914,452,512 stalled-cycles-backend # 66.90% backend cycles idle [66.67%] + 244,578,176,924 instructions # 0.26 insns per cycle + # 3.28 stalled cycles per insn [83.33%] + 27,363,549,903 branches # 87.437 M/sec [83.33%] + 13,598,396 branch-misses # 0.05% of all branches [83.33%] + + 313.738715536 seconds time elapsed + diff --git a/finalExperimentRuns/gcc_output_10 b/finalExperimentRuns/gcc_output_10 new file mode 100644 index 0000000..d533c09 --- /dev/null +++ b/finalExperimentRuns/gcc_output_10 @@ -0,0 +1,180 @@ +Iteration 1 + + Performance counter stats for './matrixMultiply 10': + + 0.264216 task-clock # 0.234 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.481 M/sec + 801,618 cycles # 3.034 GHz + 485,598 stalled-cycles-frontend # 60.58% frontend cycles idle + 331,763 stalled-cycles-backend # 41.39% backend cycles idle + 579,543 instructions # 0.72 insns per cycle + # 0.84 stalled cycles per insn + 105,217 branches # 398.223 M/sec + branch-misses + + 0.001130275 seconds time elapsed + +Iteration 2 + + Performance counter stats for './matrixMultiply 10': + + 0.260784 task-clock # 0.312 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.487 M/sec + 793,277 cycles # 3.042 GHz + 476,100 stalled-cycles-frontend # 60.02% frontend cycles idle + 334,152 stalled-cycles-backend # 42.12% backend cycles idle + 582,916 instructions # 0.73 insns per cycle + # 0.82 stalled cycles per insn + 105,800 branches # 405.700 M/sec + branch-misses + + 0.000836036 seconds time elapsed + +Iteration 3 + + Performance counter stats for './matrixMultiply 10': + + 0.260142 task-clock # 0.341 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.488 M/sec + cycles + 469,818 stalled-cycles-frontend # 0.00% frontend cycles idle + 330,664 stalled-cycles-backend # 0.00% backend cycles idle + 581,476 instructions # 0.00 insns per cycle + # 0.81 stalled cycles per insn + 105,525 branches # 405.644 M/sec + 6,383 branch-misses # 6.05% of all branches + + 0.000763764 seconds time elapsed + +Iteration 4 + + Performance counter stats for './matrixMultiply 10': + + 0.265972 task-clock # 0.264 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.477 M/sec + 783,856 cycles # 2.947 GHz + 469,562 stalled-cycles-frontend # 59.90% frontend cycles idle + 316,315 stalled-cycles-backend # 40.35% backend cycles idle + 579,399 instructions # 0.74 insns per cycle + # 0.81 stalled cycles per insn + 105,152 branches # 395.350 M/sec + branch-misses + + 0.001005965 seconds time elapsed + +Iteration 5 + + Performance counter stats for './matrixMultiply 10': + + 0.262319 task-clock # 0.287 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 126 page-faults # 0.480 M/sec + 798,897 cycles # 3.046 GHz + 481,149 stalled-cycles-frontend # 60.23% frontend cycles idle + 294,473 stalled-cycles-backend # 36.86% backend cycles idle + 584,326 instructions # 0.73 insns per cycle + # 0.82 stalled cycles per insn + 105,770 branches # 403.211 M/sec + branch-misses + + 0.000915591 seconds time elapsed + +Iteration 6 + + Performance counter stats for './matrixMultiply 10': + + 0.261447 task-clock # 0.150 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.486 M/sec + 795,646 cycles # 3.043 GHz + 481,327 stalled-cycles-frontend # 60.50% frontend cycles idle + 341,767 stalled-cycles-backend # 42.95% backend cycles idle + 574,374 instructions # 0.72 insns per cycle + # 0.84 stalled cycles per insn + 103,651 branches # 396.451 M/sec + branch-misses + + 0.001741528 seconds time elapsed + +Iteration 7 + + Performance counter stats for './matrixMultiply 10': + + 0.264455 task-clock # 0.300 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.480 M/sec + 790,618 cycles # 2.990 GHz + 478,470 stalled-cycles-frontend # 60.52% frontend cycles idle + 330,579 stalled-cycles-backend # 41.81% backend cycles idle + 568,364 instructions # 0.72 insns per cycle + # 0.84 stalled cycles per insn + 102,901 branches # 389.106 M/sec + branch-misses + + 0.000881953 seconds time elapsed + +Iteration 8 + + Performance counter stats for './matrixMultiply 10': + + 0.259611 task-clock # 0.320 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 126 page-faults # 0.485 M/sec + 789,701 cycles # 3.042 GHz + 474,512 stalled-cycles-frontend # 60.09% frontend cycles idle + 332,633 stalled-cycles-backend # 42.12% backend cycles idle + 576,821 instructions # 0.73 insns per cycle + # 0.82 stalled cycles per insn + 104,740 branches # 403.450 M/sec + branch-misses + + 0.000811846 seconds time elapsed + +Iteration 9 + + Performance counter stats for './matrixMultiply 10': + + 0.261265 task-clock # 0.147 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 126 page-faults # 0.482 M/sec + 793,438 cycles # 3.037 GHz + 479,473 stalled-cycles-frontend # 60.43% frontend cycles idle + 320,606 stalled-cycles-backend # 40.41% backend cycles idle + 572,808 instructions # 0.72 insns per cycle + # 0.84 stalled cycles per insn + 104,034 branches # 398.193 M/sec + branch-misses + + 0.001780051 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply 10': + + 0.261124 task-clock # 0.302 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.486 M/sec + 794,602 cycles # 3.043 GHz + 476,906 stalled-cycles-frontend # 60.02% frontend cycles idle + 329,122 stalled-cycles-backend # 41.42% backend cycles idle + 583,923 instructions # 0.73 insns per cycle + # 0.82 stalled cycles per insn + 105,965 branches # 405.803 M/sec + branch-misses + + 0.000863590 seconds time elapsed + diff --git a/finalExperimentRuns/gcc_output_100 b/finalExperimentRuns/gcc_output_100 new file mode 100644 index 0000000..cecefd4 --- /dev/null +++ b/finalExperimentRuns/gcc_output_100 @@ -0,0 +1,180 @@ +Iteration 1 + + Performance counter stats for './matrixMultiply 100': + + 8.054504 task-clock # 0.903 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.020 M/sec + 24,610,696 cycles # 3.056 GHz [50.57%] + 7,909,722 stalled-cycles-frontend # 32.14% frontend cycles idle + 689,986 stalled-cycles-backend # 2.80% backend cycles idle + 52,144,755 instructions # 2.12 insns per cycle + # 0.15 stalled cycles per insn + 1,478,711 branches # 183.588 M/sec + 11,727 branch-misses # 0.79% of all branches [76.43%] + + 0.008919947 seconds time elapsed + +Iteration 2 + + Performance counter stats for './matrixMultiply 100': + + 8.117808 task-clock # 0.932 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.020 M/sec + 24,718,750 cycles # 3.045 GHz [50.92%] + 8,195,388 stalled-cycles-frontend # 33.15% frontend cycles idle + 897,551 stalled-cycles-backend # 3.63% backend cycles idle + 52,145,300 instructions # 2.11 insns per cycle + # 0.16 stalled cycles per insn + 1,478,850 branches # 182.174 M/sec + 11,724 branch-misses # 0.79% of all branches [74.58%] + + 0.008711930 seconds time elapsed + +Iteration 3 + + Performance counter stats for './matrixMultiply 100': + + 8.111482 task-clock # 0.927 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 158 page-faults # 0.019 M/sec + 24,714,858 cycles # 3.047 GHz [50.88%] + 8,209,540 stalled-cycles-frontend # 33.22% frontend cycles idle + 698,504 stalled-cycles-backend # 2.83% backend cycles idle + 52,145,272 instructions # 2.11 insns per cycle + # 0.16 stalled cycles per insn + 1,478,847 branches # 182.315 M/sec + 11,726 branch-misses # 0.79% of all branches [74.46%] + + 0.008749660 seconds time elapsed + +Iteration 4 + + Performance counter stats for './matrixMultiply 100': + + 8.148961 task-clock # 0.917 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 158 page-faults # 0.019 M/sec + 24,818,324 cycles # 3.046 GHz [51.10%] + 6,387,176 stalled-cycles-frontend # 25.74% frontend cycles idle + 574,390 stalled-cycles-backend # 2.31% backend cycles idle + 52,168,459 instructions # 2.10 insns per cycle + # 0.12 stalled cycles per insn + 1,483,171 branches # 182.007 M/sec + 11,840 branch-misses # 0.80% of all branches [90.60%] + + 0.008883245 seconds time elapsed + +Iteration 5 + + Performance counter stats for './matrixMultiply 100': + + 8.087013 task-clock # 0.908 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.020 M/sec + 24,632,136 cycles # 3.046 GHz [50.73%] + 6,193,928 stalled-cycles-frontend # 25.15% frontend cycles idle + 772,134 stalled-cycles-backend # 3.13% backend cycles idle + 52,143,390 instructions # 2.12 insns per cycle + # 0.12 stalled cycles per insn + 1,478,507 branches # 182.825 M/sec + 11,557 branch-misses # 0.78% of all branches [91.88%] + + 0.008906666 seconds time elapsed + +Iteration 6 + + Performance counter stats for './matrixMultiply 100': + + 8.084764 task-clock # 0.930 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.020 M/sec + 24,635,382 cycles # 3.047 GHz [50.73%] + 10,318,247 stalled-cycles-frontend # 41.88% frontend cycles idle + 985,300 stalled-cycles-backend # 4.00% backend cycles idle + 52,144,339 instructions # 2.12 insns per cycle + # 0.20 stalled cycles per insn + 1,478,689 branches # 182.898 M/sec + 12,290 branch-misses # 0.83% of all branches [54.83%] + + 0.008697934 seconds time elapsed + +Iteration 7 + + Performance counter stats for './matrixMultiply 100': + + 8.080921 task-clock # 0.933 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.020 M/sec + 24,599,853 cycles # 3.044 GHz [50.70%] + 6,254,276 stalled-cycles-frontend # 25.42% frontend cycles idle + 871,510 stalled-cycles-backend # 3.54% backend cycles idle + 52,146,066 instructions # 2.12 insns per cycle + # 0.12 stalled cycles per insn + 1,479,020 branches # 183.026 M/sec + 11,596 branch-misses # 0.78% of all branches [91.42%] + + 0.008656610 seconds time elapsed + +Iteration 8 + + Performance counter stats for './matrixMultiply 100': + + 8.104023 task-clock # 0.910 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.020 M/sec + 24,721,453 cycles # 3.051 GHz [50.81%] + 7,609,755 stalled-cycles-frontend # 30.78% frontend cycles idle + 797,005 stalled-cycles-backend # 3.22% backend cycles idle + 52,152,265 instructions # 2.11 insns per cycle + # 0.15 stalled cycles per insn + 1,480,154 branches # 182.644 M/sec + 11,875 branch-misses # 0.80% of all branches [79.49%] + + 0.008901112 seconds time elapsed + +Iteration 9 + + Performance counter stats for './matrixMultiply 100': + + 8.105288 task-clock # 0.924 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.020 M/sec + 24,710,843 cycles # 3.049 GHz [50.85%] + 8,609,115 stalled-cycles-frontend # 34.84% frontend cycles idle + 511,967 stalled-cycles-backend # 2.07% backend cycles idle + 52,146,432 instructions # 2.11 insns per cycle + # 0.17 stalled cycles per insn + 1,479,068 branches # 182.482 M/sec + 11,732 branch-misses # 0.79% of all branches [70.47%] + + 0.008771563 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply 100': + + 8.104444 task-clock # 0.935 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.020 M/sec + 24,657,762 cycles # 3.042 GHz [50.84%] + 8,398,717 stalled-cycles-frontend # 34.06% frontend cycles idle + 1,004,334 stalled-cycles-backend # 4.07% backend cycles idle + 52,144,374 instructions # 2.11 insns per cycle + # 0.16 stalled cycles per insn + 1,478,685 branches # 182.454 M/sec + 11,712 branch-misses # 0.79% of all branches [72.47%] + + 0.008664229 seconds time elapsed + diff --git a/finalExperimentRuns/gcc_output_1000 b/finalExperimentRuns/gcc_output_1000 new file mode 100644 index 0000000..143e8fe --- /dev/null +++ b/finalExperimentRuns/gcc_output_1000 @@ -0,0 +1,180 @@ +Iteration 1 + + Performance counter stats for './matrixMultiply 1000': + + 9926.708359 task-clock # 0.997 CPUs utilized + 13 context-switches # 0.000 M/sec + 1 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 30,395,895,247 cycles # 3.062 GHz [83.32%] + 17,442,111,645 stalled-cycles-frontend # 57.38% frontend cycles idle [83.32%] + 4,164,384,143 stalled-cycles-backend # 13.70% backend cycles idle [66.68%] + 50,155,717,267 instructions # 1.65 insns per cycle + # 0.35 stalled cycles per insn [83.36%] + 1,037,542,959 branches # 104.520 M/sec [83.36%] + 1,062,313 branch-misses # 0.10% of all branches [83.32%] + + 9.952598326 seconds time elapsed + +Iteration 2 + + Performance counter stats for './matrixMultiply 1000': + + 9989.945693 task-clock # 0.997 CPUs utilized + 12 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 30,589,481,178 cycles # 3.062 GHz [83.31%] + 17,621,887,593 stalled-cycles-frontend # 57.61% frontend cycles idle [83.34%] + 4,027,488,464 stalled-cycles-backend # 13.17% backend cycles idle [66.69%] + 50,159,899,384 instructions # 1.64 insns per cycle + # 0.35 stalled cycles per insn [83.35%] + 1,037,610,140 branches # 103.865 M/sec [83.34%] + 1,072,230 branch-misses # 0.10% of all branches [83.34%] + + 10.015847661 seconds time elapsed + +Iteration 3 + + Performance counter stats for './matrixMultiply 1000': + + 9872.782392 task-clock # 0.997 CPUs utilized + 12 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 30,230,798,240 cycles # 3.062 GHz [83.31%] + 17,272,580,035 stalled-cycles-frontend # 57.14% frontend cycles idle [83.34%] + 3,944,001,447 stalled-cycles-backend # 13.05% backend cycles idle [66.70%] + 50,157,447,316 instructions # 1.66 insns per cycle + # 0.34 stalled cycles per insn [83.35%] + 1,037,562,856 branches # 105.093 M/sec [83.35%] + 1,070,768 branch-misses # 0.10% of all branches [83.33%] + + 9.898330956 seconds time elapsed + +Iteration 4 + + Performance counter stats for './matrixMultiply 1000': + + 9960.652528 task-clock # 0.997 CPUs utilized + 12 context-switches # 0.000 M/sec + 2 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 30,442,315,955 cycles # 3.056 GHz [83.34%] + 17,495,255,191 stalled-cycles-frontend # 57.47% frontend cycles idle [83.34%] + 4,435,073,572 stalled-cycles-backend # 14.57% backend cycles idle [66.67%] + 50,158,527,538 instructions # 1.65 insns per cycle + # 0.35 stalled cycles per insn [83.34%] + 1,037,684,531 branches # 104.178 M/sec [83.34%] + 1,090,517 branch-misses # 0.11% of all branches [83.35%] + + 9.986475787 seconds time elapsed + +Iteration 5 + + Performance counter stats for './matrixMultiply 1000': + + 9983.888308 task-clock # 0.997 CPUs utilized + 12 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 30,571,145,817 cycles # 3.062 GHz [83.33%] + 17,613,563,281 stalled-cycles-frontend # 57.61% frontend cycles idle [83.34%] + 4,138,212,697 stalled-cycles-backend # 13.54% backend cycles idle [66.67%] + 50,155,387,715 instructions # 1.64 insns per cycle + # 0.35 stalled cycles per insn [83.33%] + 1,037,567,984 branches # 103.924 M/sec [83.33%] + 1,068,596 branch-misses # 0.10% of all branches [83.35%] + + 10.010151775 seconds time elapsed + +Iteration 6 + + Performance counter stats for './matrixMultiply 1000': + + 9853.437478 task-clock # 0.997 CPUs utilized + 13 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 30,170,703,152 cycles # 3.062 GHz [83.32%] + 17,222,155,727 stalled-cycles-frontend # 57.08% frontend cycles idle [83.32%] + 4,055,670,788 stalled-cycles-backend # 13.44% backend cycles idle [66.67%] + 50,155,779,844 instructions # 1.66 insns per cycle + # 0.34 stalled cycles per insn [83.36%] + 1,037,560,137 branches # 105.299 M/sec [83.36%] + 1,074,365 branch-misses # 0.10% of all branches [83.35%] + + 9.879197104 seconds time elapsed + +Iteration 7 + + Performance counter stats for './matrixMultiply 1000': + + 9885.285809 task-clock # 0.997 CPUs utilized + 12 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 30,268,249,154 cycles # 3.062 GHz [83.34%] + 17,314,471,270 stalled-cycles-frontend # 57.20% frontend cycles idle [83.33%] + 4,082,457,570 stalled-cycles-backend # 13.49% backend cycles idle [66.66%] + 50,154,666,290 instructions # 1.66 insns per cycle + # 0.35 stalled cycles per insn [83.33%] + 1,037,650,375 branches # 104.969 M/sec [83.33%] + 1,085,196 branch-misses # 0.10% of all branches [83.37%] + + 9.910950714 seconds time elapsed + +Iteration 8 + + Performance counter stats for './matrixMultiply 1000': + + 9914.407092 task-clock # 0.997 CPUs utilized + 12 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 30,356,897,007 cycles # 3.062 GHz [83.34%] + 17,403,547,284 stalled-cycles-frontend # 57.33% frontend cycles idle [83.34%] + 4,191,887,997 stalled-cycles-backend # 13.81% backend cycles idle [66.68%] + 50,156,122,104 instructions # 1.65 insns per cycle + # 0.35 stalled cycles per insn [83.34%] + 1,037,539,929 branches # 104.650 M/sec [83.34%] + 1,065,113 branch-misses # 0.10% of all branches [83.31%] + + 9.940317228 seconds time elapsed + +Iteration 9 + + Performance counter stats for './matrixMultiply 1000': + + 9937.674711 task-clock # 0.997 CPUs utilized + 12 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 30,427,817,353 cycles # 3.062 GHz [83.34%] + 17,471,897,841 stalled-cycles-frontend # 57.42% frontend cycles idle [83.34%] + 4,027,974,555 stalled-cycles-backend # 13.24% backend cycles idle [66.68%] + 50,158,261,034 instructions # 1.65 insns per cycle + # 0.35 stalled cycles per insn [83.34%] + 1,037,803,159 branches # 104.431 M/sec [83.34%] + 1,088,832 branch-misses # 0.10% of all branches [83.32%] + + 9.963562648 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply 1000': + + 9878.683493 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 30,249,098,352 cycles # 3.062 GHz [83.32%] + 17,298,067,630 stalled-cycles-frontend # 57.19% frontend cycles idle [83.32%] + 3,974,575,999 stalled-cycles-backend # 13.14% backend cycles idle [66.67%] + 50,150,796,611 instructions # 1.66 insns per cycle + # 0.34 stalled cycles per insn [83.36%] + 1,037,562,373 branches # 105.030 M/sec [83.36%] + 1,067,234 branch-misses # 0.10% of all branches [83.34%] + + 9.904228039 seconds time elapsed + diff --git a/finalExperimentRuns/gcc_output_3000 b/finalExperimentRuns/gcc_output_3000 new file mode 100644 index 0000000..1da4004 --- /dev/null +++ b/finalExperimentRuns/gcc_output_3000 @@ -0,0 +1,180 @@ +Iteration 1 + + Performance counter stats for './matrixMultiply 3000': + + 475300.257913 task-clock # 0.997 CPUs utilized + 594 context-switches # 0.000 M/sec + 1 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 1,455,206,821,524 cycles # 3.062 GHz [83.33%] + 1,096,016,878,004 stalled-cycles-frontend # 75.32% frontend cycles idle [83.33%] + 590,754,550,647 stalled-cycles-backend # 40.60% backend cycles idle [66.67%] + 1,351,742,482,872 instructions # 0.93 insns per cycle + # 0.81 stalled cycles per insn [83.33%] + 27,393,869,687 branches # 57.635 M/sec [83.33%] + 14,090,789 branch-misses # 0.05% of all branches [83.33%] + + 476.494079626 seconds time elapsed + +Iteration 2 + + Performance counter stats for './matrixMultiply 3000': + + 476018.020595 task-clock # 0.997 CPUs utilized + 596 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 1,457,356,444,320 cycles # 3.062 GHz [83.33%] + 1,097,887,781,677 stalled-cycles-frontend # 75.33% frontend cycles idle [83.33%] + 593,165,290,075 stalled-cycles-backend # 40.70% backend cycles idle [66.67%] + 1,351,747,334,373 instructions # 0.93 insns per cycle + # 0.81 stalled cycles per insn [83.33%] + 27,396,936,581 branches # 57.554 M/sec [83.33%] + 14,020,326 branch-misses # 0.05% of all branches [83.33%] + + 477.213941232 seconds time elapsed + +Iteration 3 + + Performance counter stats for './matrixMultiply 3000': + + 474569.437761 task-clock # 0.997 CPUs utilized + 595 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 1,452,880,580,364 cycles # 3.061 GHz [83.33%] + 1,093,658,008,544 stalled-cycles-frontend # 75.28% frontend cycles idle [83.33%] + 598,953,855,484 stalled-cycles-backend # 41.23% backend cycles idle [66.67%] + 1,351,708,174,318 instructions # 0.93 insns per cycle + # 0.81 stalled cycles per insn [83.33%] + 27,397,137,447 branches # 57.731 M/sec [83.33%] + 13,353,583 branch-misses # 0.05% of all branches [83.33%] + + 475.763890065 seconds time elapsed + +Iteration 4 + + Performance counter stats for './matrixMultiply 3000': + + 475646.500239 task-clock # 0.997 CPUs utilized + 596 context-switches # 0.000 M/sec + 1 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 1,456,163,649,462 cycles # 3.061 GHz [83.33%] + 1,096,827,868,403 stalled-cycles-frontend # 75.32% frontend cycles idle [83.33%] + 604,183,182,642 stalled-cycles-backend # 41.49% backend cycles idle [66.67%] + 1,351,761,637,391 instructions # 0.93 insns per cycle + # 0.81 stalled cycles per insn [83.33%] + 27,396,786,793 branches # 57.599 M/sec [83.33%] + 13,320,251 branch-misses # 0.05% of all branches [83.33%] + + 476.841596270 seconds time elapsed + +Iteration 5 + + Performance counter stats for './matrixMultiply 3000': + + 476211.404942 task-clock # 0.997 CPUs utilized + 597 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 1,457,959,876,543 cycles # 3.062 GHz [83.33%] + 1,098,411,393,007 stalled-cycles-frontend # 75.34% frontend cycles idle [83.33%] + 607,061,107,646 stalled-cycles-backend # 41.64% backend cycles idle [66.67%] + 1,351,758,909,796 instructions # 0.93 insns per cycle + # 0.81 stalled cycles per insn [83.33%] + 27,397,007,312 branches # 57.531 M/sec [83.33%] + 13,916,373 branch-misses # 0.05% of all branches [83.33%] + + 477.407551808 seconds time elapsed + +Iteration 6 + + Performance counter stats for './matrixMultiply 3000': + + 475948.824921 task-clock # 0.997 CPUs utilized + 596 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 1,457,196,407,185 cycles # 3.062 GHz [83.33%] + 1,097,491,418,801 stalled-cycles-frontend # 75.32% frontend cycles idle [83.33%] + 619,424,280,588 stalled-cycles-backend # 42.51% backend cycles idle [66.67%] + 1,351,752,463,467 instructions # 0.93 insns per cycle + # 0.81 stalled cycles per insn [83.33%] + 27,395,156,718 branches # 57.559 M/sec [83.33%] + 13,646,100 branch-misses # 0.05% of all branches [83.33%] + + 477.144006180 seconds time elapsed + +Iteration 7 + + Performance counter stats for './matrixMultiply 3000': + + 476555.496544 task-clock # 0.997 CPUs utilized + 597 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 1,459,055,808,779 cycles # 3.062 GHz [83.33%] + 1,099,359,336,770 stalled-cycles-frontend # 75.35% frontend cycles idle [83.33%] + 605,859,886,112 stalled-cycles-backend # 41.52% backend cycles idle [66.67%] + 1,351,748,188,845 instructions # 0.93 insns per cycle + # 0.81 stalled cycles per insn [83.33%] + 27,396,976,815 branches # 57.490 M/sec [83.33%] + 13,608,298 branch-misses # 0.05% of all branches [83.33%] + + 477.751263190 seconds time elapsed + +Iteration 8 + + Performance counter stats for './matrixMultiply 3000': + + 476550.619236 task-clock # 0.997 CPUs utilized + 596 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 1,458,990,788,545 cycles # 3.062 GHz [83.33%] + 1,099,354,204,996 stalled-cycles-frontend # 75.35% frontend cycles idle [83.33%] + 605,327,241,764 stalled-cycles-backend # 41.49% backend cycles idle [66.67%] + 1,351,757,320,368 instructions # 0.93 insns per cycle + # 0.81 stalled cycles per insn [83.33%] + 27,396,713,068 branches # 57.490 M/sec [83.33%] + 13,874,648 branch-misses # 0.05% of all branches [83.33%] + + 477.765602818 seconds time elapsed + +Iteration 9 + + Performance counter stats for './matrixMultiply 3000': + + 477530.594343 task-clock # 0.997 CPUs utilized + 597 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 1,461,959,826,317 cycles # 3.061 GHz [83.33%] + 1,102,192,822,811 stalled-cycles-frontend # 75.39% frontend cycles idle [83.33%] + 602,765,299,108 stalled-cycles-backend # 41.23% backend cycles idle [66.67%] + 1,351,757,093,932 instructions # 0.92 insns per cycle + # 0.82 stalled cycles per insn [83.33%] + 27,398,383,869 branches # 57.375 M/sec [83.33%] + 13,438,518 branch-misses # 0.05% of all branches [83.33%] + + 478.728380489 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply 3000': + + 477246.288455 task-clock # 0.997 CPUs utilized + 598 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 1,461,142,413,968 cycles # 3.062 GHz [83.33%] + 1,101,548,208,648 stalled-cycles-frontend # 75.39% frontend cycles idle [83.33%] + 610,983,270,807 stalled-cycles-backend # 41.82% backend cycles idle [66.67%] + 1,351,767,194,355 instructions # 0.93 insns per cycle + # 0.81 stalled cycles per insn [83.33%] + 27,397,222,382 branches # 57.407 M/sec [83.33%] + 14,071,841 branch-misses # 0.05% of all branches [83.33%] + + 478.444205849 seconds time elapsed + diff --git a/finalExperimentRuns/gcc_output_optimized_10 b/finalExperimentRuns/gcc_output_optimized_10 new file mode 100644 index 0000000..ac95581 --- /dev/null +++ b/finalExperimentRuns/gcc_output_optimized_10 @@ -0,0 +1,180 @@ +Iteration 10 + + Performance counter stats for './matrixMultiply_gcc_optimized 10': + + 0.248266 task-clock # 0.177 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.512 M/sec + cycles + 456,214 stalled-cycles-frontend # 0.00% frontend cycles idle + 322,343 stalled-cycles-backend # 0.00% backend cycles idle + 525,274 instructions # 0.00 insns per cycle + # 0.87 stalled cycles per insn + 102,761 branches # 413.915 M/sec + 6,305 branch-misses # 6.14% of all branches + + 0.001399414 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_gcc_optimized 10': + + 0.250526 task-clock # 0.158 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 126 page-faults # 0.503 M/sec + 761,374 cycles # 3.039 GHz + 460,669 stalled-cycles-frontend # 60.50% frontend cycles idle + 278,379 stalled-cycles-backend # 36.56% backend cycles idle + 531,203 instructions # 0.70 insns per cycle + # 0.87 stalled cycles per insn + 104,155 branches # 415.745 M/sec + branch-misses + + 0.001584906 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_gcc_optimized 10': + + 0.256776 task-clock # 0.261 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.495 M/sec + 769,126 cycles # 2.995 GHz + 464,343 stalled-cycles-frontend # 60.37% frontend cycles idle + 305,155 stalled-cycles-backend # 39.68% backend cycles idle + 537,950 instructions # 0.70 insns per cycle + # 0.86 stalled cycles per insn + 105,335 branches # 410.221 M/sec + branch-misses + + 0.000984387 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_gcc_optimized 10': + + 0.252672 task-clock # 0.303 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.503 M/sec + 758,151 cycles # 3.001 GHz + 456,805 stalled-cycles-frontend # 60.25% frontend cycles idle + 312,114 stalled-cycles-backend # 41.17% backend cycles idle + 535,170 instructions # 0.71 insns per cycle + # 0.85 stalled cycles per insn + 104,829 branches # 414.882 M/sec + branch-misses + + 0.000834122 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_gcc_optimized 10': + + 0.256107 task-clock # 0.319 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.496 M/sec + 777,635 cycles # 3.036 GHz + 471,077 stalled-cycles-frontend # 60.58% frontend cycles idle + 311,009 stalled-cycles-backend # 39.99% backend cycles idle + 537,745 instructions # 0.69 insns per cycle + # 0.88 stalled cycles per insn + 105,304 branches # 411.172 M/sec + branch-misses + + 0.000803563 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_gcc_optimized 10': + + 0.253460 task-clock # 0.266 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.501 M/sec + 770,539 cycles # 3.040 GHz + 464,693 stalled-cycles-frontend # 60.31% frontend cycles idle + 320,075 stalled-cycles-backend # 41.54% backend cycles idle + 535,462 instructions # 0.69 insns per cycle + # 0.87 stalled cycles per insn + 104,877 branches # 413.781 M/sec + branch-misses + + 0.000953846 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_gcc_optimized 10': + + 0.257419 task-clock # 0.307 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.493 M/sec + 771,110 cycles # 2.996 GHz + 466,926 stalled-cycles-frontend # 60.55% frontend cycles idle + 338,319 stalled-cycles-backend # 43.87% backend cycles idle + 536,400 instructions # 0.70 insns per cycle + # 0.87 stalled cycles per insn + 105,084 branches # 408.222 M/sec + branch-misses + + 0.000837879 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_gcc_optimized 10': + + 0.251920 task-clock # 0.292 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.504 M/sec + 765,830 cycles # 3.040 GHz + 460,303 stalled-cycles-frontend # 60.11% frontend cycles idle + 311,031 stalled-cycles-backend # 40.61% backend cycles idle + 536,633 instructions # 0.70 insns per cycle + # 0.86 stalled cycles per insn + 105,101 branches # 417.200 M/sec + branch-misses + + 0.000862143 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_gcc_optimized 10': + + 0.251167 task-clock # 0.329 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.506 M/sec + 765,608 cycles # 3.048 GHz + 463,671 stalled-cycles-frontend # 60.56% frontend cycles idle + 321,814 stalled-cycles-backend # 42.03% backend cycles idle + 534,394 instructions # 0.70 insns per cycle + # 0.87 stalled cycles per insn + 104,724 branches # 416.950 M/sec + branch-misses + + 0.000763382 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_gcc_optimized 10': + + 0.257385 task-clock # 0.271 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.493 M/sec + 781,086 cycles # 3.035 GHz + 474,101 stalled-cycles-frontend # 60.70% frontend cycles idle + 323,675 stalled-cycles-backend # 41.44% backend cycles idle + 537,279 instructions # 0.69 insns per cycle + # 0.88 stalled cycles per insn + 105,124 branches # 408.431 M/sec + branch-misses + + 0.000949143 seconds time elapsed + diff --git a/finalExperimentRuns/gcc_output_optimized_100 b/finalExperimentRuns/gcc_output_optimized_100 new file mode 100644 index 0000000..02b8d64 --- /dev/null +++ b/finalExperimentRuns/gcc_output_optimized_100 @@ -0,0 +1,180 @@ +Iteration 100 + + Performance counter stats for './matrixMultiply_gcc_optimized 100': + + 1.849608 task-clock # 0.769 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.086 M/sec + 5,664,275 cycles # 3.062 GHz + 732,765 stalled-cycles-frontend # 12.94% frontend cycles idle + 415,452 stalled-cycles-backend # 7.33% backend cycles idle + 10,010,514 instructions # 1.77 insns per cycle + # 0.07 stalled cycles per insn + 1,456,645 branches # 787.543 M/sec + branch-misses + + 0.002405004 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_gcc_optimized 100': + + 1.839908 task-clock # 0.666 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.086 M/sec + 5,636,324 cycles # 3.063 GHz + 708,018 stalled-cycles-frontend # 12.56% frontend cycles idle + 1,488,676 stalled-cycles-backend # 26.41% backend cycles idle + 9,999,645 instructions # 1.77 insns per cycle + # 0.15 stalled cycles per insn + 1,455,193 branches # 790.905 M/sec + branch-misses + + 0.002762590 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_gcc_optimized 100': + + 1.894950 task-clock # 0.768 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.084 M/sec + 2,365,633 cycles # 1.248 GHz + 815,107 stalled-cycles-frontend # 34.46% frontend cycles idle + 1,395,279 stalled-cycles-backend # 58.98% backend cycles idle + 10,005,259 instructions # 4.23 insns per cycle + # 0.14 stalled cycles per insn + 1,456,126 branches # 768.424 M/sec + 14,501 branch-misses # 1.00% of all branches [58.57%] + + 0.002468015 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_gcc_optimized 100': + + 1.875375 task-clock # 0.766 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.085 M/sec + 4,882,424 cycles # 2.603 GHz + 752,488 stalled-cycles-frontend # 15.41% frontend cycles idle + 1,341,572 stalled-cycles-backend # 27.48% backend cycles idle + 10,020,521 instructions # 2.05 insns per cycle + # 0.13 stalled cycles per insn + 1,458,408 branches # 777.662 M/sec + 15,896 branch-misses # 1.09% of all branches [14.62%] + + 0.002449323 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_gcc_optimized 100': + + 1.847871 task-clock # 0.723 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.086 M/sec + 5,660,288 cycles # 3.063 GHz + 727,224 stalled-cycles-frontend # 12.85% frontend cycles idle + 1,493,416 stalled-cycles-backend # 26.38% backend cycles idle + 10,014,309 instructions # 1.77 insns per cycle + # 0.15 stalled cycles per insn + 1,457,300 branches # 788.637 M/sec + branch-misses + + 0.002555472 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_gcc_optimized 100': + + 1.879068 task-clock # 0.768 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 158 page-faults # 0.084 M/sec + 3,421,298 cycles # 1.821 GHz + 761,542 stalled-cycles-frontend # 22.26% frontend cycles idle + 1,433,667 stalled-cycles-backend # 41.90% backend cycles idle + 10,018,860 instructions # 2.93 insns per cycle + # 0.14 stalled cycles per insn + 1,458,144 branches # 775.993 M/sec + 14,789 branch-misses # 1.01% of all branches [40.00%] + + 0.002447307 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_gcc_optimized 100': + + 1.883634 task-clock # 0.739 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.084 M/sec + 321,967 cycles # 0.171 GHz + 796,732 stalled-cycles-frontend # 247.46% frontend cycles idle + 443,941 stalled-cycles-backend # 137.88% backend cycles idle + 10,018,569 instructions # 31.12 insns per cycle + # 0.08 stalled cycles per insn + 1,458,017 branches # 774.045 M/sec + 16,126 branch-misses # 1.11% of all branches [93.59%] + + 0.002549813 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_gcc_optimized 100': + + 1.869718 task-clock # 0.709 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.085 M/sec + 1,144,239 cycles # 0.612 GHz + 746,887 stalled-cycles-frontend # 65.27% frontend cycles idle + 1,506,395 stalled-cycles-backend # 131.65% backend cycles idle + 10,012,630 instructions # 8.75 insns per cycle + # 0.15 stalled cycles per insn + 1,457,200 branches # 779.369 M/sec + 13,662 branch-misses # 0.94% of all branches [79.35%] + + 0.002638744 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_gcc_optimized 100': + + 1.919213 task-clock # 0.588 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.083 M/sec + 1,076,319 cycles # 0.561 GHz + 895,237 stalled-cycles-frontend # 83.18% frontend cycles idle + 1,645,401 stalled-cycles-backend # 152.87% backend cycles idle + 10,022,421 instructions # 9.31 insns per cycle + # 0.16 stalled cycles per insn + 1,458,721 branches # 760.062 M/sec + 13,434 branch-misses # 0.92% of all branches [80.97%] + + 0.003264739 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_gcc_optimized 100': + + 1.883476 task-clock # 0.690 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 158 page-faults # 0.084 M/sec + 4,552,284 cycles # 2.417 GHz + 783,434 stalled-cycles-frontend # 17.21% frontend cycles idle + 1,317,945 stalled-cycles-backend # 28.95% backend cycles idle + 10,008,721 instructions # 2.20 insns per cycle + # 0.13 stalled cycles per insn + 1,456,761 branches # 773.443 M/sec + 14,949 branch-misses # 1.03% of all branches [20.70%] + + 0.002728343 seconds time elapsed + diff --git a/finalExperimentRuns/gcc_output_optimized_1000 b/finalExperimentRuns/gcc_output_optimized_1000 new file mode 100644 index 0000000..4da73bd --- /dev/null +++ b/finalExperimentRuns/gcc_output_optimized_1000 @@ -0,0 +1,180 @@ +Iteration 1000 + + Performance counter stats for './matrixMultiply_gcc_optimized 1000': + + 8524.007146 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,101,094,636 cycles # 3.062 GHz [83.34%] + 21,174,291,573 stalled-cycles-frontend # 81.12% frontend cycles idle [83.34%] + 8,086,059,447 stalled-cycles-backend # 30.98% backend cycles idle [66.67%] + 8,147,023,966 instructions # 0.31 insns per cycle + # 2.60 stalled cycles per insn [83.34%] + 1,034,584,832 branches # 121.373 M/sec [83.34%] + 1,056,203 branch-misses # 0.10% of all branches [83.35%] + + 8.546169904 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_gcc_optimized 1000': + + 8480.788072 task-clock # 0.997 CPUs utilized + 12 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 25,968,524,213 cycles # 3.062 GHz [83.30%] + 21,026,920,732 stalled-cycles-frontend # 80.97% frontend cycles idle [83.35%] + 8,372,590,975 stalled-cycles-backend # 32.24% backend cycles idle [66.69%] + 8,146,964,730 instructions # 0.31 insns per cycle + # 2.58 stalled cycles per insn [83.35%] + 1,034,591,652 branches # 121.992 M/sec [83.34%] + 1,061,216 branch-misses # 0.10% of all branches [83.32%] + + 8.502870304 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_gcc_optimized 1000': + + 8499.518119 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,025,909,327 cycles # 3.062 GHz [83.34%] + 21,094,624,277 stalled-cycles-frontend # 81.05% frontend cycles idle [83.33%] + 7,597,054,786 stalled-cycles-backend # 29.19% backend cycles idle [66.67%] + 8,146,986,115 instructions # 0.31 insns per cycle + # 2.59 stalled cycles per insn [83.34%] + 1,034,616,479 branches # 121.726 M/sec [83.33%] + 1,055,439 branch-misses # 0.10% of all branches [83.34%] + + 8.521753033 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_gcc_optimized 1000': + + 8567.572363 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,233,696,728 cycles # 3.062 GHz [83.33%] + 21,318,293,997 stalled-cycles-frontend # 81.26% frontend cycles idle [83.33%] + 8,013,091,628 stalled-cycles-backend # 30.55% backend cycles idle [66.66%] + 8,147,236,780 instructions # 0.31 insns per cycle + # 2.62 stalled cycles per insn [83.33%] + 1,033,816,017 branches # 120.666 M/sec [83.37%] + 1,057,271 branch-misses # 0.10% of all branches [83.35%] + + 8.589968438 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_gcc_optimized 1000': + + 8498.048583 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,020,348,975 cycles # 3.062 GHz [83.33%] + 21,088,844,072 stalled-cycles-frontend # 81.05% frontend cycles idle [83.33%] + 7,943,621,197 stalled-cycles-backend # 30.53% backend cycles idle [66.66%] + 8,146,770,884 instructions # 0.31 insns per cycle + # 2.59 stalled cycles per insn [83.33%] + 1,034,364,119 branches # 121.718 M/sec [83.33%] + 1,057,797 branch-misses # 0.10% of all branches [83.35%] + + 8.520236784 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_gcc_optimized 1000': + + 8523.980835 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 26,099,105,396 cycles # 3.062 GHz [83.34%] + 21,174,457,333 stalled-cycles-frontend # 81.13% frontend cycles idle [83.34%] + 7,971,768,233 stalled-cycles-backend # 30.54% backend cycles idle [66.67%] + 8,147,071,539 instructions # 0.31 insns per cycle + # 2.60 stalled cycles per insn [83.34%] + 1,034,599,564 branches # 121.375 M/sec [83.34%] + 1,067,177 branch-misses # 0.10% of all branches [83.33%] + + 8.546194472 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_gcc_optimized 1000': + + 8539.281197 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,147,787,605 cycles # 3.062 GHz [83.32%] + 21,232,515,363 stalled-cycles-frontend # 81.20% frontend cycles idle [83.32%] + 7,997,377,449 stalled-cycles-backend # 30.59% backend cycles idle [66.67%] + 8,142,040,940 instructions # 0.31 insns per cycle + # 2.61 stalled cycles per insn [83.36%] + 1,034,582,094 branches # 121.156 M/sec [83.37%] + 1,067,037 branch-misses # 0.10% of all branches [83.35%] + + 8.561577755 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_gcc_optimized 1000': + + 8483.737252 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 25,976,418,040 cycles # 3.062 GHz [83.30%] + 21,031,895,833 stalled-cycles-frontend # 80.97% frontend cycles idle [83.35%] + 7,905,370,309 stalled-cycles-backend # 30.43% backend cycles idle [66.70%] + 8,147,671,922 instructions # 0.31 insns per cycle + # 2.58 stalled cycles per insn [83.35%] + 1,034,722,307 branches # 121.965 M/sec [83.35%] + 1,058,817 branch-misses # 0.10% of all branches [83.31%] + + 8.505817558 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_gcc_optimized 1000': + + 8487.683539 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 25,989,385,640 cycles # 3.062 GHz [83.31%] + 21,054,478,049 stalled-cycles-frontend # 81.01% frontend cycles idle [83.31%] + 7,474,887,009 stalled-cycles-backend # 28.76% backend cycles idle [66.71%] + 8,146,958,745 instructions # 0.31 insns per cycle + # 2.58 stalled cycles per insn [83.36%] + 1,034,611,204 branches # 121.896 M/sec [83.36%] + 1,063,850 branch-misses # 0.10% of all branches [83.34%] + + 8.509833793 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_gcc_optimized 1000': + + 8485.279076 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 25,980,743,845 cycles # 3.062 GHz [83.31%] + 21,043,200,703 stalled-cycles-frontend # 81.00% frontend cycles idle [83.31%] + 7,855,243,129 stalled-cycles-backend # 30.23% backend cycles idle [66.70%] + 8,147,134,514 instructions # 0.31 insns per cycle + # 2.58 stalled cycles per insn [83.35%] + 1,034,632,398 branches # 121.933 M/sec [83.35%] + 1,068,154 branch-misses # 0.10% of all branches [83.34%] + + 8.507968113 seconds time elapsed + diff --git a/finalExperimentRuns/gcc_output_optimized_3000 b/finalExperimentRuns/gcc_output_optimized_3000 new file mode 100644 index 0000000..bed3fc6 --- /dev/null +++ b/finalExperimentRuns/gcc_output_optimized_3000 @@ -0,0 +1,180 @@ +Iteration 3000 + + Performance counter stats for './matrixMultiply_gcc_optimized 3000': + + 307990.722698 task-clock # 0.997 CPUs utilized + 385 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 942,885,267,154 cycles # 3.061 GHz [83.33%] + 813,151,718,128 stalled-cycles-frontend # 86.24% frontend cycles idle [83.33%] + 479,110,721,206 stalled-cycles-backend # 50.81% backend cycles idle [66.67%] + 217,537,751,166 instructions # 0.23 insns per cycle + # 3.74 stalled cycles per insn [83.33%] + 27,350,281,815 branches # 88.802 M/sec [83.33%] + 13,416,834 branch-misses # 0.05% of all branches [83.33%] + + 308.764947876 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_gcc_optimized 3000': + + 309095.950256 task-clock # 0.997 CPUs utilized + 388 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 946,223,423,681 cycles # 3.061 GHz [83.33%] + 816,700,149,527 stalled-cycles-frontend # 86.31% frontend cycles idle [83.33%] + 478,184,484,905 stalled-cycles-backend # 50.54% backend cycles idle [66.67%] + 217,549,241,887 instructions # 0.23 insns per cycle + # 3.75 stalled cycles per insn [83.33%] + 27,352,134,829 branches # 88.491 M/sec [83.33%] + 12,496,243 branch-misses # 0.05% of all branches [83.33%] + + 309.874694599 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_gcc_optimized 3000': + + 308456.151399 task-clock # 0.997 CPUs utilized + 387 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 944,147,394,731 cycles # 3.061 GHz [83.33%] + 815,004,329,682 stalled-cycles-frontend # 86.32% frontend cycles idle [83.33%] + 492,153,218,620 stalled-cycles-backend # 52.13% backend cycles idle [66.67%] + 217,542,001,519 instructions # 0.23 insns per cycle + # 3.75 stalled cycles per insn [83.33%] + 27,351,929,959 branches # 88.674 M/sec [83.33%] + 12,737,640 branch-misses # 0.05% of all branches [83.33%] + + 309.231466001 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_gcc_optimized 3000': + + 308648.298096 task-clock # 0.997 CPUs utilized + 387 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 944,897,614,537 cycles # 3.061 GHz [83.33%] + 815,188,834,965 stalled-cycles-frontend # 86.27% frontend cycles idle [83.33%] + 485,379,940,869 stalled-cycles-backend # 51.37% backend cycles idle [66.67%] + 217,534,979,891 instructions # 0.23 insns per cycle + # 3.75 stalled cycles per insn [83.33%] + 27,351,527,278 branches # 88.617 M/sec [83.33%] + 13,388,415 branch-misses # 0.05% of all branches [83.33%] + + 309.423862273 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_gcc_optimized 3000': + + 309909.768801 task-clock # 0.997 CPUs utilized + 388 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 948,763,312,383 cycles # 3.061 GHz [83.33%] + 819,052,696,950 stalled-cycles-frontend # 86.33% frontend cycles idle [83.33%] + 482,990,679,317 stalled-cycles-backend # 50.91% backend cycles idle [66.67%] + 217,539,864,459 instructions # 0.23 insns per cycle + # 3.77 stalled cycles per insn [83.33%] + 27,352,050,126 branches # 88.258 M/sec [83.33%] + 13,177,647 branch-misses # 0.05% of all branches [83.33%] + + 310.688240232 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_gcc_optimized 3000': + + 309327.733528 task-clock # 0.997 CPUs utilized + 388 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 947,010,787,729 cycles # 3.062 GHz [83.33%] + 817,412,412,659 stalled-cycles-frontend # 86.32% frontend cycles idle [83.33%] + 488,015,320,106 stalled-cycles-backend # 51.53% backend cycles idle [66.67%] + 217,532,974,150 instructions # 0.23 insns per cycle + # 3.76 stalled cycles per insn [83.33%] + 27,349,693,081 branches # 88.417 M/sec [83.33%] + 13,212,984 branch-misses # 0.05% of all branches [83.33%] + + 310.105193532 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_gcc_optimized 3000': + + 310282.492791 task-clock # 0.997 CPUs utilized + 389 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 949,907,449,960 cycles # 3.061 GHz [83.33%] + 820,459,518,182 stalled-cycles-frontend # 86.37% frontend cycles idle [83.33%] + 471,778,705,376 stalled-cycles-backend # 49.67% backend cycles idle [66.67%] + 217,524,558,295 instructions # 0.23 insns per cycle + # 3.77 stalled cycles per insn [83.33%] + 27,351,609,383 branches # 88.151 M/sec [83.33%] + 13,344,766 branch-misses # 0.05% of all branches [83.33%] + + 311.104012259 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_gcc_optimized 3000': + + 310349.330476 task-clock # 0.997 CPUs utilized + 389 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 950,012,415,697 cycles # 3.061 GHz [83.33%] + 820,923,300,577 stalled-cycles-frontend # 86.41% frontend cycles idle [83.33%] + 468,957,637,855 stalled-cycles-backend # 49.36% backend cycles idle [66.67%] + 217,545,884,095 instructions # 0.23 insns per cycle + # 3.77 stalled cycles per insn [83.33%] + 27,353,566,509 branches # 88.138 M/sec [83.33%] + 12,597,801 branch-misses # 0.05% of all branches [83.33%] + + 311.146848527 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_gcc_optimized 3000': + + 310848.446781 task-clock # 0.997 CPUs utilized + 389 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 951,637,386,126 cycles # 3.061 GHz [83.33%] + 822,546,224,447 stalled-cycles-frontend # 86.43% frontend cycles idle [83.33%] + 495,897,697,137 stalled-cycles-backend # 52.11% backend cycles idle [66.67%] + 217,546,544,859 instructions # 0.23 insns per cycle + # 3.78 stalled cycles per insn [83.33%] + 27,353,133,771 branches # 87.995 M/sec [83.33%] + 13,283,333 branch-misses # 0.05% of all branches [83.33%] + + 311.629074931 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_gcc_optimized 3000': + + 312178.283971 task-clock # 0.997 CPUs utilized + 391 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 955,712,667,148 cycles # 3.061 GHz [83.33%] + 826,044,644,959 stalled-cycles-frontend # 86.43% frontend cycles idle [83.33%] + 499,061,986,949 stalled-cycles-backend # 52.22% backend cycles idle [66.67%] + 217,558,514,147 instructions # 0.23 insns per cycle + # 3.80 stalled cycles per insn [83.33%] + 27,352,241,903 branches # 87.617 M/sec [83.33%] + 13,193,839 branch-misses # 0.05% of all branches [83.33%] + + 312.962113204 seconds time elapsed + diff --git a/finalExperimentRuns/java_output_10 b/finalExperimentRuns/java_output_10 new file mode 100644 index 0000000..e2ed670 --- /dev/null +++ b/finalExperimentRuns/java_output_10 @@ -0,0 +1,180 @@ +Iteration 1 + + Performance counter stats for 'java MatrixMultiplier 10': + + 72.815106 task-clock # 0.984 CPUs utilized + 139 context-switches # 0.002 M/sec + 49 CPU-migrations # 0.001 M/sec + 4,253 page-faults # 0.058 M/sec + 212,160,742 cycles # 2.914 GHz [83.64%] + 114,430,847 stalled-cycles-frontend # 53.94% frontend cycles idle [83.36%] + 80,787,912 stalled-cycles-backend # 38.08% backend cycles idle [62.53%] + 182,410,561 instructions # 0.86 insns per cycle + # 0.63 stalled cycles per insn [79.34%] + 34,469,054 branches # 473.378 M/sec [86.37%] + 2,054,782 branch-misses # 5.96% of all branches [86.95%] + + 0.073987229 seconds time elapsed + +Iteration 2 + + Performance counter stats for 'java MatrixMultiplier 10': + + 74.167240 task-clock # 0.981 CPUs utilized + 146 context-switches # 0.002 M/sec + 44 CPU-migrations # 0.001 M/sec + 4,255 page-faults # 0.057 M/sec + 217,762,691 cycles # 2.936 GHz [79.75%] + 119,247,170 stalled-cycles-frontend # 54.76% frontend cycles idle [82.58%] + 81,455,743 stalled-cycles-backend # 37.41% backend cycles idle [68.10%] + 183,508,037 instructions # 0.84 insns per cycle + # 0.65 stalled cycles per insn [84.40%] + 35,369,835 branches # 476.893 M/sec [85.66%] + 2,040,142 branch-misses # 5.77% of all branches [86.07%] + + 0.075620599 seconds time elapsed + +Iteration 3 + + Performance counter stats for 'java MatrixMultiplier 10': + + 75.588706 task-clock # 0.981 CPUs utilized + 148 context-switches # 0.002 M/sec + 47 CPU-migrations # 0.001 M/sec + 4,233 page-faults # 0.056 M/sec + 219,952,171 cycles # 2.910 GHz [83.32%] + 123,320,418 stalled-cycles-frontend # 56.07% frontend cycles idle [85.04%] + 82,988,638 stalled-cycles-backend # 37.73% backend cycles idle [68.62%] + 183,258,921 instructions # 0.83 insns per cycle + # 0.67 stalled cycles per insn [85.04%] + 34,664,677 branches # 458.596 M/sec [83.94%] + 2,046,911 branch-misses # 5.90% of all branches [81.13%] + + 0.077076913 seconds time elapsed + +Iteration 4 + + Performance counter stats for 'java MatrixMultiplier 10': + + 73.785766 task-clock # 0.983 CPUs utilized + 156 context-switches # 0.002 M/sec + 44 CPU-migrations # 0.001 M/sec + 4,255 page-faults # 0.058 M/sec + 213,783,464 cycles # 2.897 GHz [83.60%] + 116,799,942 stalled-cycles-frontend # 54.63% frontend cycles idle [78.27%] + 77,769,074 stalled-cycles-backend # 36.38% backend cycles idle [68.20%] + 182,198,452 instructions # 0.85 insns per cycle + # 0.64 stalled cycles per insn [84.73%] + 35,172,437 branches # 476.683 M/sec [84.90%] + 2,002,118 branch-misses # 5.69% of all branches [87.69%] + + 0.075044359 seconds time elapsed + +Iteration 5 + + Performance counter stats for 'java MatrixMultiplier 10': + + 75.848184 task-clock # 0.963 CPUs utilized + 177 context-switches # 0.002 M/sec + 49 CPU-migrations # 0.001 M/sec + 4,256 page-faults # 0.056 M/sec + 222,550,625 cycles # 2.934 GHz [75.92%] + 122,058,241 stalled-cycles-frontend # 54.85% frontend cycles idle [83.66%] + 82,982,505 stalled-cycles-backend # 37.29% backend cycles idle [68.71%] + 183,204,443 instructions # 0.82 insns per cycle + # 0.67 stalled cycles per insn [84.97%] + 35,265,600 branches # 464.950 M/sec [85.01%] + 2,010,327 branch-misses # 5.70% of all branches [88.69%] + + 0.078736540 seconds time elapsed + +Iteration 6 + + Performance counter stats for 'java MatrixMultiplier 10': + + 74.552674 task-clock # 0.978 CPUs utilized + 146 context-switches # 0.002 M/sec + 45 CPU-migrations # 0.001 M/sec + 4,252 page-faults # 0.057 M/sec + 220,109,964 cycles # 2.952 GHz [78.21%] + 119,486,246 stalled-cycles-frontend # 54.28% frontend cycles idle [84.59%] + 80,845,000 stalled-cycles-backend # 36.73% backend cycles idle [68.08%] + 184,366,056 instructions # 0.84 insns per cycle + # 0.65 stalled cycles per insn [84.04%] + 34,909,548 branches # 468.253 M/sec [84.14%] + 2,006,308 branch-misses # 5.75% of all branches [87.11%] + + 0.076255589 seconds time elapsed + +Iteration 7 + + Performance counter stats for 'java MatrixMultiplier 10': + + 74.919822 task-clock # 0.945 CPUs utilized + 136 context-switches # 0.002 M/sec + 49 CPU-migrations # 0.001 M/sec + 4,232 page-faults # 0.056 M/sec + 215,665,200 cycles # 2.879 GHz [79.71%] + 122,784,131 stalled-cycles-frontend # 56.93% frontend cycles idle [86.01%] + 83,019,995 stalled-cycles-backend # 38.49% backend cycles idle [68.15%] + 184,014,109 instructions # 0.85 insns per cycle + # 0.67 stalled cycles per insn [84.43%] + 34,335,055 branches # 458.291 M/sec [85.30%] + 2,017,824 branch-misses # 5.88% of all branches [83.15%] + + 0.079275197 seconds time elapsed + +Iteration 8 + + Performance counter stats for 'java MatrixMultiplier 10': + + 72.325490 task-clock # 0.978 CPUs utilized + 136 context-switches # 0.002 M/sec + 48 CPU-migrations # 0.001 M/sec + 4,255 page-faults # 0.059 M/sec + 211,331,070 cycles # 2.922 GHz [83.78%] + 115,877,185 stalled-cycles-frontend # 54.83% frontend cycles idle [77.28%] + 75,513,035 stalled-cycles-backend # 35.73% backend cycles idle [67.46%] + 182,895,864 instructions # 0.87 insns per cycle + # 0.63 stalled cycles per insn [83.79%] + 35,178,485 branches # 486.391 M/sec [85.16%] + 2,003,949 branch-misses # 5.70% of all branches [88.08%] + + 0.073979011 seconds time elapsed + +Iteration 9 + + Performance counter stats for 'java MatrixMultiplier 10': + + 74.615253 task-clock # 0.984 CPUs utilized + 136 context-switches # 0.002 M/sec + 41 CPU-migrations # 0.001 M/sec + 4,233 page-faults # 0.057 M/sec + 221,217,036 cycles # 2.965 GHz [78.81%] + 120,264,263 stalled-cycles-frontend # 54.36% frontend cycles idle [83.70%] + 80,040,282 stalled-cycles-backend # 36.18% backend cycles idle [68.39%] + 183,237,659 instructions # 0.83 insns per cycle + # 0.66 stalled cycles per insn [85.00%] + 35,439,125 branches # 474.958 M/sec [85.72%] + 2,039,792 branch-misses # 5.76% of all branches [85.90%] + + 0.075848324 seconds time elapsed + +Iteration 10 + + Performance counter stats for 'java MatrixMultiplier 10': + + 74.492689 task-clock # 0.973 CPUs utilized + 150 context-switches # 0.002 M/sec + 41 CPU-migrations # 0.001 M/sec + 4,255 page-faults # 0.057 M/sec + 216,573,426 cycles # 2.907 GHz [84.20%] + 120,115,088 stalled-cycles-frontend # 55.46% frontend cycles idle [79.15%] + 81,505,498 stalled-cycles-backend # 37.63% backend cycles idle [67.75%] + 184,181,901 instructions # 0.85 insns per cycle + # 0.65 stalled cycles per insn [84.02%] + 35,341,758 branches # 474.433 M/sec [84.88%] + 2,031,210 branch-misses # 5.75% of all branches [86.54%] + + 0.076539126 seconds time elapsed + diff --git a/finalExperimentRuns/java_output_100 b/finalExperimentRuns/java_output_100 new file mode 100644 index 0000000..5ff2003 --- /dev/null +++ b/finalExperimentRuns/java_output_100 @@ -0,0 +1,180 @@ +Iteration 1 + + Performance counter stats for 'java MatrixMultiplier 100': + + 103.321799 task-clock # 1.117 CPUs utilized + 175 context-switches # 0.002 M/sec + 49 CPU-migrations # 0.000 M/sec + 4,909 page-faults # 0.048 M/sec + 301,216,233 cycles # 2.915 GHz [84.32%] + 163,477,231 stalled-cycles-frontend # 54.27% frontend cycles idle [75.05%] + 103,759,590 stalled-cycles-backend # 34.45% backend cycles idle [66.56%] + 297,736,573 instructions # 0.99 insns per cycle + # 0.55 stalled cycles per insn [88.51%] + 55,643,691 branches # 538.547 M/sec [88.33%] + 2,889,288 branch-misses # 5.19% of all branches [87.38%] + + 0.092533164 seconds time elapsed + +Iteration 2 + + Performance counter stats for 'java MatrixMultiplier 100': + + 103.759715 task-clock # 1.117 CPUs utilized + 160 context-switches # 0.002 M/sec + 47 CPU-migrations # 0.000 M/sec + 4,901 page-faults # 0.047 M/sec + 305,125,075 cycles # 2.941 GHz [84.89%] + 160,369,545 stalled-cycles-frontend # 52.56% frontend cycles idle [74.82%] + 101,259,462 stalled-cycles-backend # 33.19% backend cycles idle [66.10%] + 287,525,333 instructions # 0.94 insns per cycle + # 0.56 stalled cycles per insn [89.35%] + 54,463,632 branches # 524.902 M/sec [88.47%] + 3,052,971 branch-misses # 5.61% of all branches [87.52%] + + 0.092870741 seconds time elapsed + +Iteration 3 + + Performance counter stats for 'java MatrixMultiplier 100': + + 104.375659 task-clock # 1.059 CPUs utilized + 159 context-switches # 0.002 M/sec + 44 CPU-migrations # 0.000 M/sec + 4,899 page-faults # 0.047 M/sec + 312,875,028 cycles # 2.998 GHz [77.06%] + 168,819,560 stalled-cycles-frontend # 53.96% frontend cycles idle [78.79%] + 101,445,345 stalled-cycles-backend # 32.42% backend cycles idle [71.36%] + 301,146,158 instructions # 0.96 insns per cycle + # 0.56 stalled cycles per insn [88.93%] + 55,088,281 branches # 527.789 M/sec [88.62%] + 2,998,976 branch-misses # 5.44% of all branches [85.92%] + + 0.098543800 seconds time elapsed + +Iteration 4 + + Performance counter stats for 'java MatrixMultiplier 100': + + 106.506564 task-clock # 1.098 CPUs utilized + 150 context-switches # 0.001 M/sec + 44 CPU-migrations # 0.000 M/sec + 4,908 page-faults # 0.046 M/sec + 314,107,537 cycles # 2.949 GHz [80.00%] + 168,129,079 stalled-cycles-frontend # 53.53% frontend cycles idle [83.52%] + 110,233,792 stalled-cycles-backend # 35.09% backend cycles idle [61.34%] + 267,033,663 instructions # 0.85 insns per cycle + # 0.63 stalled cycles per insn [88.69%] + 54,684,767 branches # 513.440 M/sec [89.14%] + 3,034,135 branch-misses # 5.55% of all branches [87.76%] + + 0.096957055 seconds time elapsed + +Iteration 5 + + Performance counter stats for 'java MatrixMultiplier 100': + + 103.919871 task-clock # 1.123 CPUs utilized + 172 context-switches # 0.002 M/sec + 44 CPU-migrations # 0.000 M/sec + 4,916 page-faults # 0.047 M/sec + 302,840,737 cycles # 2.914 GHz [85.86%] + 156,296,630 stalled-cycles-frontend # 51.61% frontend cycles idle [83.16%] + 112,184,665 stalled-cycles-backend # 37.04% backend cycles idle [58.29%] + 266,462,000 instructions # 0.88 insns per cycle + # 0.59 stalled cycles per insn [80.55%] + 54,001,529 branches # 519.646 M/sec [87.11%] + 2,916,764 branch-misses # 5.40% of all branches [87.46%] + + 0.092571453 seconds time elapsed + +Iteration 6 + + Performance counter stats for 'java MatrixMultiplier 100': + + 104.129244 task-clock # 1.110 CPUs utilized + 167 context-switches # 0.002 M/sec + 40 CPU-migrations # 0.000 M/sec + 4,901 page-faults # 0.047 M/sec + 305,068,140 cycles # 2.930 GHz [84.38%] + 160,683,594 stalled-cycles-frontend # 52.67% frontend cycles idle [83.04%] + 108,136,607 stalled-cycles-backend # 35.45% backend cycles idle [58.36%] + 262,531,379 instructions # 0.86 insns per cycle + # 0.61 stalled cycles per insn [84.17%] + 52,184,320 branches # 501.150 M/sec [87.66%] + 3,073,796 branch-misses # 5.89% of all branches [88.33%] + + 0.093834708 seconds time elapsed + +Iteration 7 + + Performance counter stats for 'java MatrixMultiplier 100': + + 104.108239 task-clock # 1.125 CPUs utilized + 157 context-switches # 0.002 M/sec + 42 CPU-migrations # 0.000 M/sec + 4,898 page-faults # 0.047 M/sec + 307,017,870 cycles # 2.949 GHz [84.90%] + 164,306,170 stalled-cycles-frontend # 53.52% frontend cycles idle [77.93%] + 103,125,565 stalled-cycles-backend # 33.59% backend cycles idle [65.45%] + 288,379,192 instructions # 0.94 insns per cycle + # 0.57 stalled cycles per insn [85.16%] + 53,702,811 branches # 515.836 M/sec [86.05%] + 3,044,773 branch-misses # 5.67% of all branches [87.57%] + + 0.092529815 seconds time elapsed + +Iteration 8 + + Performance counter stats for 'java MatrixMultiplier 100': + + 104.765434 task-clock # 1.108 CPUs utilized + 169 context-switches # 0.002 M/sec + 45 CPU-migrations # 0.000 M/sec + 4,901 page-faults # 0.047 M/sec + 307,535,192 cycles # 2.935 GHz [84.82%] + 164,207,959 stalled-cycles-frontend # 53.39% frontend cycles idle [79.74%] + 106,454,679 stalled-cycles-backend # 34.62% backend cycles idle [60.71%] + 268,169,951 instructions # 0.87 insns per cycle + # 0.61 stalled cycles per insn [88.10%] + 54,860,528 branches # 523.651 M/sec [88.40%] + 2,968,193 branch-misses # 5.41% of all branches [87.39%] + + 0.094565311 seconds time elapsed + +Iteration 9 + + Performance counter stats for 'java MatrixMultiplier 100': + + 105.879995 task-clock # 1.108 CPUs utilized + 181 context-switches # 0.002 M/sec + 49 CPU-migrations # 0.000 M/sec + 4,910 page-faults # 0.046 M/sec + 309,713,266 cycles # 2.925 GHz [86.13%] + 164,906,341 stalled-cycles-frontend # 53.24% frontend cycles idle [79.10%] + 111,500,921 stalled-cycles-backend # 36.00% backend cycles idle [61.51%] + 272,166,057 instructions # 0.88 insns per cycle + # 0.61 stalled cycles per insn [87.40%] + 53,079,383 branches # 501.316 M/sec [88.80%] + 3,046,810 branch-misses # 5.74% of all branches [86.44%] + + 0.095518615 seconds time elapsed + +Iteration 10 + + Performance counter stats for 'java MatrixMultiplier 100': + + 104.498642 task-clock # 1.120 CPUs utilized + 166 context-switches # 0.002 M/sec + 43 CPU-migrations # 0.000 M/sec + 4,912 page-faults # 0.047 M/sec + 306,585,731 cycles # 2.934 GHz [86.11%] + 157,952,036 stalled-cycles-frontend # 51.52% frontend cycles idle [76.53%] + 109,596,785 stalled-cycles-backend # 35.75% backend cycles idle [65.42%] + 284,918,343 instructions # 0.93 insns per cycle + # 0.55 stalled cycles per insn [88.44%] + 54,016,381 branches # 516.910 M/sec [88.80%] + 3,056,357 branch-misses # 5.66% of all branches [85.71%] + + 0.093262045 seconds time elapsed + diff --git a/finalExperimentRuns/java_output_1000 b/finalExperimentRuns/java_output_1000 new file mode 100644 index 0000000..0e72482 --- /dev/null +++ b/finalExperimentRuns/java_output_1000 @@ -0,0 +1,180 @@ +Iteration 1 + + Performance counter stats for 'java MatrixMultiplier 1000': + + 8230.895013 task-clock # 1.000 CPUs utilized + 349 context-switches # 0.000 M/sec + 51 CPU-migrations # 0.000 M/sec + 7,924 page-faults # 0.001 M/sec + 25,179,610,041 cycles # 3.059 GHz [83.31%] + 19,733,923,171 stalled-cycles-frontend # 78.37% frontend cycles idle [83.35%] + 5,763,803,893 stalled-cycles-backend # 22.89% backend cycles idle [66.59%] + 11,489,775,504 instructions # 0.46 insns per cycle + # 1.72 stalled cycles per insn [83.39%] + 1,586,764,019 branches # 192.781 M/sec [83.37%] + 5,033,764 branch-misses # 0.32% of all branches [83.39%] + + 8.231041043 seconds time elapsed + +Iteration 2 + + Performance counter stats for 'java MatrixMultiplier 1000': + + 8259.814170 task-clock # 1.000 CPUs utilized + 348 context-switches # 0.000 M/sec + 49 CPU-migrations # 0.000 M/sec + 7,921 page-faults # 0.001 M/sec + 25,267,638,160 cycles # 3.059 GHz [83.34%] + 19,841,583,037 stalled-cycles-frontend # 78.53% frontend cycles idle [83.21%] + 6,527,775,541 stalled-cycles-backend # 25.83% backend cycles idle [66.69%] + 11,513,813,855 instructions # 0.46 insns per cycle + # 1.72 stalled cycles per insn [83.38%] + 1,587,852,465 branches # 192.238 M/sec [83.37%] + 5,136,172 branch-misses # 0.32% of all branches [83.43%] + + 8.260232738 seconds time elapsed + +Iteration 3 + + Performance counter stats for 'java MatrixMultiplier 1000': + + 8194.756476 task-clock # 1.000 CPUs utilized + 375 context-switches # 0.000 M/sec + 52 CPU-migrations # 0.000 M/sec + 7,917 page-faults # 0.001 M/sec + 25,071,710,073 cycles # 3.059 GHz [83.35%] + 19,634,941,038 stalled-cycles-frontend # 78.32% frontend cycles idle [83.36%] + 5,481,936,492 stalled-cycles-backend # 21.87% backend cycles idle [66.57%] + 11,497,785,971 instructions # 0.46 insns per cycle + # 1.71 stalled cycles per insn [83.26%] + 1,585,354,953 branches # 193.460 M/sec [83.36%] + 4,624,130 branch-misses # 0.29% of all branches [83.37%] + + 8.195429601 seconds time elapsed + +Iteration 4 + + Performance counter stats for 'java MatrixMultiplier 1000': + + 8289.424086 task-clock # 1.000 CPUs utilized + 351 context-switches # 0.000 M/sec + 50 CPU-migrations # 0.000 M/sec + 7,924 page-faults # 0.001 M/sec + 25,357,184,443 cycles # 3.059 GHz [83.40%] + 19,924,099,338 stalled-cycles-frontend # 78.57% frontend cycles idle [83.30%] + 5,665,731,789 stalled-cycles-backend # 22.34% backend cycles idle [66.62%] + 11,497,881,177 instructions # 0.45 insns per cycle + # 1.73 stalled cycles per insn [83.36%] + 1,586,295,230 branches # 191.364 M/sec [83.35%] + 4,864,960 branch-misses # 0.31% of all branches [83.36%] + + 8.289548645 seconds time elapsed + +Iteration 5 + + Performance counter stats for 'java MatrixMultiplier 1000': + + 8295.510219 task-clock # 1.000 CPUs utilized + 344 context-switches # 0.000 M/sec + 45 CPU-migrations # 0.000 M/sec + 7,914 page-faults # 0.001 M/sec + 25,375,983,015 cycles # 3.059 GHz [83.35%] + 19,927,717,481 stalled-cycles-frontend # 78.53% frontend cycles idle [83.38%] + 5,775,388,326 stalled-cycles-backend # 22.76% backend cycles idle [66.64%] + 11,510,303,341 instructions # 0.45 insns per cycle + # 1.73 stalled cycles per insn [83.34%] + 1,584,048,659 branches # 190.953 M/sec [83.33%] + 4,757,923 branch-misses # 0.30% of all branches [83.34%] + + 8.295597912 seconds time elapsed + +Iteration 6 + + Performance counter stats for 'java MatrixMultiplier 1000': + + 8279.033166 task-clock # 1.000 CPUs utilized + 338 context-switches # 0.000 M/sec + 49 CPU-migrations # 0.000 M/sec + 7,921 page-faults # 0.001 M/sec + 25,325,475,010 cycles # 3.059 GHz [83.38%] + 19,889,516,972 stalled-cycles-frontend # 78.54% frontend cycles idle [83.24%] + 6,028,939,780 stalled-cycles-backend # 23.81% backend cycles idle [66.57%] + 11,486,000,036 instructions # 0.45 insns per cycle + # 1.73 stalled cycles per insn [83.42%] + 1,588,599,157 branches # 191.882 M/sec [83.43%] + 4,920,256 branch-misses # 0.31% of all branches [83.40%] + + 8.280647843 seconds time elapsed + +Iteration 7 + + Performance counter stats for 'java MatrixMultiplier 1000': + + 8238.691565 task-clock # 0.979 CPUs utilized + 350 context-switches # 0.000 M/sec + 60 CPU-migrations # 0.000 M/sec + 7,908 page-faults # 0.001 M/sec + 25,200,142,210 cycles # 3.059 GHz [83.36%] + 19,774,075,612 stalled-cycles-frontend # 78.47% frontend cycles idle [83.35%] + 5,690,178,267 stalled-cycles-backend # 22.58% backend cycles idle [66.56%] + 11,493,062,103 instructions # 0.46 insns per cycle + # 1.72 stalled cycles per insn [83.30%] + 1,590,593,567 branches # 193.064 M/sec [83.37%] + 4,824,786 branch-misses # 0.30% of all branches [83.40%] + + 8.418598063 seconds time elapsed + +Iteration 8 + + Performance counter stats for 'java MatrixMultiplier 1000': + + 8261.292085 task-clock # 1.000 CPUs utilized + 363 context-switches # 0.000 M/sec + 47 CPU-migrations # 0.000 M/sec + 7,924 page-faults # 0.001 M/sec + 25,267,234,464 cycles # 3.059 GHz [83.38%] + 19,828,388,563 stalled-cycles-frontend # 78.47% frontend cycles idle [83.37%] + 5,913,581,026 stalled-cycles-backend # 23.40% backend cycles idle [66.54%] + 11,491,742,369 instructions # 0.45 insns per cycle + # 1.73 stalled cycles per insn [83.26%] + 1,584,928,928 branches # 191.850 M/sec [83.35%] + 4,710,317 branch-misses # 0.30% of all branches [83.39%] + + 8.263920402 seconds time elapsed + +Iteration 9 + + Performance counter stats for 'java MatrixMultiplier 1000': + + 8159.608685 task-clock # 1.000 CPUs utilized + 352 context-switches # 0.000 M/sec + 44 CPU-migrations # 0.000 M/sec + 7,922 page-faults # 0.001 M/sec + 24,966,244,868 cycles # 3.060 GHz [83.38%] + 19,528,618,282 stalled-cycles-frontend # 78.22% frontend cycles idle [83.38%] + 5,211,081,380 stalled-cycles-backend # 20.87% backend cycles idle [66.60%] + 11,505,569,740 instructions # 0.46 insns per cycle + # 1.70 stalled cycles per insn [83.31%] + 1,582,342,745 branches # 193.924 M/sec [83.29%] + 4,717,778 branch-misses # 0.30% of all branches [83.38%] + + 8.161167189 seconds time elapsed + +Iteration 10 + + Performance counter stats for 'java MatrixMultiplier 1000': + + 8325.978080 task-clock # 1.000 CPUs utilized + 336 context-switches # 0.000 M/sec + 51 CPU-migrations # 0.000 M/sec + 7,920 page-faults # 0.001 M/sec + 25,465,415,040 cycles # 3.059 GHz [83.39%] + 20,014,451,796 stalled-cycles-frontend # 78.59% frontend cycles idle [83.35%] + 5,716,481,850 stalled-cycles-backend # 22.45% backend cycles idle [66.57%] + 11,496,878,405 instructions # 0.45 insns per cycle + # 1.74 stalled cycles per insn [83.29%] + 1,583,971,382 branches # 190.244 M/sec [83.35%] + 4,654,654 branch-misses # 0.29% of all branches [83.37%] + + 8.327184267 seconds time elapsed + diff --git a/finalExperimentRuns/java_output_3000 b/finalExperimentRuns/java_output_3000 new file mode 100644 index 0000000..bcb0278 --- /dev/null +++ b/finalExperimentRuns/java_output_3000 @@ -0,0 +1,180 @@ +Iteration 1 + + Performance counter stats for 'java MatrixMultiplier 3000': + + 570246.408556 task-clock # 0.998 CPUs utilized + 13,594 context-switches # 0.000 M/sec + 67 CPU-migrations # 0.000 M/sec + 55,723 page-faults # 0.000 M/sec + 1,745,855,585,043 cycles # 3.062 GHz [83.33%] + 1,586,556,006,830 stalled-cycles-frontend # 90.88% frontend cycles idle [83.33%] + 667,253,690,712 stalled-cycles-backend # 38.22% backend cycles idle [66.66%] + 299,663,523,851 instructions # 0.17 insns per cycle + # 5.29 stalled cycles per insn [83.33%] + 40,925,007,356 branches # 71.767 M/sec [83.34%] + 21,839,815 branch-misses # 0.05% of all branches [83.34%] + + 571.200174838 seconds time elapsed + +Iteration 2 + + Performance counter stats for 'java MatrixMultiplier 3000': + + 526618.685030 task-clock # 0.998 CPUs utilized + 12,618 context-switches # 0.000 M/sec + 65 CPU-migrations # 0.000 M/sec + 54,849 page-faults # 0.000 M/sec + 1,612,309,554,009 cycles # 3.062 GHz [83.33%] + 1,453,911,024,610 stalled-cycles-frontend # 90.18% frontend cycles idle [83.33%] + 604,419,114,381 stalled-cycles-backend # 37.49% backend cycles idle [66.67%] + 299,550,354,418 instructions # 0.19 insns per cycle + # 4.85 stalled cycles per insn [83.34%] + 40,909,432,652 branches # 77.683 M/sec [83.33%] + 21,245,062 branch-misses # 0.05% of all branches [83.33%] + + 527.548551694 seconds time elapsed + +Iteration 3 + + Performance counter stats for 'java MatrixMultiplier 3000': + + 535466.680458 task-clock # 0.998 CPUs utilized + 12,846 context-switches # 0.000 M/sec + 62 CPU-migrations # 0.000 M/sec + 54,956 page-faults # 0.000 M/sec + 1,639,046,450,367 cycles # 3.061 GHz [83.34%] + 1,480,430,768,242 stalled-cycles-frontend # 90.32% frontend cycles idle [83.33%] + 601,908,499,977 stalled-cycles-backend # 36.72% backend cycles idle [66.66%] + 299,553,327,633 instructions # 0.18 insns per cycle + # 4.94 stalled cycles per insn [83.34%] + 40,910,210,705 branches # 76.401 M/sec [83.33%] + 20,911,209 branch-misses # 0.05% of all branches [83.34%] + + 536.698723828 seconds time elapsed + +Iteration 4 + + Performance counter stats for 'java MatrixMultiplier 3000': + + 552650.707551 task-clock # 0.997 CPUs utilized + 12,976 context-switches # 0.000 M/sec + 77 CPU-migrations # 0.000 M/sec + 55,114 page-faults # 0.000 M/sec + 1,691,511,828,074 cycles # 3.061 GHz [83.33%] + 1,532,208,381,110 stalled-cycles-frontend # 90.58% frontend cycles idle [83.33%] + 621,744,344,523 stalled-cycles-backend # 36.76% backend cycles idle [66.67%] + 299,846,020,001 instructions # 0.18 insns per cycle + # 5.11 stalled cycles per insn [83.34%] + 40,958,238,817 branches # 74.112 M/sec [83.33%] + 21,459,929 branch-misses # 0.05% of all branches [83.33%] + + 554.276915012 seconds time elapsed + +Iteration 5 + + Performance counter stats for 'java MatrixMultiplier 3000': + + 556814.234121 task-clock # 0.998 CPUs utilized + 13,237 context-switches # 0.000 M/sec + 70 CPU-migrations # 0.000 M/sec + 55,552 page-faults # 0.000 M/sec + 1,704,664,130,406 cycles # 3.061 GHz [83.34%] + 1,545,675,582,807 stalled-cycles-frontend # 90.67% frontend cycles idle [83.33%] + 633,094,029,679 stalled-cycles-backend # 37.14% backend cycles idle [66.67%] + 299,667,160,749 instructions # 0.18 insns per cycle + # 5.16 stalled cycles per insn [83.33%] + 40,929,141,875 branches # 73.506 M/sec [83.33%] + 21,024,237 branch-misses # 0.05% of all branches [83.33%] + + 557.952865517 seconds time elapsed + +Iteration 6 + + Performance counter stats for 'java MatrixMultiplier 3000': + + 560832.876628 task-clock # 0.998 CPUs utilized + 12,793 context-switches # 0.000 M/sec + 77 CPU-migrations # 0.000 M/sec + 55,513 page-faults # 0.000 M/sec + 1,713,840,185,302 cycles # 3.056 GHz [83.34%] + 1,555,002,421,280 stalled-cycles-frontend # 90.73% frontend cycles idle [83.33%] + 637,113,380,734 stalled-cycles-backend # 37.17% backend cycles idle [66.66%] + 299,715,387,470 instructions # 0.17 insns per cycle + # 5.19 stalled cycles per insn [83.33%] + 40,932,284,030 branches # 72.985 M/sec [83.33%] + 21,650,917 branch-misses # 0.05% of all branches [83.34%] + + 561.996199196 seconds time elapsed + +Iteration 7 + + Performance counter stats for 'java MatrixMultiplier 3000': + + 550567.500826 task-clock # 0.998 CPUs utilized + 12,798 context-switches # 0.000 M/sec + 82 CPU-migrations # 0.000 M/sec + 55,289 page-faults # 0.000 M/sec + 1,684,502,992,534 cycles # 3.060 GHz [83.34%] + 1,525,773,333,416 stalled-cycles-frontend # 90.58% frontend cycles idle [83.34%] + 624,056,379,930 stalled-cycles-backend # 37.05% backend cycles idle [66.66%] + 299,717,851,374 instructions # 0.18 insns per cycle + # 5.09 stalled cycles per insn [83.33%] + 40,933,551,765 branches # 74.348 M/sec [83.33%] + 21,857,346 branch-misses # 0.05% of all branches [83.33%] + + 551.751018706 seconds time elapsed + +Iteration 8 + + Performance counter stats for 'java MatrixMultiplier 3000': + + 555494.391825 task-clock # 0.998 CPUs utilized + 12,852 context-switches # 0.000 M/sec + 58 CPU-migrations # 0.000 M/sec + 55,583 page-faults # 0.000 M/sec + 1,698,991,939,785 cycles # 3.059 GHz [83.34%] + 1,540,286,255,055 stalled-cycles-frontend # 90.66% frontend cycles idle [83.33%] + 640,738,068,956 stalled-cycles-backend # 37.71% backend cycles idle [66.66%] + 299,551,757,727 instructions # 0.18 insns per cycle + # 5.14 stalled cycles per insn [83.33%] + 40,913,471,509 branches # 73.652 M/sec [83.33%] + 20,264,283 branch-misses # 0.05% of all branches [83.34%] + + 556.645024828 seconds time elapsed + +Iteration 9 + + Performance counter stats for 'java MatrixMultiplier 3000': + + 546762.798329 task-clock # 0.998 CPUs utilized + 13,058 context-switches # 0.000 M/sec + 64 CPU-migrations # 0.000 M/sec + 55,257 page-faults # 0.000 M/sec + 1,673,716,383,400 cycles # 3.061 GHz [83.33%] + 1,514,986,220,797 stalled-cycles-frontend # 90.52% frontend cycles idle [83.33%] + 625,080,656,461 stalled-cycles-backend # 37.35% backend cycles idle [66.66%] + 299,613,800,760 instructions # 0.18 insns per cycle + # 5.06 stalled cycles per insn [83.33%] + 40,915,862,013 branches # 74.833 M/sec [83.34%] + 20,910,287 branch-misses # 0.05% of all branches [83.34%] + + 547.893249032 seconds time elapsed + +Iteration 10 + + Performance counter stats for 'java MatrixMultiplier 3000': + + 548287.545724 task-clock # 0.998 CPUs utilized + 12,824 context-switches # 0.000 M/sec + 78 CPU-migrations # 0.000 M/sec + 55,174 page-faults # 0.000 M/sec + 1,678,345,846,454 cycles # 3.061 GHz [83.33%] + 1,519,483,980,886 stalled-cycles-frontend # 90.53% frontend cycles idle [83.34%] + 617,643,582,895 stalled-cycles-backend # 36.80% backend cycles idle [66.66%] + 299,920,288,315 instructions # 0.18 insns per cycle + # 5.07 stalled cycles per insn [83.33%] + 40,977,709,387 branches # 74.738 M/sec [83.33%] + 22,271,141 branch-misses # 0.05% of all branches [83.34%] + + 549.477799232 seconds time elapsed + diff --git a/finalExperimentRuns/matrixMultiply b/finalExperimentRuns/matrixMultiply new file mode 100755 index 0000000..70e1311 Binary files /dev/null and b/finalExperimentRuns/matrixMultiply differ diff --git a/finalExperimentRuns/matrixMultiply.c b/finalExperimentRuns/matrixMultiply.c new file mode 100644 index 0000000..4791276 --- /dev/null +++ b/finalExperimentRuns/matrixMultiply.c @@ -0,0 +1,157 @@ +#include +#include +#include +#include +#include + +static int **matrixA; +static int **matrixB; +static int **matrixC; +static int N; + +static int randomMaxValue = 100; + +void setUpMatrices() { + + int i = 0; + + matrixA = malloc(sizeof(int *) * N); + matrixB = malloc(sizeof(int *) * N); + matrixC = malloc(sizeof(int *) * N); + + + + for (i=0 ; i +#include +#include +#include +#include + +static int **matrixA; +static int **matrixB; +static int **matrixC; +static int N; + +static int randomMaxValue = 100; + +void setUpMatrices() { + + int i = 0; + + matrixA = malloc(sizeof(int *) * N); + matrixB = malloc(sizeof(int *) * N); + matrixC = malloc(sizeof(int *) * N); + + + + for (i=0 ; i " + exit +fi + + + +STR_C_GCC="gcc_output_$1" +STR_JAVA="java_output_$1" +STR_PYTHON="python_output_$1" +STR_C_CLANG="clang_output_$1" +STR_C_GCC_OPTIMIZED="gcc_output_optimized_$1" +STR_C_CLANG_OPTIMIZED="clang_output_optimized_$1" + +for i in {1..10} +do + + echo "Iteration $i" >> $STR_C_GCC + echo "Iteration $i" >> $STR_JAVA + echo "Iteration $i" >> $STR_PYTHON + echo "Iteration $i" >> $STR_C_CLANG + echo "Iteration $1" >> $STR_C_GCC_OPTIMIZED + echo "Iteration $1" >> $STR_C_CLANG_OPTIMIZED + + 3>>$STR_C_GCC perf stat --log-fd 3 ./$2 $1 > /dev/null + 3>>$STR_JAVA perf stat --log-fd 3 java $3 $1 > /dev/null + 3>>$STR_PYTHON perf stat --log-fd 3 python $4 $1 > /dev/null + 3>>$STR_C_CLANG perf stat --log-fd 3 ./$5 $1 > /dev/null + 3>>$STR_C_GCC_OPTIMIZED perf stat --log-fd 3 ./$6 $1 > /dev/null + 3>>$STR_C_CLANG_OPTIMIZED perf stat --log-fd 3 ./$7 $1 > /dev/null +done + + diff --git a/finalExperimentRuns/threading/.MatrixMultiplier.java.swp b/finalExperimentRuns/threading/.MatrixMultiplier.java.swp new file mode 100644 index 0000000..774971d Binary files /dev/null and b/finalExperimentRuns/threading/.MatrixMultiplier.java.swp differ diff --git a/finalExperimentRuns/threading/MatrixMultiplier$ThreadMultiply.class b/finalExperimentRuns/threading/MatrixMultiplier$ThreadMultiply.class new file mode 100644 index 0000000..6e53c73 Binary files /dev/null and b/finalExperimentRuns/threading/MatrixMultiplier$ThreadMultiply.class differ diff --git a/finalExperimentRuns/threading/MatrixMultiplier.class b/finalExperimentRuns/threading/MatrixMultiplier.class new file mode 100644 index 0000000..889ccff Binary files /dev/null and b/finalExperimentRuns/threading/MatrixMultiplier.class differ diff --git a/finalExperimentRuns/threading/MatrixMultiplier.java b/finalExperimentRuns/threading/MatrixMultiplier.java new file mode 100644 index 0000000..2a5204f --- /dev/null +++ b/finalExperimentRuns/threading/MatrixMultiplier.java @@ -0,0 +1,181 @@ +import java.util.*; + +public class MatrixMultiplier { + + static int[][] matrixA; + static int[][] matrixB; + static int[][] matrixC; + + static int matrixDimension; + + public MatrixMultiplier (int N) { + + matrixA = new int[N][N]; + matrixB = new int[N][N]; + matrixC = new int[N][N]; + matrixDimension = N; + + } + + public MatrixMultiplier () { + + } + + public void fillMatrixRandom() { + + + Random rn = new Random(); + + + int i = 0; + int j = 0; + + for (i=0; i +#include +#include +#include +#include +#include + +#define RANDOM_MAX_VALUE 100 + +static int **matrixA; +static int **matrixB; +static int **matrixC; +static int N; + +void setUpMatrices() { + + int i = 0; + + matrixA = malloc(sizeof(int *) * N); + matrixB = malloc(sizeof(int *) * N); + matrixC = malloc(sizeof(int *) * N); + + + + for (i=0 ; i +#include +#include +#include +#include +#include +#include + +#define RANDOM_MAX_VALUE 10 + +static int **matrixA; +static int **matrixB; +static int **matrixC; +static int N; + +struct threadArguments { + + int row; + int column; +}; + +void setUpMatrices() { + + int i = 0; + + matrixA = malloc(sizeof(int *) * N); + matrixB = malloc(sizeof(int *) * N); + matrixC = malloc(sizeof(int *) * N); + + + + for (i=0 ; irow; + int currentColumn = threadArgs->column; + + + for (i=0; i branch-misses - 0.001220843 seconds time elapsed + 0.001226038 seconds time elapsed Iteration 2 Performance counter stats for './matrixMultiply_clang 10': - 0.260170 task-clock # 0.323 CPUs utilized + 0.263159 task-clock # 0.297 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 126 page-faults # 0.484 M/sec - 791,246 cycles # 3.041 GHz - 483,075 stalled-cycles-frontend # 61.05% frontend cycles idle - 321,165 stalled-cycles-backend # 40.59% backend cycles idle - 546,129 instructions # 0.69 insns per cycle + 127 page-faults # 0.483 M/sec + 791,625 cycles # 3.008 GHz + 482,299 stalled-cycles-frontend # 60.93% frontend cycles idle + 319,678 stalled-cycles-backend # 40.38% backend cycles idle + 550,734 instructions # 0.70 insns per cycle # 0.88 stalled cycles per insn - 105,385 branches # 405.062 M/sec + 106,158 branches # 403.399 M/sec branch-misses - 0.000804638 seconds time elapsed + 0.000886637 seconds time elapsed Iteration 3 Performance counter stats for './matrixMultiply_clang 10': - 0.265903 task-clock # 0.296 CPUs utilized + 0.263879 task-clock # 0.315 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 127 page-faults # 0.478 M/sec - 800,185 cycles # 3.009 GHz - 489,441 stalled-cycles-frontend # 61.17% frontend cycles idle - 292,939 stalled-cycles-backend # 36.61% backend cycles idle - 553,591 instructions # 0.69 insns per cycle - # 0.88 stalled cycles per insn - 106,658 branches # 401.116 M/sec + 127 page-faults # 0.481 M/sec + 801,437 cycles # 3.037 GHz + 489,881 stalled-cycles-frontend # 61.13% frontend cycles idle + 298,681 stalled-cycles-backend # 37.27% backend cycles idle + 552,821 instructions # 0.69 insns per cycle + # 0.89 stalled cycles per insn + 106,287 branches # 402.787 M/sec branch-misses - 0.000897592 seconds time elapsed + 0.000838731 seconds time elapsed Iteration 4 Performance counter stats for './matrixMultiply_clang 10': - 0.261262 task-clock # 0.266 CPUs utilized + 0.261170 task-clock # 0.325 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 126 page-faults # 0.482 M/sec - 793,774 cycles # 3.038 GHz - 483,727 stalled-cycles-frontend # 60.94% frontend cycles idle - 330,050 stalled-cycles-backend # 41.58% backend cycles idle - 550,492 instructions # 0.69 insns per cycle - # 0.88 stalled cycles per insn - 106,129 branches # 406.217 M/sec + 127 page-faults # 0.486 M/sec + 794,662 cycles # 3.043 GHz + 483,324 stalled-cycles-frontend # 60.82% frontend cycles idle + 324,726 stalled-cycles-backend # 40.86% backend cycles idle + 552,409 instructions # 0.70 insns per cycle + # 0.87 stalled cycles per insn + 106,457 branches # 407.616 M/sec branch-misses - 0.000982477 seconds time elapsed + 0.000802414 seconds time elapsed Iteration 5 Performance counter stats for './matrixMultiply_clang 10': - 0.262448 task-clock # 0.325 CPUs utilized + 0.259817 task-clock # 0.203 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 126 page-faults # 0.480 M/sec - 798,906 cycles # 3.044 GHz - 488,680 stalled-cycles-frontend # 61.17% frontend cycles idle - 331,857 stalled-cycles-backend # 41.54% backend cycles idle - 551,880 instructions # 0.69 insns per cycle - # 0.89 stalled cycles per insn - 106,196 branches # 404.636 M/sec + 127 page-faults # 0.489 M/sec + 789,490 cycles # 3.039 GHz + 480,273 stalled-cycles-frontend # 60.83% frontend cycles idle + 338,648 stalled-cycles-backend # 42.89% backend cycles idle + 549,001 instructions # 0.70 insns per cycle + # 0.87 stalled cycles per insn + 105,856 branches # 407.425 M/sec branch-misses - 0.000806293 seconds time elapsed + 0.001277214 seconds time elapsed Iteration 6 Performance counter stats for './matrixMultiply_clang 10': - 0.262853 task-clock # 0.198 CPUs utilized + 0.259312 task-clock # 0.237 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 126 page-faults # 0.479 M/sec - 799,558 cycles # 3.042 GHz - 487,996 stalled-cycles-frontend # 61.03% frontend cycles idle - 325,493 stalled-cycles-backend # 40.71% backend cycles idle - 550,468 instructions # 0.69 insns per cycle + 127 page-faults # 0.490 M/sec + 788,093 cycles # 3.039 GHz + 480,367 stalled-cycles-frontend # 60.95% frontend cycles idle + 337,791 stalled-cycles-backend # 42.86% backend cycles idle + 542,295 instructions # 0.69 insns per cycle # 0.89 stalled cycles per insn - 106,139 branches # 403.796 M/sec + 104,332 branches # 402.342 M/sec branch-misses - 0.001325592 seconds time elapsed + 0.001092917 seconds time elapsed Iteration 7 Performance counter stats for './matrixMultiply_clang 10': - 0.261662 task-clock # 0.194 CPUs utilized + 0.291974 task-clock # 0.199 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 127 page-faults # 0.485 M/sec - 795,241 cycles # 3.039 GHz - 484,709 stalled-cycles-frontend # 60.95% frontend cycles idle - 318,825 stalled-cycles-backend # 40.09% backend cycles idle - 552,907 instructions # 0.70 insns per cycle - # 0.88 stalled cycles per insn - 106,529 branches # 407.124 M/sec - branch-misses + 126 page-faults # 0.432 M/sec + 202,297 cycles # 0.693 GHz + 553,682 stalled-cycles-frontend # 273.70% frontend cycles idle + 387,858 stalled-cycles-backend # 191.73% backend cycles idle + 548,059 instructions # 2.71 insns per cycle + # 1.01 stalled cycles per insn + 105,755 branches # 362.207 M/sec + 7,631 branch-misses # 7.22% of all branches [73.27%] - 0.001346394 seconds time elapsed + 0.001464449 seconds time elapsed Iteration 8 Performance counter stats for './matrixMultiply_clang 10': - 0.260743 task-clock # 0.229 CPUs utilized + 0.260737 task-clock # 0.142 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 127 page-faults # 0.487 M/sec - 792,813 cycles # 3.041 GHz - 482,015 stalled-cycles-frontend # 60.80% frontend cycles idle - 325,041 stalled-cycles-backend # 41.00% backend cycles idle - 553,391 instructions # 0.70 insns per cycle + 791,866 cycles # 3.037 GHz + 481,917 stalled-cycles-frontend # 60.86% frontend cycles idle + 338,794 stalled-cycles-backend # 42.78% backend cycles idle + 554,114 instructions # 0.70 insns per cycle # 0.87 stalled cycles per insn - 106,659 branches # 409.058 M/sec + 106,770 branches # 409.493 M/sec branch-misses - 0.001138278 seconds time elapsed + 0.001834688 seconds time elapsed Iteration 9 Performance counter stats for './matrixMultiply_clang 10': - 0.261250 task-clock # 0.323 CPUs utilized + 0.265581 task-clock # 0.269 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 126 page-faults # 0.482 M/sec - 794,471 cycles # 3.041 GHz - 483,254 stalled-cycles-frontend # 60.83% frontend cycles idle - 299,255 stalled-cycles-backend # 37.67% backend cycles idle - 556,212 instructions # 0.70 insns per cycle - # 0.87 stalled cycles per insn - 107,136 branches # 410.090 M/sec + 127 page-faults # 0.478 M/sec + 798,282 cycles # 3.006 GHz + 486,961 stalled-cycles-frontend # 61.00% frontend cycles idle + 343,026 stalled-cycles-backend # 42.97% backend cycles idle + 555,199 instructions # 0.70 insns per cycle + # 0.88 stalled cycles per insn + 106,951 branches # 402.706 M/sec branch-misses - 0.000808962 seconds time elapsed + 0.000988402 seconds time elapsed Iteration 10 Performance counter stats for './matrixMultiply_clang 10': - 0.260392 task-clock # 0.312 CPUs utilized + 0.260504 task-clock # 0.180 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 126 page-faults # 0.484 M/sec - 791,548 cycles # 3.040 GHz - 480,852 stalled-cycles-frontend # 60.75% frontend cycles idle - 335,540 stalled-cycles-backend # 42.39% backend cycles idle - 553,631 instructions # 0.70 insns per cycle + 792,825 cycles # 3.043 GHz + 482,255 stalled-cycles-frontend # 60.83% frontend cycles idle + 333,878 stalled-cycles-backend # 42.11% backend cycles idle + 551,967 instructions # 0.70 insns per cycle # 0.87 stalled cycles per insn - 106,679 branches # 409.686 M/sec + 106,390 branches # 408.401 M/sec branch-misses - 0.000835158 seconds time elapsed + 0.001444313 seconds time elapsed diff --git a/sankalp/clang_output_100 b/sankalp/clang_output_100 index b8581c8..62ab0c3 100644 --- a/sankalp/clang_output_100 +++ b/sankalp/clang_output_100 @@ -2,179 +2,179 @@ Iteration 1 Performance counter stats for './matrixMultiply_clang 100': - 7.081909 task-clock # 0.296 CPUs utilized + 7.053268 task-clock # 0.907 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 159 page-faults # 0.022 M/sec - 21,539,725 cycles # 3.042 GHz [43.77%] - 10,064,535 stalled-cycles-frontend # 46.73% frontend cycles idle - 1,705,546 stalled-cycles-backend # 7.92% backend cycles idle - 26,014,618 instructions # 1.21 insns per cycle - # 0.39 stalled cycles per insn - 2,497,944 branches # 352.722 M/sec - 11,875 branch-misses # 0.48% of all branches [74.92%] + 158 page-faults # 0.022 M/sec + 21,542,512 cycles # 3.054 GHz [43.51%] + 10,448,377 stalled-cycles-frontend # 48.50% frontend cycles idle + 1,620,562 stalled-cycles-backend # 7.52% backend cycles idle + 26,010,263 instructions # 1.21 insns per cycle + # 0.40 stalled cycles per insn + 2,497,209 branches # 354.050 M/sec + 11,897 branch-misses # 0.48% of all branches [71.92%] - 0.023890850 seconds time elapsed + 0.007776978 seconds time elapsed Iteration 2 Performance counter stats for './matrixMultiply_clang 100': - 7.115157 task-clock # 0.390 CPUs utilized + 7.156865 task-clock # 0.912 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 158 page-faults # 0.022 M/sec - 21,621,287 cycles # 3.039 GHz [44.01%] - 11,198,669 stalled-cycles-frontend # 51.79% frontend cycles idle - 1,760,875 stalled-cycles-backend # 8.14% backend cycles idle - 26,010,939 instructions # 1.20 insns per cycle - # 0.43 stalled cycles per insn - 2,496,532 branches # 350.875 M/sec - 11,961 branch-misses # 0.48% of all branches [66.39%] + 21,858,485 cycles # 3.054 GHz [45.75%] + 6,691,620 stalled-cycles-frontend # 30.61% frontend cycles idle + 2,072,908 stalled-cycles-backend # 9.48% backend cycles idle + 26,016,488 instructions # 1.19 insns per cycle + # 0.26 stalled cycles per insn + 2,497,845 branches # 349.014 M/sec + 17,479 branch-misses # 0.70% of all branches - 0.018257320 seconds time elapsed + 0.007849703 seconds time elapsed Iteration 3 Performance counter stats for './matrixMultiply_clang 100': - 7.079986 task-clock # 0.347 CPUs utilized + 7.165353 task-clock # 0.909 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 158 page-faults # 0.022 M/sec - 10,701,717 cycles # 1.512 GHz - 12,488,960 stalled-cycles-frontend # 116.70% frontend cycles idle - 1,879,249 stalled-cycles-backend # 17.56% backend cycles idle - 26,014,281 instructions # 2.43 insns per cycle - # 0.48 stalled cycles per insn - 2,497,901 branches # 352.812 M/sec - 12,145 branch-misses # 0.49% of all branches [50.52%] + 21,780,583 cycles # 3.040 GHz [44.38%] + 7,179,660 stalled-cycles-frontend # 32.96% frontend cycles idle + 2,012,094 stalled-cycles-backend # 9.24% backend cycles idle + 26,010,882 instructions # 1.19 insns per cycle + # 0.28 stalled cycles per insn + 2,497,319 branches # 348.527 M/sec + 15,296 branch-misses # 0.61% of all branches [98.02%] - 0.020401132 seconds time elapsed + 0.007883982 seconds time elapsed Iteration 4 Performance counter stats for './matrixMultiply_clang 100': - 7.161954 task-clock # 0.349 CPUs utilized + 7.098440 task-clock # 0.923 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 159 page-faults # 0.022 M/sec - 21,783,309 cycles # 3.042 GHz [44.34%] - 7,013,348 stalled-cycles-frontend # 32.20% frontend cycles idle - 1,999,347 stalled-cycles-backend # 9.18% backend cycles idle - 26,014,845 instructions # 1.19 insns per cycle - # 0.27 stalled cycles per insn - 2,498,010 branches # 348.789 M/sec - 17,107 branch-misses # 0.68% of all branches [99.46%] + 21,662,885 cycles # 3.052 GHz [43.88%] + 8,869,299 stalled-cycles-frontend # 40.94% frontend cycles idle + 1,888,070 stalled-cycles-backend # 8.72% backend cycles idle + 26,015,267 instructions # 1.20 insns per cycle + # 0.34 stalled cycles per insn + 2,498,064 branches # 351.917 M/sec + 11,733 branch-misses # 0.47% of all branches [84.57%] - 0.020511924 seconds time elapsed + 0.007692507 seconds time elapsed Iteration 5 Performance counter stats for './matrixMultiply_clang 100': - 7.091759 task-clock # 0.382 CPUs utilized + 7.074668 task-clock # 0.881 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 158 page-faults # 0.022 M/sec - 21,631,594 cycles # 3.050 GHz [43.90%] - 8,052,137 stalled-cycles-frontend # 37.22% frontend cycles idle - 1,018,814 stalled-cycles-backend # 4.71% backend cycles idle - 26,016,589 instructions # 1.20 insns per cycle - # 0.31 stalled cycles per insn - 2,498,309 branches # 352.283 M/sec - 11,726 branch-misses # 0.47% of all branches [90.77%] + 159 page-faults # 0.022 M/sec + 9,777,089 cycles # 1.382 GHz + 12,454,494 stalled-cycles-frontend # 127.38% frontend cycles idle + 406,671 stalled-cycles-backend # 4.16% backend cycles idle + 26,021,599 instructions # 2.66 insns per cycle + # 0.48 stalled cycles per insn + 2,498,735 branches # 353.195 M/sec + 12,178 branch-misses # 0.49% of all branches [54.81%] - 0.018581414 seconds time elapsed + 0.008028569 seconds time elapsed Iteration 6 Performance counter stats for './matrixMultiply_clang 100': - 7.058442 task-clock # 0.403 CPUs utilized + 7.085217 task-clock # 0.921 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 159 page-faults # 0.023 M/sec - 10,308,729 cycles # 1.460 GHz - 12,432,356 stalled-cycles-frontend # 120.60% frontend cycles idle - 1,782,986 stalled-cycles-backend # 17.30% backend cycles idle - 26,009,608 instructions # 2.52 insns per cycle - # 0.48 stalled cycles per insn - 2,497,040 branches # 353.766 M/sec - 12,110 branch-misses # 0.48% of all branches [52.22%] + 159 page-faults # 0.022 M/sec + 21,555,105 cycles # 3.042 GHz [43.77%] + 7,800,102 stalled-cycles-frontend # 36.19% frontend cycles idle + 1,788,484 stalled-cycles-backend # 8.30% backend cycles idle + 26,025,558 instructions # 1.21 insns per cycle + # 0.30 stalled cycles per insn + 2,499,437 branches # 352.768 M/sec + 11,719 branch-misses # 0.47% of all branches [92.55%] - 0.017501458 seconds time elapsed + 0.007694468 seconds time elapsed Iteration 7 Performance counter stats for './matrixMultiply_clang 100': - 7.138612 task-clock # 0.384 CPUs utilized + 7.112728 task-clock # 0.634 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 159 page-faults # 0.022 M/sec - 10,763,929 cycles # 1.508 GHz - 12,648,274 stalled-cycles-frontend # 117.51% frontend cycles idle - 2,015,841 stalled-cycles-backend # 18.73% backend cycles idle - 26,010,962 instructions # 2.42 insns per cycle - # 0.49 stalled cycles per insn - 2,497,304 branches # 349.830 M/sec - 12,092 branch-misses # 0.48% of all branches [50.74%] + 21,725,477 cycles # 3.054 GHz [45.19%] + 6,733,958 stalled-cycles-frontend # 31.00% frontend cycles idle + 1,849,361 stalled-cycles-backend # 8.51% backend cycles idle + 26,024,358 instructions # 1.20 insns per cycle + # 0.26 stalled cycles per insn + 2,499,211 branches # 351.372 M/sec + 17,591 branch-misses # 0.70% of all branches - 0.018570746 seconds time elapsed + 0.011223178 seconds time elapsed Iteration 8 Performance counter stats for './matrixMultiply_clang 100': - 7.118796 task-clock # 0.324 CPUs utilized + 7.068877 task-clock # 0.921 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 158 page-faults # 0.022 M/sec - 21,667,171 cycles # 3.044 GHz [44.04%] - 12,063,028 stalled-cycles-frontend # 55.67% frontend cycles idle - 1,636,765 stalled-cycles-backend # 7.55% backend cycles idle - 26,017,653 instructions # 1.20 insns per cycle - # 0.46 stalled cycles per insn - 2,498,553 branches # 350.980 M/sec - 12,127 branch-misses # 0.49% of all branches [59.82%] + 159 page-faults # 0.022 M/sec + 21,592,249 cycles # 3.055 GHz [43.63%] + 7,807,904 stalled-cycles-frontend # 36.16% frontend cycles idle + 1,825,484 stalled-cycles-backend # 8.45% backend cycles idle + 26,012,472 instructions # 1.20 insns per cycle + # 0.30 stalled cycles per insn + 2,497,566 branches # 353.319 M/sec + 11,719 branch-misses # 0.47% of all branches [92.66%] - 0.021960130 seconds time elapsed + 0.007675305 seconds time elapsed Iteration 9 Performance counter stats for './matrixMultiply_clang 100': - 7.156216 task-clock # 0.405 CPUs utilized + 7.096350 task-clock # 0.916 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 159 page-faults # 0.022 M/sec - 21,827,960 cycles # 3.050 GHz [44.35%] - 11,521,800 stalled-cycles-frontend # 52.78% frontend cycles idle - 2,057,334 stalled-cycles-backend # 9.43% backend cycles idle - 26,022,933 instructions # 1.19 insns per cycle - # 0.44 stalled cycles per insn - 2,499,369 branches # 349.258 M/sec - 11,919 branch-misses # 0.48% of all branches [64.67%] + 21,606,442 cycles # 3.045 GHz [43.82%] + 9,785,352 stalled-cycles-frontend # 45.29% frontend cycles idle + 1,812,444 stalled-cycles-backend # 8.39% backend cycles idle + 26,027,894 instructions # 1.20 insns per cycle + # 0.38 stalled cycles per insn + 2,499,871 branches # 352.276 M/sec + 11,876 branch-misses # 0.48% of all branches [77.30%] - 0.017682175 seconds time elapsed + 0.007747817 seconds time elapsed Iteration 10 Performance counter stats for './matrixMultiply_clang 100': - 7.179453 task-clock # 0.347 CPUs utilized + 7.154863 task-clock # 0.920 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 158 page-faults # 0.022 M/sec - 21,825,556 cycles # 3.040 GHz [44.51%] - 11,949,081 stalled-cycles-frontend # 54.75% frontend cycles idle - 2,055,046 stalled-cycles-backend # 9.42% backend cycles idle - 26,016,430 instructions # 1.19 insns per cycle - # 0.46 stalled cycles per insn - 2,498,034 branches # 347.942 M/sec - 11,995 branch-misses # 0.48% of all branches [61.55%] - - 0.020701153 seconds time elapsed + 21,831,902 cycles # 3.051 GHz [44.34%] + 10,234,895 stalled-cycles-frontend # 46.88% frontend cycles idle + 2,001,267 stalled-cycles-backend # 9.17% backend cycles idle + 26,012,404 instructions # 1.19 insns per cycle + # 0.39 stalled cycles per insn + 2,497,750 branches # 349.098 M/sec + 11,843 branch-misses # 0.47% of all branches [74.43%] + + 0.007774080 seconds time elapsed diff --git a/sankalp/clang_output_1000 b/sankalp/clang_output_1000 index e8c639e..9a89831 100644 --- a/sankalp/clang_output_1000 +++ b/sankalp/clang_output_1000 @@ -2,179 +2,179 @@ Iteration 1 Performance counter stats for './matrixMultiply_clang 1000': - 8769.076082 task-clock # 0.997 CPUs utilized + 8772.711871 task-clock # 0.997 CPUs utilized 11 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 3,074 page-faults # 0.000 M/sec - 26,838,781,079 cycles # 3.061 GHz [83.34%] - 18,717,658,625 stalled-cycles-frontend # 69.74% frontend cycles idle [83.35%] - 6,713,364,797 stalled-cycles-backend # 25.01% backend cycles idle [66.69%] - 24,147,234,143 instructions # 0.90 insns per cycle + 3,075 page-faults # 0.000 M/sec + 26,862,536,545 cycles # 3.062 GHz [83.32%] + 18,725,249,733 stalled-cycles-frontend # 69.71% frontend cycles idle [83.31%] + 6,721,535,337 stalled-cycles-backend # 25.02% backend cycles idle [66.69%] + 24,147,572,347 instructions # 0.90 insns per cycle # 0.78 stalled cycles per insn [83.35%] - 2,039,098,917 branches # 232.533 M/sec [83.35%] - 1,384,269 branch-misses # 0.07% of all branches [83.31%] + 2,039,094,766 branches # 232.436 M/sec [83.35%] + 1,418,553 branch-misses # 0.07% of all branches [83.35%] - 8.792196408 seconds time elapsed + 8.795813556 seconds time elapsed Iteration 2 Performance counter stats for './matrixMultiply_clang 1000': - 8707.169262 task-clock # 0.997 CPUs utilized - 11 context-switches # 0.000 M/sec - 0 CPU-migrations # 0.000 M/sec + 8729.146601 task-clock # 0.997 CPUs utilized + 10 context-switches # 0.000 M/sec + 1 CPU-migrations # 0.000 M/sec 3,074 page-faults # 0.000 M/sec - 26,661,371,630 cycles # 3.062 GHz [83.32%] - 18,528,714,106 stalled-cycles-frontend # 69.50% frontend cycles idle [83.32%] - 6,570,561,597 stalled-cycles-backend # 24.64% backend cycles idle [66.64%] - 24,147,474,932 instructions # 0.91 insns per cycle - # 0.77 stalled cycles per insn [83.32%] - 2,038,834,096 branches # 234.156 M/sec [83.36%] - 1,492,744 branch-misses # 0.07% of all branches [83.36%] + 26,728,257,650 cycles # 3.062 GHz [83.32%] + 18,598,831,843 stalled-cycles-frontend # 69.58% frontend cycles idle [83.32%] + 6,692,039,477 stalled-cycles-backend # 25.04% backend cycles idle [66.68%] + 24,143,696,220 instructions # 0.90 insns per cycle + # 0.77 stalled cycles per insn [83.36%] + 2,039,089,497 branches # 233.596 M/sec [83.36%] + 1,422,028 branch-misses # 0.07% of all branches [83.33%] - 8.729853429 seconds time elapsed + 8.751936365 seconds time elapsed Iteration 3 Performance counter stats for './matrixMultiply_clang 1000': - 8728.280704 task-clock # 0.996 CPUs utilized - 11 context-switches # 0.000 M/sec + 8743.005082 task-clock # 0.997 CPUs utilized + 10 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 3,075 page-faults # 0.000 M/sec - 26,725,445,478 cycles # 3.062 GHz [83.31%] - 18,593,264,002 stalled-cycles-frontend # 69.57% frontend cycles idle [83.31%] - 6,414,166,461 stalled-cycles-backend # 24.00% backend cycles idle [66.67%] - 24,142,810,780 instructions # 0.90 insns per cycle - # 0.77 stalled cycles per insn [83.36%] - 2,039,114,540 branches # 233.622 M/sec [83.36%] - 1,470,269 branch-misses # 0.07% of all branches [83.35%] + 26,760,005,251 cycles # 3.061 GHz [83.34%] + 18,636,540,129 stalled-cycles-frontend # 69.64% frontend cycles idle [83.34%] + 6,696,730,595 stalled-cycles-backend # 25.03% backend cycles idle [66.69%] + 24,146,489,634 instructions # 0.90 insns per cycle + # 0.77 stalled cycles per insn [83.34%] + 2,039,117,188 branches # 233.228 M/sec [83.34%] + 1,512,869 branch-misses # 0.07% of all branches [83.32%] - 8.761864479 seconds time elapsed + 8.765745532 seconds time elapsed Iteration 4 Performance counter stats for './matrixMultiply_clang 1000': - 8739.132157 task-clock # 0.997 CPUs utilized - 11 context-switches # 0.000 M/sec + 8842.501415 task-clock # 0.997 CPUs utilized + 10 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 3,074 page-faults # 0.000 M/sec - 26,758,988,652 cycles # 3.062 GHz [83.34%] - 18,625,435,877 stalled-cycles-frontend # 69.60% frontend cycles idle [83.34%] - 6,666,046,784 stalled-cycles-backend # 24.91% backend cycles idle [66.67%] - 24,146,277,825 instructions # 0.90 insns per cycle - # 0.77 stalled cycles per insn [83.34%] - 2,039,113,610 branches # 233.331 M/sec [83.33%] - 1,468,997 branch-misses # 0.07% of all branches [83.34%] + 27,075,333,912 cycles # 3.062 GHz [83.30%] + 18,932,760,043 stalled-cycles-frontend # 69.93% frontend cycles idle [83.34%] + 6,827,630,830 stalled-cycles-backend # 25.22% backend cycles idle [66.70%] + 24,145,756,817 instructions # 0.89 insns per cycle + # 0.78 stalled cycles per insn [83.35%] + 2,039,239,015 branches # 230.618 M/sec [83.35%] + 1,496,833 branch-misses # 0.07% of all branches [83.33%] - 8.762159807 seconds time elapsed + 8.865507756 seconds time elapsed Iteration 5 Performance counter stats for './matrixMultiply_clang 1000': - 8809.610617 task-clock # 0.997 CPUs utilized - 11 context-switches # 0.000 M/sec - 2 CPU-migrations # 0.000 M/sec - 3,075 page-faults # 0.000 M/sec - 26,973,625,450 cycles # 3.062 GHz [83.33%] - 18,839,441,332 stalled-cycles-frontend # 69.84% frontend cycles idle [83.33%] - 6,844,458,222 stalled-cycles-backend # 25.37% backend cycles idle [66.67%] - 24,146,135,534 instructions # 0.90 insns per cycle - # 0.78 stalled cycles per insn [83.33%] - 2,038,924,872 branches # 231.443 M/sec [83.33%] - 1,474,654 branch-misses # 0.07% of all branches [83.36%] + 8796.936234 task-clock # 0.997 CPUs utilized + 10 context-switches # 0.000 M/sec + 1 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 26,935,212,998 cycles # 3.062 GHz [83.31%] + 18,792,486,212 stalled-cycles-frontend # 69.77% frontend cycles idle [83.31%] + 6,800,634,030 stalled-cycles-backend # 25.25% backend cycles idle [66.71%] + 24,146,395,655 instructions # 0.90 insns per cycle + # 0.78 stalled cycles per insn [83.35%] + 2,039,272,337 branches # 231.816 M/sec [83.35%] + 1,994,410 branch-misses # 0.10% of all branches [83.32%] - 8.832521671 seconds time elapsed + 8.820084000 seconds time elapsed Iteration 6 Performance counter stats for './matrixMultiply_clang 1000': - 8731.664661 task-clock # 0.997 CPUs utilized - 11 context-switches # 0.000 M/sec + 8777.014189 task-clock # 0.997 CPUs utilized + 10 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 3,075 page-faults # 0.000 M/sec - 26,734,991,494 cycles # 3.062 GHz [83.32%] - 18,600,205,731 stalled-cycles-frontend # 69.57% frontend cycles idle [83.32%] - 6,658,829,501 stalled-cycles-backend # 24.91% backend cycles idle [66.64%] - 24,146,710,783 instructions # 0.90 insns per cycle - # 0.77 stalled cycles per insn [83.32%] - 2,038,590,197 branches # 233.471 M/sec [83.36%] - 1,479,149 branch-misses # 0.07% of all branches [83.37%] + 26,875,307,479 cycles # 3.062 GHz [83.32%] + 18,740,496,292 stalled-cycles-frontend # 69.73% frontend cycles idle [83.32%] + 6,705,290,080 stalled-cycles-backend # 24.95% backend cycles idle [66.68%] + 24,144,531,987 instructions # 0.90 insns per cycle + # 0.78 stalled cycles per insn [83.36%] + 2,039,002,045 branches # 232.312 M/sec [83.36%] + 1,448,566 branch-misses # 0.07% of all branches [83.34%] - 8.754675160 seconds time elapsed + 8.799930149 seconds time elapsed Iteration 7 Performance counter stats for './matrixMultiply_clang 1000': - 8781.456874 task-clock # 0.997 CPUs utilized - 11 context-switches # 0.000 M/sec - 0 CPU-migrations # 0.000 M/sec - 3,074 page-faults # 0.000 M/sec - 26,887,726,132 cycles # 3.062 GHz [83.32%] - 18,751,149,081 stalled-cycles-frontend # 69.74% frontend cycles idle [83.32%] - 6,684,330,025 stalled-cycles-backend # 24.86% backend cycles idle [66.65%] - 24,149,943,484 instructions # 0.90 insns per cycle - # 0.78 stalled cycles per insn [83.32%] - 2,038,235,876 branches # 232.107 M/sec [83.36%] - 1,534,832 branch-misses # 0.08% of all branches [83.36%] + 8796.815931 task-clock # 0.997 CPUs utilized + 10 context-switches # 0.000 M/sec + 1 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,935,093,613 cycles # 3.062 GHz [83.32%] + 18,796,887,746 stalled-cycles-frontend # 69.79% frontend cycles idle [83.31%] + 6,818,360,264 stalled-cycles-backend # 25.31% backend cycles idle [66.69%] + 24,148,095,266 instructions # 0.90 insns per cycle + # 0.78 stalled cycles per insn [83.35%] + 2,039,005,393 branches # 231.789 M/sec [83.35%] + 1,638,652 branch-misses # 0.08% of all branches [83.35%] - 8.804251361 seconds time elapsed + 8.819932807 seconds time elapsed Iteration 8 Performance counter stats for './matrixMultiply_clang 1000': - 8795.785758 task-clock # 0.992 CPUs utilized + 8732.240674 task-clock # 0.997 CPUs utilized 10 context-switches # 0.000 M/sec - 1 CPU-migrations # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec 3,075 page-faults # 0.000 M/sec - 26,931,981,875 cycles # 3.062 GHz [83.31%] - 18,798,044,585 stalled-cycles-frontend # 69.80% frontend cycles idle [83.31%] - 6,712,144,445 stalled-cycles-backend # 24.92% backend cycles idle [66.70%] - 24,146,517,257 instructions # 0.90 insns per cycle - # 0.78 stalled cycles per insn [83.35%] - 2,039,265,646 branches # 231.846 M/sec [83.35%] - 1,412,000 branch-misses # 0.07% of all branches [83.33%] + 26,737,479,415 cycles # 3.062 GHz [83.32%] + 18,601,807,786 stalled-cycles-frontend # 69.57% frontend cycles idle [83.32%] + 6,561,397,504 stalled-cycles-backend # 24.54% backend cycles idle [66.64%] + 24,149,772,261 instructions # 0.90 insns per cycle + # 0.77 stalled cycles per insn [83.32%] + 2,038,395,170 branches # 233.433 M/sec [83.36%] + 1,485,301 branch-misses # 0.07% of all branches [83.36%] - 8.867892519 seconds time elapsed + 8.757509097 seconds time elapsed Iteration 9 Performance counter stats for './matrixMultiply_clang 1000': - 8791.557708 task-clock # 0.997 CPUs utilized - 10 context-switches # 0.000 M/sec + 8706.559421 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 3,075 page-faults # 0.000 M/sec - 26,912,031,887 cycles # 3.061 GHz [83.30%] - 18,778,586,140 stalled-cycles-frontend # 69.78% frontend cycles idle [83.34%] - 6,841,179,980 stalled-cycles-backend # 25.42% backend cycles idle [66.69%] - 24,147,072,411 instructions # 0.90 insns per cycle - # 0.78 stalled cycles per insn [83.34%] - 2,039,492,116 branches # 231.983 M/sec [83.34%] - 1,463,732 branch-misses # 0.07% of all branches [83.34%] + 26,659,676,782 cycles # 3.062 GHz [83.32%] + 18,523,302,399 stalled-cycles-frontend # 69.48% frontend cycles idle [83.32%] + 6,570,207,292 stalled-cycles-backend # 24.64% backend cycles idle [66.67%] + 24,136,767,039 instructions # 0.91 insns per cycle + # 0.77 stalled cycles per insn [83.36%] + 2,038,880,459 branches # 234.178 M/sec [83.36%] + 1,511,548 branch-misses # 0.07% of all branches [83.36%] - 8.814620354 seconds time elapsed + 8.729259859 seconds time elapsed Iteration 10 Performance counter stats for './matrixMultiply_clang 1000': - 8702.176014 task-clock # 0.997 CPUs utilized - 10 context-switches # 0.000 M/sec + 8744.544589 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 3,074 page-faults # 0.000 M/sec - 26,645,316,492 cycles # 3.062 GHz [83.31%] - 18,513,602,676 stalled-cycles-frontend # 69.48% frontend cycles idle [83.31%] - 6,473,846,309 stalled-cycles-backend # 24.30% backend cycles idle [66.71%] - 24,146,713,933 instructions # 0.91 insns per cycle - # 0.77 stalled cycles per insn [83.36%] - 2,039,226,629 branches # 234.335 M/sec [83.35%] - 1,443,603 branch-misses # 0.07% of all branches [83.32%] - - 8.725090631 seconds time elapsed + 26,775,031,633 cycles # 3.062 GHz [83.30%] + 18,640,605,103 stalled-cycles-frontend # 69.62% frontend cycles idle [83.34%] + 6,628,187,063 stalled-cycles-backend # 24.76% backend cycles idle [66.69%] + 24,147,316,203 instructions # 0.90 insns per cycle + # 0.77 stalled cycles per insn [83.35%] + 2,038,977,599 branches # 233.171 M/sec [83.35%] + 1,410,532 branch-misses # 0.07% of all branches [83.34%] + + 8.767426048 seconds time elapsed diff --git a/sankalp/clang_output_10000 b/sankalp/clang_output_10000 deleted file mode 100644 index 78f379f..0000000 --- a/sankalp/clang_output_10000 +++ /dev/null @@ -1 +0,0 @@ -Iteration 1 diff --git a/sankalp/clang_output_3000 b/sankalp/clang_output_3000 new file mode 100644 index 0000000..67b7ded --- /dev/null +++ b/sankalp/clang_output_3000 @@ -0,0 +1,180 @@ +Iteration 1 + + Performance counter stats for './matrixMultiply_clang 3000': + + 435870.895044 task-clock # 0.997 CPUs utilized + 547 context-switches # 0.000 M/sec + 1 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 1,334,464,156,843 cycles # 3.062 GHz [83.33%] + 1,116,588,716,003 stalled-cycles-frontend # 83.67% frontend cycles idle [83.33%] + 760,549,402,793 stalled-cycles-backend # 56.99% backend cycles idle [66.67%] + 649,625,214,346 instructions # 0.49 insns per cycle + # 1.72 stalled cycles per insn [83.33%] + 54,405,175,566 branches # 124.819 M/sec [83.33%] + 31,641,759 branch-misses # 0.06% of all branches [83.33%] + + 436.970014074 seconds time elapsed + +Iteration 2 + + Performance counter stats for './matrixMultiply_clang 3000': + + 436941.138626 task-clock # 0.997 CPUs utilized + 547 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 1,337,726,968,521 cycles # 3.062 GHz [83.33%] + 1,119,881,200,210 stalled-cycles-frontend # 83.72% frontend cycles idle [83.33%] + 763,263,721,765 stalled-cycles-backend # 57.06% backend cycles idle [66.67%] + 649,639,608,656 instructions # 0.49 insns per cycle + # 1.72 stalled cycles per insn [83.33%] + 54,408,743,887 branches # 124.522 M/sec [83.33%] + 30,480,854 branch-misses # 0.06% of all branches [83.33%] + + 438.038257320 seconds time elapsed + +Iteration 3 + + Performance counter stats for './matrixMultiply_clang 3000': + + 436215.746174 task-clock # 0.997 CPUs utilized + 545 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 1,335,345,423,489 cycles # 3.061 GHz [83.33%] + 1,117,485,526,106 stalled-cycles-frontend # 83.69% frontend cycles idle [83.33%] + 759,502,326,023 stalled-cycles-backend # 56.88% backend cycles idle [66.67%] + 649,657,788,647 instructions # 0.49 insns per cycle + # 1.72 stalled cycles per insn [83.33%] + 54,408,979,455 branches # 124.730 M/sec [83.33%] + 29,850,475 branch-misses # 0.05% of all branches [83.33%] + + 437.311297435 seconds time elapsed + +Iteration 4 + + Performance counter stats for './matrixMultiply_clang 3000': + + 434660.839337 task-clock # 0.997 CPUs utilized + 544 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 1,330,746,441,063 cycles # 3.062 GHz [83.33%] + 1,112,865,667,103 stalled-cycles-frontend # 83.63% frontend cycles idle [83.33%] + 755,733,809,906 stalled-cycles-backend # 56.79% backend cycles idle [66.67%] + 649,629,467,424 instructions # 0.49 insns per cycle + # 1.71 stalled cycles per insn [83.33%] + 54,405,944,056 branches # 125.169 M/sec [83.33%] + 30,324,200 branch-misses # 0.06% of all branches [83.33%] + + 435.752964281 seconds time elapsed + +Iteration 5 + + Performance counter stats for './matrixMultiply_clang 3000': + + 438781.815155 task-clock # 0.997 CPUs utilized + 550 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 1,343,359,727,224 cycles # 3.062 GHz [83.33%] + 1,125,419,926,576 stalled-cycles-frontend # 83.78% frontend cycles idle [83.33%] + 768,084,785,121 stalled-cycles-backend # 57.18% backend cycles idle [66.67%] + 649,665,121,934 instructions # 0.48 insns per cycle + # 1.73 stalled cycles per insn [83.33%] + 54,408,662,474 branches # 123.999 M/sec [83.33%] + 30,594,105 branch-misses # 0.06% of all branches [83.33%] + + 439.883691061 seconds time elapsed + +Iteration 6 + + Performance counter stats for './matrixMultiply_clang 3000': + + 436113.735318 task-clock # 0.997 CPUs utilized + 546 context-switches # 0.000 M/sec + 1 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 1,335,204,076,506 cycles # 3.062 GHz [83.33%] + 1,117,295,887,936 stalled-cycles-frontend # 83.68% frontend cycles idle [83.33%] + 749,322,042,821 stalled-cycles-backend # 56.12% backend cycles idle [66.67%] + 649,625,709,220 instructions # 0.49 insns per cycle + # 1.72 stalled cycles per insn [83.33%] + 54,406,397,138 branches # 124.753 M/sec [83.33%] + 30,194,121 branch-misses # 0.06% of all branches [83.33%] + + 437.209951172 seconds time elapsed + +Iteration 7 + + Performance counter stats for './matrixMultiply_clang 3000': + + 436287.286674 task-clock # 0.997 CPUs utilized + 546 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 1,335,745,938,506 cycles # 3.062 GHz [83.33%] + 1,117,869,103,390 stalled-cycles-frontend # 83.69% frontend cycles idle [83.33%] + 758,032,143,399 stalled-cycles-backend # 56.75% backend cycles idle [66.67%] + 649,644,491,128 instructions # 0.49 insns per cycle + # 1.72 stalled cycles per insn [83.33%] + 54,406,028,719 branches # 124.702 M/sec [83.33%] + 30,072,867 branch-misses # 0.06% of all branches [83.33%] + + 437.394865809 seconds time elapsed + +Iteration 8 + + Performance counter stats for './matrixMultiply_clang 3000': + + 437543.330568 task-clock # 0.997 CPUs utilized + 546 context-switches # 0.000 M/sec + 2 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 1,339,450,532,302 cycles # 3.061 GHz [83.33%] + 1,121,680,166,977 stalled-cycles-frontend # 83.74% frontend cycles idle [83.33%] + 764,834,662,020 stalled-cycles-backend # 57.10% backend cycles idle [66.67%] + 649,641,184,728 instructions # 0.49 insns per cycle + # 1.73 stalled cycles per insn [83.33%] + 54,409,529,224 branches # 124.352 M/sec [83.33%] + 30,174,500 branch-misses # 0.06% of all branches [83.33%] + + 438.656887155 seconds time elapsed + +Iteration 9 + + Performance counter stats for './matrixMultiply_clang 3000': + + 437737.932628 task-clock # 0.997 CPUs utilized + 549 context-switches # 0.000 M/sec + 2 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 1,340,168,475,305 cycles # 3.062 GHz [83.33%] + 1,122,266,697,544 stalled-cycles-frontend # 83.74% frontend cycles idle [83.33%] + 762,038,651,178 stalled-cycles-backend # 56.86% backend cycles idle [66.67%] + 649,657,223,429 instructions # 0.48 insns per cycle + # 1.73 stalled cycles per insn [83.33%] + 54,407,338,177 branches # 124.292 M/sec [83.33%] + 29,532,180 branch-misses # 0.05% of all branches [83.33%] + + 438.836735033 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang 3000': + + 438463.682513 task-clock # 0.997 CPUs utilized + 548 context-switches # 0.000 M/sec + 1 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 1,342,374,344,170 cycles # 3.062 GHz [83.33%] + 1,124,481,215,389 stalled-cycles-frontend # 83.77% frontend cycles idle [83.33%] + 765,398,751,304 stalled-cycles-backend # 57.02% backend cycles idle [66.67%] + 649,726,904,426 instructions # 0.48 insns per cycle + # 1.73 stalled cycles per insn [83.33%] + 54,402,887,069 branches # 124.076 M/sec [83.33%] + 30,000,209 branch-misses # 0.06% of all branches [83.33%] + + 439.566853455 seconds time elapsed + diff --git a/sankalp/clang_output_optimized_10 b/sankalp/clang_output_optimized_10 new file mode 100644 index 0000000..62f3d83 --- /dev/null +++ b/sankalp/clang_output_optimized_10 @@ -0,0 +1,180 @@ +Iteration 10 + + Performance counter stats for './matrixMultiply_clang_optimized 10': + + 0.254738 task-clock # 0.186 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.499 M/sec + 773,561 cycles # 3.037 GHz + 467,851 stalled-cycles-frontend # 60.48% frontend cycles idle + 310,294 stalled-cycles-backend # 40.11% backend cycles idle + 535,991 instructions # 0.69 insns per cycle + # 0.87 stalled cycles per insn + 104,877 branches # 411.705 M/sec + branch-misses + + 0.001369651 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang_optimized 10': + + 0.253462 task-clock # 0.312 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.501 M/sec + 770,150 cycles # 3.039 GHz + 467,124 stalled-cycles-frontend # 60.65% frontend cycles idle + 321,201 stalled-cycles-backend # 41.71% backend cycles idle + 537,198 instructions # 0.70 insns per cycle + # 0.87 stalled cycles per insn + 105,085 branches # 414.599 M/sec + branch-misses + + 0.000812752 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang_optimized 10': + + 0.252371 task-clock # 0.313 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.503 M/sec + 767,605 cycles # 3.042 GHz + 462,629 stalled-cycles-frontend # 60.27% frontend cycles idle + 328,909 stalled-cycles-backend # 42.85% backend cycles idle + 536,671 instructions # 0.70 insns per cycle + # 0.86 stalled cycles per insn + 104,982 branches # 415.983 M/sec + branch-misses + + 0.000806311 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang_optimized 10': + + 0.255736 task-clock # 0.336 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 126 page-faults # 0.493 M/sec + 762,557 cycles # 2.982 GHz + 460,565 stalled-cycles-frontend # 60.40% frontend cycles idle + 323,600 stalled-cycles-backend # 42.44% backend cycles idle + 538,377 instructions # 0.71 insns per cycle + # 0.86 stalled cycles per insn + 105,324 branches # 411.847 M/sec + branch-misses + + 0.000760748 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang_optimized 10': + + 0.256619 task-clock # 0.325 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.495 M/sec + 772,169 cycles # 3.009 GHz + 465,669 stalled-cycles-frontend # 60.31% frontend cycles idle + 306,523 stalled-cycles-backend # 39.70% backend cycles idle + 541,471 instructions # 0.70 insns per cycle + # 0.86 stalled cycles per insn + 105,836 branches # 412.425 M/sec + branch-misses + + 0.000790370 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang_optimized 10': + + 0.252237 task-clock # 0.135 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.503 M/sec + 766,577 cycles # 3.039 GHz + 463,314 stalled-cycles-frontend # 60.44% frontend cycles idle + 326,037 stalled-cycles-backend # 42.53% backend cycles idle + 538,054 instructions # 0.70 insns per cycle + # 0.86 stalled cycles per insn + 105,236 branches # 417.211 M/sec + branch-misses + + 0.001870156 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang_optimized 10': + + 0.260936 task-clock # 0.320 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.487 M/sec + 779,990 cycles # 2.989 GHz + 473,885 stalled-cycles-frontend # 60.76% frontend cycles idle + 324,297 stalled-cycles-backend # 41.58% backend cycles idle + 539,619 instructions # 0.69 insns per cycle + # 0.88 stalled cycles per insn + 105,485 branches # 404.256 M/sec + branch-misses + + 0.000815701 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang_optimized 10': + + 0.246294 task-clock # 0.338 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.516 M/sec + cycles + 443,508 stalled-cycles-frontend # 0.00% frontend cycles idle + 317,155 stalled-cycles-backend # 0.00% backend cycles idle + 542,075 instructions # 0.00 insns per cycle + # 0.82 stalled cycles per insn + 105,925 branches # 430.075 M/sec + 6,517 branch-misses # 6.15% of all branches + + 0.000728607 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang_optimized 10': + + 0.242933 task-clock # 0.293 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.523 M/sec + 727,267 cycles # 2.994 GHz + 425,098 stalled-cycles-frontend # 58.45% frontend cycles idle + 304,182 stalled-cycles-backend # 41.83% backend cycles idle + 537,280 instructions # 0.74 insns per cycle + # 0.79 stalled cycles per insn + 105,121 branches # 432.716 M/sec + branch-misses + + 0.000830440 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_clang_optimized 10': + + 0.256732 task-clock # 0.216 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 126 page-faults # 0.491 M/sec + 768,563 cycles # 2.994 GHz + 464,524 stalled-cycles-frontend # 60.44% frontend cycles idle + 330,887 stalled-cycles-backend # 43.05% backend cycles idle + 536,485 instructions # 0.70 insns per cycle + # 0.87 stalled cycles per insn + 104,979 branches # 408.905 M/sec + branch-misses + + 0.001186233 seconds time elapsed + diff --git a/sankalp/clang_output_optimized_100 b/sankalp/clang_output_optimized_100 new file mode 100644 index 0000000..0924dc0 --- /dev/null +++ b/sankalp/clang_output_optimized_100 @@ -0,0 +1,180 @@ +Iteration 100 + + Performance counter stats for './matrixMultiply_clang_optimized 100': + + 1.962034 task-clock # 0.665 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.081 M/sec + 6,010,763 cycles # 3.064 GHz + 884,158 stalled-cycles-frontend # 14.71% frontend cycles idle + 737,567 stalled-cycles-backend # 12.27% backend cycles idle + 11,047,971 instructions # 1.84 insns per cycle + # 0.08 stalled cycles per insn + 1,468,085 branches # 748.246 M/sec + branch-misses + + 0.002949295 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_clang_optimized 100': + + 1.948140 task-clock # 0.439 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.082 M/sec + 5,966,906 cycles # 3.063 GHz + 855,303 stalled-cycles-frontend # 14.33% frontend cycles idle + 694,676 stalled-cycles-backend # 11.64% backend cycles idle + 11,045,804 instructions # 1.85 insns per cycle + # 0.08 stalled cycles per insn + 1,467,692 branches # 753.381 M/sec + branch-misses + + 0.004435255 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_clang_optimized 100': + + 1.947260 task-clock # 0.765 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 158 page-faults # 0.081 M/sec + 5,964,814 cycles # 3.063 GHz + 855,649 stalled-cycles-frontend # 14.34% frontend cycles idle + 710,245 stalled-cycles-backend # 11.91% backend cycles idle + 11,043,997 instructions # 1.85 insns per cycle + # 0.08 stalled cycles per insn + 1,467,392 branches # 753.568 M/sec + branch-misses + + 0.002546083 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_clang_optimized 100': + + 1.947560 task-clock # 0.765 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 158 page-faults # 0.081 M/sec + 5,952,625 cycles # 3.056 GHz + 850,406 stalled-cycles-frontend # 14.29% frontend cycles idle + 375,296 stalled-cycles-backend # 6.30% backend cycles idle + 11,022,704 instructions # 1.85 insns per cycle + # 0.08 stalled cycles per insn + 1,463,754 branches # 751.584 M/sec + branch-misses + + 0.002546145 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_clang_optimized 100': + + 1.943698 task-clock # 0.674 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.082 M/sec + 5,952,763 cycles # 3.063 GHz + 846,909 stalled-cycles-frontend # 14.23% frontend cycles idle + 708,785 stalled-cycles-backend # 11.91% backend cycles idle + 11,039,332 instructions # 1.85 insns per cycle + # 0.08 stalled cycles per insn + 1,466,568 branches # 754.525 M/sec + branch-misses + + 0.002885840 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_clang_optimized 100': + + 1.949368 task-clock # 0.765 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.082 M/sec + 5,970,129 cycles # 3.063 GHz + 859,909 stalled-cycles-frontend # 14.40% frontend cycles idle + 722,803 stalled-cycles-backend # 12.11% backend cycles idle + 11,045,169 instructions # 1.85 insns per cycle + # 0.08 stalled cycles per insn + 1,467,572 branches # 752.845 M/sec + branch-misses + + 0.002548488 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_clang_optimized 100': + + 2.021036 task-clock # 0.771 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 158 page-faults # 0.078 M/sec + 3,528,170 cycles # 1.746 GHz + 1,009,925 stalled-cycles-frontend # 28.62% frontend cycles idle + 726,356 stalled-cycles-backend # 20.59% backend cycles idle + 11,046,780 instructions # 3.13 insns per cycle + # 0.09 stalled cycles per insn + 1,467,943 branches # 726.332 M/sec + 14,164 branch-misses # 0.96% of all branches [42.61%] + + 0.002620696 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_clang_optimized 100': + + 1.959283 task-clock # 0.779 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 158 page-faults # 0.081 M/sec + 5,463,297 cycles # 2.788 GHz + 862,862 stalled-cycles-frontend # 15.79% frontend cycles idle + 699,735 stalled-cycles-backend # 12.81% backend cycles idle + 11,048,711 instructions # 2.02 insns per cycle + # 0.08 stalled cycles per insn + 1,468,247 branches # 749.380 M/sec + 17,826 branch-misses # 1.21% of all branches [ 8.79%] + + 0.002515733 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_clang_optimized 100': + + 1.948017 task-clock # 0.780 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 158 page-faults # 0.081 M/sec + 5,966,888 cycles # 3.063 GHz + 856,064 stalled-cycles-frontend # 14.35% frontend cycles idle + 711,299 stalled-cycles-backend # 11.92% backend cycles idle + 11,046,813 instructions # 1.85 insns per cycle + # 0.08 stalled cycles per insn + 1,467,618 branches # 753.391 M/sec + branch-misses + + 0.002498457 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_clang_optimized 100': + + 1.949114 task-clock # 0.785 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.082 M/sec + 5,952,365 cycles # 3.054 GHz + 848,125 stalled-cycles-frontend # 14.25% frontend cycles idle + 694,927 stalled-cycles-backend # 11.67% backend cycles idle + 11,033,183 instructions # 1.85 insns per cycle + # 0.08 stalled cycles per insn + 1,465,119 branches # 751.685 M/sec + branch-misses + + 0.002481376 seconds time elapsed + diff --git a/sankalp/clang_output_optimized_1000 b/sankalp/clang_output_optimized_1000 new file mode 100644 index 0000000..8ca645e --- /dev/null +++ b/sankalp/clang_output_optimized_1000 @@ -0,0 +1,180 @@ +Iteration 1000 + + Performance counter stats for './matrixMultiply_clang_optimized 1000': + + 8514.301983 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 26,069,051,123 cycles # 3.062 GHz [83.32%] + 20,139,826,256 stalled-cycles-frontend # 77.26% frontend cycles idle [83.32%] + 10,074,406,315 stalled-cycles-backend # 38.65% backend cycles idle [66.68%] + 9,145,854,517 instructions # 0.35 insns per cycle + # 2.20 stalled cycles per insn [83.36%] + 1,035,460,914 branches # 121.614 M/sec [83.36%] + 1,085,246 branch-misses # 0.10% of all branches [83.34%] + + 8.536668414 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_clang_optimized 1000': + + 8533.423369 task-clock # 0.997 CPUs utilized + 10 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,129,292,006 cycles # 3.062 GHz [83.31%] + 20,207,838,356 stalled-cycles-frontend # 77.34% frontend cycles idle [83.31%] + 9,007,753,681 stalled-cycles-backend # 34.47% backend cycles idle [66.71%] + 9,149,245,795 instructions # 0.35 insns per cycle + # 2.21 stalled cycles per insn [83.35%] + 1,035,427,362 branches # 121.338 M/sec [83.35%] + 1,083,353 branch-misses # 0.10% of all branches [83.33%] + + 8.555680250 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_clang_optimized 1000': + + 8524.533087 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,102,555,736 cycles # 3.062 GHz [83.34%] + 20,174,883,322 stalled-cycles-frontend # 77.29% frontend cycles idle [83.34%] + 9,795,587,822 stalled-cycles-backend # 37.53% backend cycles idle [66.67%] + 9,149,253,149 instructions # 0.35 insns per cycle + # 2.21 stalled cycles per insn [83.34%] + 1,035,420,531 branches # 121.464 M/sec [83.34%] + 1,081,429 branch-misses # 0.10% of all branches [83.35%] + + 8.546955580 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_clang_optimized 1000': + + 8558.061972 task-clock # 0.997 CPUs utilized + 12 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,204,898,727 cycles # 3.062 GHz [83.31%] + 20,288,761,306 stalled-cycles-frontend # 77.42% frontend cycles idle [83.31%] + 9,856,648,639 stalled-cycles-backend # 37.61% backend cycles idle [66.71%] + 9,149,320,923 instructions # 0.35 insns per cycle + # 2.22 stalled cycles per insn [83.36%] + 1,035,453,678 branches # 120.992 M/sec [83.36%] + 1,081,282 branch-misses # 0.10% of all branches [83.32%] + + 8.580455767 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_clang_optimized 1000': + + 8513.464377 task-clock # 0.997 CPUs utilized + 10 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,067,845,209 cycles # 3.062 GHz [83.32%] + 20,133,798,950 stalled-cycles-frontend # 77.24% frontend cycles idle [83.32%] + 8,624,644,078 stalled-cycles-backend # 33.09% backend cycles idle [66.67%] + 9,145,773,113 instructions # 0.35 insns per cycle + # 2.20 stalled cycles per insn [83.36%] + 1,035,458,509 branches # 121.626 M/sec [83.36%] + 1,089,716 branch-misses # 0.11% of all branches [83.35%] + + 8.535682415 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_clang_optimized 1000': + + 8519.104099 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,085,519,997 cycles # 3.062 GHz [83.33%] + 20,156,984,198 stalled-cycles-frontend # 77.27% frontend cycles idle [83.33%] + 8,787,917,469 stalled-cycles-backend # 33.69% backend cycles idle [66.65%] + 9,149,566,196 instructions # 0.35 insns per cycle + # 2.20 stalled cycles per insn [83.33%] + 1,034,627,980 branches # 121.448 M/sec [83.37%] + 1,113,417 branch-misses # 0.11% of all branches [83.36%] + + 8.541426918 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_clang_optimized 1000': + + 8525.969981 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 26,106,906,116 cycles # 3.062 GHz [83.30%] + 20,176,019,515 stalled-cycles-frontend # 77.28% frontend cycles idle [83.34%] + 8,065,406,321 stalled-cycles-backend # 30.89% backend cycles idle [66.68%] + 9,149,290,275 instructions # 0.35 insns per cycle + # 2.21 stalled cycles per insn [83.34%] + 1,035,439,656 branches # 121.445 M/sec [83.34%] + 1,090,091 branch-misses # 0.11% of all branches [83.34%] + + 8.548269885 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_clang_optimized 1000': + + 8530.756566 task-clock # 0.997 CPUs utilized + 10 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,120,379,015 cycles # 3.062 GHz [83.30%] + 20,196,068,508 stalled-cycles-frontend # 77.32% frontend cycles idle [83.30%] + 9,181,947,822 stalled-cycles-backend # 35.15% backend cycles idle [66.70%] + 9,150,553,323 instructions # 0.35 insns per cycle + # 2.21 stalled cycles per insn [83.35%] + 1,035,656,501 branches # 121.403 M/sec [83.35%] + 1,099,565 branch-misses # 0.11% of all branches [83.35%] + + 8.553018234 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_clang_optimized 1000': + + 8537.180438 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,141,449,324 cycles # 3.062 GHz [83.31%] + 20,217,377,597 stalled-cycles-frontend # 77.34% frontend cycles idle [83.32%] + 7,883,092,404 stalled-cycles-backend # 30.16% backend cycles idle [66.67%] + 9,145,842,530 instructions # 0.35 insns per cycle + # 2.21 stalled cycles per insn [83.36%] + 1,035,464,658 branches # 121.289 M/sec [83.36%] + 1,094,606 branch-misses # 0.11% of all branches [83.35%] + + 8.561051194 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_clang_optimized 1000': + + 8521.967213 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,092,302,546 cycles # 3.062 GHz [83.33%] + 20,165,608,254 stalled-cycles-frontend # 77.29% frontend cycles idle [83.33%] + 9,103,949,656 stalled-cycles-backend # 34.89% backend cycles idle [66.66%] + 9,149,688,050 instructions # 0.35 insns per cycle + # 2.20 stalled cycles per insn [83.33%] + 1,035,529,349 branches # 121.513 M/sec [83.33%] + 1,095,002 branch-misses # 0.11% of all branches [83.36%] + + 8.544265295 seconds time elapsed + diff --git a/sankalp/clang_output_optimized_3000 b/sankalp/clang_output_optimized_3000 new file mode 100644 index 0000000..58371ab --- /dev/null +++ b/sankalp/clang_output_optimized_3000 @@ -0,0 +1,180 @@ +Iteration 3000 + + Performance counter stats for './matrixMultiply_clang_optimized 3000': + + 312664.887494 task-clock # 0.997 CPUs utilized + 392 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 957,194,643,582 cycles # 3.061 GHz [83.33%] + 801,278,508,780 stalled-cycles-frontend # 83.71% frontend cycles idle [83.33%] + 654,614,518,739 stalled-cycles-backend # 68.39% backend cycles idle [66.67%] + 244,566,461,305 instructions # 0.26 insns per cycle + # 3.28 stalled cycles per insn [83.33%] + 27,360,317,759 branches # 87.507 M/sec [83.33%] + 13,880,850 branch-misses # 0.05% of all branches [83.33%] + + 313.450793586 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_clang_optimized 3000': + + 312283.288554 task-clock # 0.997 CPUs utilized + 391 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 955,893,620,144 cycles # 3.061 GHz [83.33%] + 799,959,307,996 stalled-cycles-frontend # 83.69% frontend cycles idle [83.33%] + 624,558,986,255 stalled-cycles-backend # 65.34% backend cycles idle [66.67%] + 244,567,518,923 instructions # 0.26 insns per cycle + # 3.27 stalled cycles per insn [83.33%] + 27,361,026,722 branches # 87.616 M/sec [83.33%] + 12,927,656 branch-misses # 0.05% of all branches [83.33%] + + 313.069529799 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_clang_optimized 3000': + + 314069.326289 task-clock # 0.997 CPUs utilized + 393 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 961,431,758,518 cycles # 3.061 GHz [83.33%] + 805,494,232,345 stalled-cycles-frontend # 83.78% frontend cycles idle [83.33%] + 630,365,211,190 stalled-cycles-backend # 65.57% backend cycles idle [66.67%] + 244,567,418,849 instructions # 0.25 insns per cycle + # 3.29 stalled cycles per insn [83.33%] + 27,359,270,785 branches # 87.112 M/sec [83.33%] + 12,925,094 branch-misses # 0.05% of all branches [83.33%] + + 314.901173672 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_clang_optimized 3000': + + 313605.668691 task-clock # 0.997 CPUs utilized + 393 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 960,099,341,079 cycles # 3.061 GHz [83.33%] + 804,209,745,312 stalled-cycles-frontend # 83.76% frontend cycles idle [83.33%] + 650,945,842,810 stalled-cycles-backend # 67.80% backend cycles idle [66.67%] + 244,552,833,312 instructions # 0.25 insns per cycle + # 3.29 stalled cycles per insn [83.33%] + 27,360,236,458 branches # 87.244 M/sec [83.33%] + 13,471,738 branch-misses # 0.05% of all branches [83.33%] + + 314.393884834 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_clang_optimized 3000': + + 313966.377674 task-clock # 0.997 CPUs utilized + 394 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 961,182,730,314 cycles # 3.061 GHz [83.33%] + 805,474,890,245 stalled-cycles-frontend # 83.80% frontend cycles idle [83.33%] + 655,175,905,580 stalled-cycles-backend # 68.16% backend cycles idle [66.67%] + 244,546,839,813 instructions # 0.25 insns per cycle + # 3.29 stalled cycles per insn [83.33%] + 27,360,307,420 branches # 87.144 M/sec [83.33%] + 13,625,821 branch-misses # 0.05% of all branches [83.33%] + + 314.755038625 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_clang_optimized 3000': + + 315025.036094 task-clock # 0.997 CPUs utilized + 395 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 964,449,384,871 cycles # 3.062 GHz [83.33%] + 808,592,472,928 stalled-cycles-frontend # 83.84% frontend cycles idle [83.33%] + 646,222,264,512 stalled-cycles-backend # 67.00% backend cycles idle [66.67%] + 244,552,197,147 instructions # 0.25 insns per cycle + # 3.31 stalled cycles per insn [83.33%] + 27,358,770,723 branches # 86.846 M/sec [83.33%] + 13,799,312 branch-misses # 0.05% of all branches [83.33%] + + 315.816146040 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_clang_optimized 3000': + + 313884.082830 task-clock # 0.997 CPUs utilized + 394 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 960,938,455,254 cycles # 3.061 GHz [83.33%] + 804,967,633,007 stalled-cycles-frontend # 83.77% frontend cycles idle [83.33%] + 658,597,647,955 stalled-cycles-backend # 68.54% backend cycles idle [66.67%] + 244,565,187,191 instructions # 0.25 insns per cycle + # 3.29 stalled cycles per insn [83.33%] + 27,361,335,010 branches # 87.170 M/sec [83.33%] + 13,868,957 branch-misses # 0.05% of all branches [83.33%] + + 314.672105390 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_clang_optimized 3000': + + 313070.788902 task-clock # 0.997 CPUs utilized + 393 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 958,299,705,663 cycles # 3.061 GHz [83.33%] + 802,791,733,825 stalled-cycles-frontend # 83.77% frontend cycles idle [83.33%] + 620,995,884,839 stalled-cycles-backend # 64.80% backend cycles idle [66.67%] + 244,575,491,330 instructions # 0.26 insns per cycle + # 3.28 stalled cycles per insn [83.33%] + 27,361,827,022 branches # 87.398 M/sec [83.33%] + 13,026,877 branch-misses # 0.05% of all branches [83.33%] + + 313.856410059 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_clang_optimized 3000': + + 315731.190936 task-clock # 0.997 CPUs utilized + 396 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 966,582,281,999 cycles # 3.061 GHz [83.33%] + 810,643,864,844 stalled-cycles-frontend # 83.87% frontend cycles idle [83.33%] + 653,645,811,041 stalled-cycles-backend # 67.62% backend cycles idle [66.67%] + 244,568,729,928 instructions # 0.25 insns per cycle + # 3.31 stalled cycles per insn [83.33%] + 27,361,596,499 branches # 86.661 M/sec [83.33%] + 13,710,796 branch-misses # 0.05% of all branches [83.33%] + + 316.523716704 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_clang_optimized 3000': + + 312952.702244 task-clock # 0.997 CPUs utilized + 393 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 957,972,776,248 cycles # 3.061 GHz [83.33%] + 802,443,920,049 stalled-cycles-frontend # 83.76% frontend cycles idle [83.33%] + 640,914,452,512 stalled-cycles-backend # 66.90% backend cycles idle [66.67%] + 244,578,176,924 instructions # 0.26 insns per cycle + # 3.28 stalled cycles per insn [83.33%] + 27,363,549,903 branches # 87.437 M/sec [83.33%] + 13,598,396 branch-misses # 0.05% of all branches [83.33%] + + 313.738715536 seconds time elapsed + diff --git a/sankalp/gcc_output_10 b/sankalp/gcc_output_10 index de1da14..d533c09 100644 --- a/sankalp/gcc_output_10 +++ b/sankalp/gcc_output_10 @@ -2,179 +2,179 @@ Iteration 1 Performance counter stats for './matrixMultiply 10': - 0.266541 task-clock # 0.194 CPUs utilized + 0.264216 task-clock # 0.234 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 127 page-faults # 0.476 M/sec - 807,616 cycles # 3.030 GHz - 489,993 stalled-cycles-frontend # 60.67% frontend cycles idle - 346,852 stalled-cycles-backend # 42.95% backend cycles idle - 583,105 instructions # 0.72 insns per cycle + 127 page-faults # 0.481 M/sec + 801,618 cycles # 3.034 GHz + 485,598 stalled-cycles-frontend # 60.58% frontend cycles idle + 331,763 stalled-cycles-backend # 41.39% backend cycles idle + 579,543 instructions # 0.72 insns per cycle # 0.84 stalled cycles per insn - 105,807 branches # 396.963 M/sec + 105,217 branches # 398.223 M/sec branch-misses - 0.001377109 seconds time elapsed + 0.001130275 seconds time elapsed Iteration 2 Performance counter stats for './matrixMultiply 10': - 0.260838 task-clock # 0.259 CPUs utilized + 0.260784 task-clock # 0.312 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 126 page-faults # 0.483 M/sec - 792,688 cycles # 3.039 GHz - 477,943 stalled-cycles-frontend # 60.29% frontend cycles idle - 321,946 stalled-cycles-backend # 40.61% backend cycles idle - 575,323 instructions # 0.73 insns per cycle - # 0.83 stalled cycles per insn - 104,469 branches # 400.513 M/sec + 127 page-faults # 0.487 M/sec + 793,277 cycles # 3.042 GHz + 476,100 stalled-cycles-frontend # 60.02% frontend cycles idle + 334,152 stalled-cycles-backend # 42.12% backend cycles idle + 582,916 instructions # 0.73 insns per cycle + # 0.82 stalled cycles per insn + 105,800 branches # 405.700 M/sec branch-misses - 0.001007969 seconds time elapsed + 0.000836036 seconds time elapsed Iteration 3 Performance counter stats for './matrixMultiply 10': - 0.261618 task-clock # 0.283 CPUs utilized + 0.260142 task-clock # 0.341 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 127 page-faults # 0.485 M/sec - 795,541 cycles # 3.041 GHz - 477,412 stalled-cycles-frontend # 60.01% frontend cycles idle - 337,422 stalled-cycles-backend # 42.41% backend cycles idle - 582,806 instructions # 0.73 insns per cycle - # 0.82 stalled cycles per insn - 105,752 branches # 404.223 M/sec - branch-misses + 127 page-faults # 0.488 M/sec + cycles + 469,818 stalled-cycles-frontend # 0.00% frontend cycles idle + 330,664 stalled-cycles-backend # 0.00% backend cycles idle + 581,476 instructions # 0.00 insns per cycle + # 0.81 stalled cycles per insn + 105,525 branches # 405.644 M/sec + 6,383 branch-misses # 6.05% of all branches - 0.000924018 seconds time elapsed + 0.000763764 seconds time elapsed Iteration 4 Performance counter stats for './matrixMultiply 10': - 0.261609 task-clock # 0.274 CPUs utilized + 0.265972 task-clock # 0.264 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 127 page-faults # 0.485 M/sec - 795,911 cycles # 3.042 GHz - 479,407 stalled-cycles-frontend # 60.23% frontend cycles idle - 329,890 stalled-cycles-backend # 41.45% backend cycles idle - 576,865 instructions # 0.72 insns per cycle - # 0.83 stalled cycles per insn - 104,737 branches # 400.357 M/sec + 127 page-faults # 0.477 M/sec + 783,856 cycles # 2.947 GHz + 469,562 stalled-cycles-frontend # 59.90% frontend cycles idle + 316,315 stalled-cycles-backend # 40.35% backend cycles idle + 579,399 instructions # 0.74 insns per cycle + # 0.81 stalled cycles per insn + 105,152 branches # 395.350 M/sec branch-misses - 0.000954734 seconds time elapsed + 0.001005965 seconds time elapsed Iteration 5 Performance counter stats for './matrixMultiply 10': - 0.264603 task-clock # 0.306 CPUs utilized + 0.262319 task-clock # 0.287 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 127 page-faults # 0.480 M/sec - 804,473 cycles # 3.040 GHz - 485,108 stalled-cycles-frontend # 60.30% frontend cycles idle - 323,522 stalled-cycles-backend # 40.22% backend cycles idle - 588,043 instructions # 0.73 insns per cycle + 126 page-faults # 0.480 M/sec + 798,897 cycles # 3.046 GHz + 481,149 stalled-cycles-frontend # 60.23% frontend cycles idle + 294,473 stalled-cycles-backend # 36.86% backend cycles idle + 584,326 instructions # 0.73 insns per cycle # 0.82 stalled cycles per insn - 106,407 branches # 402.138 M/sec + 105,770 branches # 403.211 M/sec branch-misses - 0.000864057 seconds time elapsed + 0.000915591 seconds time elapsed Iteration 6 Performance counter stats for './matrixMultiply 10': - 0.260205 task-clock # 0.112 CPUs utilized + 0.261447 task-clock # 0.150 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 126 page-faults # 0.484 M/sec - 790,756 cycles # 3.039 GHz - 477,383 stalled-cycles-frontend # 60.37% frontend cycles idle - 310,137 stalled-cycles-backend # 39.22% backend cycles idle - 579,178 instructions # 0.73 insns per cycle - # 0.82 stalled cycles per insn - 105,108 branches # 403.943 M/sec + 127 page-faults # 0.486 M/sec + 795,646 cycles # 3.043 GHz + 481,327 stalled-cycles-frontend # 60.50% frontend cycles idle + 341,767 stalled-cycles-backend # 42.95% backend cycles idle + 574,374 instructions # 0.72 insns per cycle + # 0.84 stalled cycles per insn + 103,651 branches # 396.451 M/sec branch-misses - 0.002318265 seconds time elapsed + 0.001741528 seconds time elapsed Iteration 7 Performance counter stats for './matrixMultiply 10': - 0.259797 task-clock # 0.330 CPUs utilized + 0.264455 task-clock # 0.300 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 126 page-faults # 0.485 M/sec - 789,876 cycles # 3.040 GHz - 473,244 stalled-cycles-frontend # 59.91% frontend cycles idle - 312,912 stalled-cycles-backend # 39.62% backend cycles idle - 580,156 instructions # 0.73 insns per cycle - # 0.82 stalled cycles per insn - 105,308 branches # 405.347 M/sec + 127 page-faults # 0.480 M/sec + 790,618 cycles # 2.990 GHz + 478,470 stalled-cycles-frontend # 60.52% frontend cycles idle + 330,579 stalled-cycles-backend # 41.81% backend cycles idle + 568,364 instructions # 0.72 insns per cycle + # 0.84 stalled cycles per insn + 102,901 branches # 389.106 M/sec branch-misses - 0.000787977 seconds time elapsed + 0.000881953 seconds time elapsed Iteration 8 Performance counter stats for './matrixMultiply 10': - 0.260692 task-clock # 0.343 CPUs utilized + 0.259611 task-clock # 0.320 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 127 page-faults # 0.487 M/sec - 792,954 cycles # 3.042 GHz - 475,780 stalled-cycles-frontend # 60.00% frontend cycles idle - 345,732 stalled-cycles-backend # 43.60% backend cycles idle - 582,379 instructions # 0.73 insns per cycle + 126 page-faults # 0.485 M/sec + 789,701 cycles # 3.042 GHz + 474,512 stalled-cycles-frontend # 60.09% frontend cycles idle + 332,633 stalled-cycles-backend # 42.12% backend cycles idle + 576,821 instructions # 0.73 insns per cycle # 0.82 stalled cycles per insn - 105,697 branches # 405.448 M/sec + 104,740 branches # 403.450 M/sec branch-misses - 0.000759952 seconds time elapsed + 0.000811846 seconds time elapsed Iteration 9 Performance counter stats for './matrixMultiply 10': - 0.260917 task-clock # 0.268 CPUs utilized + 0.261265 task-clock # 0.147 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 127 page-faults # 0.487 M/sec - 793,729 cycles # 3.042 GHz - 477,328 stalled-cycles-frontend # 60.14% frontend cycles idle - 328,501 stalled-cycles-backend # 41.39% backend cycles idle - 578,107 instructions # 0.73 insns per cycle - # 0.83 stalled cycles per insn - 104,943 branches # 402.208 M/sec + 126 page-faults # 0.482 M/sec + 793,438 cycles # 3.037 GHz + 479,473 stalled-cycles-frontend # 60.43% frontend cycles idle + 320,606 stalled-cycles-backend # 40.41% backend cycles idle + 572,808 instructions # 0.72 insns per cycle + # 0.84 stalled cycles per insn + 104,034 branches # 398.193 M/sec branch-misses - 0.000975280 seconds time elapsed + 0.001780051 seconds time elapsed Iteration 10 Performance counter stats for './matrixMultiply 10': - 0.263182 task-clock # 0.237 CPUs utilized + 0.261124 task-clock # 0.302 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 127 page-faults # 0.483 M/sec - 791,537 cycles # 3.008 GHz - 478,876 stalled-cycles-frontend # 60.50% frontend cycles idle - 307,587 stalled-cycles-backend # 38.86% backend cycles idle - 569,014 instructions # 0.72 insns per cycle - # 0.84 stalled cycles per insn - 102,992 branches # 391.334 M/sec + 127 page-faults # 0.486 M/sec + 794,602 cycles # 3.043 GHz + 476,906 stalled-cycles-frontend # 60.02% frontend cycles idle + 329,122 stalled-cycles-backend # 41.42% backend cycles idle + 583,923 instructions # 0.73 insns per cycle + # 0.82 stalled cycles per insn + 105,965 branches # 405.803 M/sec branch-misses - 0.001110128 seconds time elapsed + 0.000863590 seconds time elapsed diff --git a/sankalp/gcc_output_100 b/sankalp/gcc_output_100 index 0ae1a5c..cecefd4 100644 --- a/sankalp/gcc_output_100 +++ b/sankalp/gcc_output_100 @@ -2,179 +2,179 @@ Iteration 1 Performance counter stats for './matrixMultiply 100': - 8.082594 task-clock # 0.229 CPUs utilized + 8.054504 task-clock # 0.903 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 159 page-faults # 0.020 M/sec - 24,637,996 cycles # 3.048 GHz [50.73%] - 6,986,651 stalled-cycles-frontend # 28.36% frontend cycles idle - 1,008,145 stalled-cycles-backend # 4.09% backend cycles idle - 52,133,562 instructions # 2.12 insns per cycle - # 0.13 stalled cycles per insn - 1,477,235 branches # 182.767 M/sec - 11,701 branch-misses # 0.79% of all branches [84.88%] + 24,610,696 cycles # 3.056 GHz [50.57%] + 7,909,722 stalled-cycles-frontend # 32.14% frontend cycles idle + 689,986 stalled-cycles-backend # 2.80% backend cycles idle + 52,144,755 instructions # 2.12 insns per cycle + # 0.15 stalled cycles per insn + 1,478,711 branches # 183.588 M/sec + 11,727 branch-misses # 0.79% of all branches [76.43%] - 0.035366218 seconds time elapsed + 0.008919947 seconds time elapsed Iteration 2 Performance counter stats for './matrixMultiply 100': - 8.050201 task-clock # 0.479 CPUs utilized + 8.117808 task-clock # 0.932 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 159 page-faults # 0.020 M/sec - 24,595,103 cycles # 3.055 GHz [50.55%] - 8,624,213 stalled-cycles-frontend # 35.06% frontend cycles idle - 689,549 stalled-cycles-backend # 2.80% backend cycles idle - 52,141,529 instructions # 2.12 insns per cycle - # 0.17 stalled cycles per insn - 1,478,633 branches # 183.677 M/sec - 11,859 branch-misses # 0.80% of all branches [69.94%] + 24,718,750 cycles # 3.045 GHz [50.92%] + 8,195,388 stalled-cycles-frontend # 33.15% frontend cycles idle + 897,551 stalled-cycles-backend # 3.63% backend cycles idle + 52,145,300 instructions # 2.11 insns per cycle + # 0.16 stalled cycles per insn + 1,478,850 branches # 182.174 M/sec + 11,724 branch-misses # 0.79% of all branches [74.58%] - 0.016799654 seconds time elapsed + 0.008711930 seconds time elapsed Iteration 3 Performance counter stats for './matrixMultiply 100': - 8.147851 task-clock # 0.375 CPUs utilized + 8.111482 task-clock # 0.927 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 158 page-faults # 0.019 M/sec - 24,820,799 cycles # 3.046 GHz [51.05%] - 8,571,190 stalled-cycles-frontend # 34.53% frontend cycles idle - 1,023,027 stalled-cycles-backend # 4.12% backend cycles idle - 52,141,396 instructions # 2.10 insns per cycle + 24,714,858 cycles # 3.047 GHz [50.88%] + 8,209,540 stalled-cycles-frontend # 33.22% frontend cycles idle + 698,504 stalled-cycles-backend # 2.83% backend cycles idle + 52,145,272 instructions # 2.11 insns per cycle # 0.16 stalled cycles per insn - 1,478,585 branches # 181.469 M/sec - 11,862 branch-misses # 0.80% of all branches [71.63%] + 1,478,847 branches # 182.315 M/sec + 11,726 branch-misses # 0.79% of all branches [74.46%] - 0.021745919 seconds time elapsed + 0.008749660 seconds time elapsed Iteration 4 Performance counter stats for './matrixMultiply 100': - 8.067087 task-clock # 0.404 CPUs utilized + 8.148961 task-clock # 0.917 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 159 page-faults # 0.020 M/sec - 24,615,481 cycles # 3.051 GHz [50.64%] - 7,610,015 stalled-cycles-frontend # 30.92% frontend cycles idle - 706,958 stalled-cycles-backend # 2.87% backend cycles idle - 52,143,297 instructions # 2.12 insns per cycle - # 0.15 stalled cycles per insn - 1,478,700 branches # 183.300 M/sec - 11,797 branch-misses # 0.80% of all branches [79.23%] + 158 page-faults # 0.019 M/sec + 24,818,324 cycles # 3.046 GHz [51.10%] + 6,387,176 stalled-cycles-frontend # 25.74% frontend cycles idle + 574,390 stalled-cycles-backend # 2.31% backend cycles idle + 52,168,459 instructions # 2.10 insns per cycle + # 0.12 stalled cycles per insn + 1,483,171 branches # 182.007 M/sec + 11,840 branch-misses # 0.80% of all branches [90.60%] - 0.019968319 seconds time elapsed + 0.008883245 seconds time elapsed Iteration 5 Performance counter stats for './matrixMultiply 100': - 8.073882 task-clock # 0.362 CPUs utilized + 8.087013 task-clock # 0.908 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 159 page-faults # 0.020 M/sec - 24,681,444 cycles # 3.057 GHz [77.01%] - 10,796,404 stalled-cycles-frontend # 43.74% frontend cycles idle [50.66%] - 655,582 stalled-cycles-backend # 2.66% backend cycles idle - 52,141,219 instructions # 2.11 insns per cycle - # 0.21 stalled cycles per insn - 1,478,123 branches # 183.075 M/sec - 17,389 branch-misses # 1.18% of all branches + 24,632,136 cycles # 3.046 GHz [50.73%] + 6,193,928 stalled-cycles-frontend # 25.15% frontend cycles idle + 772,134 stalled-cycles-backend # 3.13% backend cycles idle + 52,143,390 instructions # 2.12 insns per cycle + # 0.12 stalled cycles per insn + 1,478,507 branches # 182.825 M/sec + 11,557 branch-misses # 0.78% of all branches [91.88%] - 0.022306904 seconds time elapsed + 0.008906666 seconds time elapsed Iteration 6 Performance counter stats for './matrixMultiply 100': - 8.133306 task-clock # 0.422 CPUs utilized + 8.084764 task-clock # 0.930 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 159 page-faults # 0.020 M/sec - 24,829,473 cycles # 3.053 GHz [50.97%] - 9,317,416 stalled-cycles-frontend # 37.53% frontend cycles idle - 1,151,323 stalled-cycles-backend # 4.64% backend cycles idle - 52,140,727 instructions # 2.10 insns per cycle - # 0.18 stalled cycles per insn - 1,478,462 branches # 181.779 M/sec - 11,889 branch-misses # 0.80% of all branches [64.98%] + 24,635,382 cycles # 3.047 GHz [50.73%] + 10,318,247 stalled-cycles-frontend # 41.88% frontend cycles idle + 985,300 stalled-cycles-backend # 4.00% backend cycles idle + 52,144,339 instructions # 2.12 insns per cycle + # 0.20 stalled cycles per insn + 1,478,689 branches # 182.898 M/sec + 12,290 branch-misses # 0.83% of all branches [54.83%] - 0.019272832 seconds time elapsed + 0.008697934 seconds time elapsed Iteration 7 Performance counter stats for './matrixMultiply 100': - 8.088853 task-clock # 0.448 CPUs utilized + 8.080921 task-clock # 0.933 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 158 page-faults # 0.020 M/sec - 24,718,914 cycles # 3.056 GHz [50.73%] - 5,855,324 stalled-cycles-frontend # 23.69% frontend cycles idle - 800,691 stalled-cycles-backend # 3.24% backend cycles idle - 52,142,592 instructions # 2.11 insns per cycle - # 0.11 stalled cycles per insn - 1,478,383 branches # 182.768 M/sec - 11,526 branch-misses # 0.78% of all branches [95.44%] + 159 page-faults # 0.020 M/sec + 24,599,853 cycles # 3.044 GHz [50.70%] + 6,254,276 stalled-cycles-frontend # 25.42% frontend cycles idle + 871,510 stalled-cycles-backend # 3.54% backend cycles idle + 52,146,066 instructions # 2.12 insns per cycle + # 0.12 stalled cycles per insn + 1,479,020 branches # 183.026 M/sec + 11,596 branch-misses # 0.78% of all branches [91.42%] - 0.018044272 seconds time elapsed + 0.008656610 seconds time elapsed Iteration 8 Performance counter stats for './matrixMultiply 100': - 8.100240 task-clock # 0.402 CPUs utilized + 8.104023 task-clock # 0.910 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 158 page-faults # 0.020 M/sec - 24,651,938 cycles # 3.043 GHz [50.81%] - 9,684,842 stalled-cycles-frontend # 39.29% frontend cycles idle - 756,211 stalled-cycles-backend # 3.07% backend cycles idle - 52,135,360 instructions # 2.11 insns per cycle - # 0.19 stalled cycles per insn - 1,477,522 branches # 182.405 M/sec - 11,963 branch-misses # 0.81% of all branches [60.49%] + 159 page-faults # 0.020 M/sec + 24,721,453 cycles # 3.051 GHz [50.81%] + 7,609,755 stalled-cycles-frontend # 30.78% frontend cycles idle + 797,005 stalled-cycles-backend # 3.22% backend cycles idle + 52,152,265 instructions # 2.11 insns per cycle + # 0.15 stalled cycles per insn + 1,480,154 branches # 182.644 M/sec + 11,875 branch-misses # 0.80% of all branches [79.49%] - 0.020160354 seconds time elapsed + 0.008901112 seconds time elapsed Iteration 9 Performance counter stats for './matrixMultiply 100': - 8.053028 task-clock # 0.367 CPUs utilized + 8.105288 task-clock # 0.924 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 158 page-faults # 0.020 M/sec - 24,606,167 cycles # 3.056 GHz [50.56%] - 9,034,157 stalled-cycles-frontend # 36.72% frontend cycles idle - 623,010 stalled-cycles-backend # 2.53% backend cycles idle - 52,142,491 instructions # 2.12 insns per cycle + 159 page-faults # 0.020 M/sec + 24,710,843 cycles # 3.049 GHz [50.85%] + 8,609,115 stalled-cycles-frontend # 34.84% frontend cycles idle + 511,967 stalled-cycles-backend # 2.07% backend cycles idle + 52,146,432 instructions # 2.11 insns per cycle # 0.17 stalled cycles per insn - 1,478,373 branches # 183.580 M/sec - 11,952 branch-misses # 0.81% of all branches [66.29%] + 1,479,068 branches # 182.482 M/sec + 11,732 branch-misses # 0.79% of all branches [70.47%] - 0.021926602 seconds time elapsed + 0.008771563 seconds time elapsed Iteration 10 Performance counter stats for './matrixMultiply 100': - 8.095434 task-clock # 0.411 CPUs utilized + 8.104444 task-clock # 0.935 CPUs utilized 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 159 page-faults # 0.020 M/sec - 24,739,370 cycles # 3.056 GHz [50.82%] - 10,304,707 stalled-cycles-frontend # 41.65% frontend cycles idle - 719,320 stalled-cycles-backend # 2.91% backend cycles idle - 52,143,820 instructions # 2.11 insns per cycle - # 0.20 stalled cycles per insn - 1,478,991 branches # 182.694 M/sec - 12,024 branch-misses # 0.81% of all branches [55.52%] + 24,657,762 cycles # 3.042 GHz [50.84%] + 8,398,717 stalled-cycles-frontend # 34.06% frontend cycles idle + 1,004,334 stalled-cycles-backend # 4.07% backend cycles idle + 52,144,374 instructions # 2.11 insns per cycle + # 0.16 stalled cycles per insn + 1,478,685 branches # 182.454 M/sec + 11,712 branch-misses # 0.79% of all branches [72.47%] - 0.019719568 seconds time elapsed + 0.008664229 seconds time elapsed diff --git a/sankalp/gcc_output_1000 b/sankalp/gcc_output_1000 index cea6165..143e8fe 100644 --- a/sankalp/gcc_output_1000 +++ b/sankalp/gcc_output_1000 @@ -2,179 +2,179 @@ Iteration 1 Performance counter stats for './matrixMultiply 1000': - 9989.170541 task-clock # 0.996 CPUs utilized - 11 context-switches # 0.000 M/sec - 0 CPU-migrations # 0.000 M/sec - 3,075 page-faults # 0.000 M/sec - 30,581,586,932 cycles # 3.061 GHz [83.30%] - 17,622,767,995 stalled-cycles-frontend # 57.63% frontend cycles idle [83.34%] - 4,193,237,081 stalled-cycles-backend # 13.71% backend cycles idle [66.69%] - 50,156,776,557 instructions # 1.64 insns per cycle - # 0.35 stalled cycles per insn [83.34%] - 1,037,731,568 branches # 103.886 M/sec [83.34%] - 1,104,588 branch-misses # 0.11% of all branches [83.34%] + 9926.708359 task-clock # 0.997 CPUs utilized + 13 context-switches # 0.000 M/sec + 1 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 30,395,895,247 cycles # 3.062 GHz [83.32%] + 17,442,111,645 stalled-cycles-frontend # 57.38% frontend cycles idle [83.32%] + 4,164,384,143 stalled-cycles-backend # 13.70% backend cycles idle [66.68%] + 50,155,717,267 instructions # 1.65 insns per cycle + # 0.35 stalled cycles per insn [83.36%] + 1,037,542,959 branches # 104.520 M/sec [83.36%] + 1,062,313 branch-misses # 0.10% of all branches [83.32%] - 10.027304870 seconds time elapsed + 9.952598326 seconds time elapsed Iteration 2 Performance counter stats for './matrixMultiply 1000': - 9968.053405 task-clock # 0.997 CPUs utilized - 13 context-switches # 0.000 M/sec + 9989.945693 task-clock # 0.997 CPUs utilized + 12 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 3,074 page-faults # 0.000 M/sec - 30,521,267,945 cycles # 3.062 GHz [83.31%] - 17,556,447,969 stalled-cycles-frontend # 57.52% frontend cycles idle [83.34%] - 4,278,889,715 stalled-cycles-backend # 14.02% backend cycles idle [66.70%] - 50,162,132,165 instructions # 1.64 insns per cycle + 30,589,481,178 cycles # 3.062 GHz [83.31%] + 17,621,887,593 stalled-cycles-frontend # 57.61% frontend cycles idle [83.34%] + 4,027,488,464 stalled-cycles-backend # 13.17% backend cycles idle [66.69%] + 50,159,899,384 instructions # 1.64 insns per cycle # 0.35 stalled cycles per insn [83.35%] - 1,037,644,970 branches # 104.097 M/sec [83.35%] - 1,072,195 branch-misses # 0.10% of all branches [83.32%] + 1,037,610,140 branches # 103.865 M/sec [83.34%] + 1,072,230 branch-misses # 0.10% of all branches [83.34%] - 9.993975874 seconds time elapsed + 10.015847661 seconds time elapsed Iteration 3 Performance counter stats for './matrixMultiply 1000': - 9926.186121 task-clock # 0.997 CPUs utilized + 9872.782392 task-clock # 0.997 CPUs utilized 12 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 3,074 page-faults # 0.000 M/sec - 30,394,192,927 cycles # 3.062 GHz [83.32%] - 17,440,517,077 stalled-cycles-frontend # 57.38% frontend cycles idle [83.32%] - 4,145,893,695 stalled-cycles-backend # 13.64% backend cycles idle [66.67%] - 50,150,109,486 instructions # 1.65 insns per cycle - # 0.35 stalled cycles per insn [83.35%] - 1,037,595,495 branches # 104.531 M/sec [83.36%] - 1,078,895 branch-misses # 0.10% of all branches [83.35%] + 30,230,798,240 cycles # 3.062 GHz [83.31%] + 17,272,580,035 stalled-cycles-frontend # 57.14% frontend cycles idle [83.34%] + 3,944,001,447 stalled-cycles-backend # 13.05% backend cycles idle [66.70%] + 50,157,447,316 instructions # 1.66 insns per cycle + # 0.34 stalled cycles per insn [83.35%] + 1,037,562,856 branches # 105.093 M/sec [83.35%] + 1,070,768 branch-misses # 0.10% of all branches [83.33%] - 9.952122977 seconds time elapsed + 9.898330956 seconds time elapsed Iteration 4 Performance counter stats for './matrixMultiply 1000': - 9881.740024 task-clock # 0.997 CPUs utilized + 9960.652528 task-clock # 0.997 CPUs utilized 12 context-switches # 0.000 M/sec - 0 CPU-migrations # 0.000 M/sec + 2 CPU-migrations # 0.000 M/sec 3,075 page-faults # 0.000 M/sec - 30,257,682,330 cycles # 3.062 GHz [83.32%] - 17,304,847,964 stalled-cycles-frontend # 57.19% frontend cycles idle [83.32%] - 3,844,903,274 stalled-cycles-backend # 12.71% backend cycles idle [66.65%] - 50,164,214,193 instructions # 1.66 insns per cycle - # 0.34 stalled cycles per insn [83.32%] - 1,036,954,474 branches # 104.936 M/sec [83.36%] - 1,080,068 branch-misses # 0.10% of all branches [83.36%] + 30,442,315,955 cycles # 3.056 GHz [83.34%] + 17,495,255,191 stalled-cycles-frontend # 57.47% frontend cycles idle [83.34%] + 4,435,073,572 stalled-cycles-backend # 14.57% backend cycles idle [66.67%] + 50,158,527,538 instructions # 1.65 insns per cycle + # 0.35 stalled cycles per insn [83.34%] + 1,037,684,531 branches # 104.178 M/sec [83.34%] + 1,090,517 branch-misses # 0.11% of all branches [83.35%] - 9.907367067 seconds time elapsed + 9.986475787 seconds time elapsed Iteration 5 Performance counter stats for './matrixMultiply 1000': - 9885.426106 task-clock # 0.997 CPUs utilized - 13 context-switches # 0.000 M/sec + 9983.888308 task-clock # 0.997 CPUs utilized + 12 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 3,075 page-faults # 0.000 M/sec - 30,269,280,501 cycles # 3.062 GHz [83.33%] - 17,313,520,355 stalled-cycles-frontend # 57.20% frontend cycles idle [83.33%] - 3,835,575,528 stalled-cycles-backend # 12.67% backend cycles idle [66.66%] - 50,153,925,918 instructions # 1.66 insns per cycle + 3,074 page-faults # 0.000 M/sec + 30,571,145,817 cycles # 3.062 GHz [83.33%] + 17,613,563,281 stalled-cycles-frontend # 57.61% frontend cycles idle [83.34%] + 4,138,212,697 stalled-cycles-backend # 13.54% backend cycles idle [66.67%] + 50,155,387,715 instructions # 1.64 insns per cycle # 0.35 stalled cycles per insn [83.33%] - 1,036,820,042 branches # 104.884 M/sec [83.37%] - 1,066,630 branch-misses # 0.10% of all branches [83.34%] + 1,037,567,984 branches # 103.924 M/sec [83.33%] + 1,068,596 branch-misses # 0.10% of all branches [83.35%] - 9.911252910 seconds time elapsed + 10.010151775 seconds time elapsed Iteration 6 Performance counter stats for './matrixMultiply 1000': - 9889.983104 task-clock # 0.997 CPUs utilized + 9853.437478 task-clock # 0.997 CPUs utilized 13 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 3,074 page-faults # 0.000 M/sec - 30,280,695,223 cycles # 3.062 GHz [83.34%] - 17,324,211,616 stalled-cycles-frontend # 57.21% frontend cycles idle [83.34%] - 4,051,759,479 stalled-cycles-backend # 13.38% backend cycles idle [66.68%] - 50,157,057,504 instructions # 1.66 insns per cycle - # 0.35 stalled cycles per insn [83.34%] - 1,037,558,475 branches # 104.910 M/sec [83.34%] - 1,070,979 branch-misses # 0.10% of all branches [83.32%] + 3,075 page-faults # 0.000 M/sec + 30,170,703,152 cycles # 3.062 GHz [83.32%] + 17,222,155,727 stalled-cycles-frontend # 57.08% frontend cycles idle [83.32%] + 4,055,670,788 stalled-cycles-backend # 13.44% backend cycles idle [66.67%] + 50,155,779,844 instructions # 1.66 insns per cycle + # 0.34 stalled cycles per insn [83.36%] + 1,037,560,137 branches # 105.299 M/sec [83.36%] + 1,074,365 branch-misses # 0.10% of all branches [83.35%] - 9.915589553 seconds time elapsed + 9.879197104 seconds time elapsed Iteration 7 Performance counter stats for './matrixMultiply 1000': - 9939.642605 task-clock # 0.997 CPUs utilized - 13 context-switches # 0.000 M/sec + 9885.285809 task-clock # 0.997 CPUs utilized + 12 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 3,074 page-faults # 0.000 M/sec - 30,428,586,559 cycles # 3.061 GHz [83.34%] - 17,480,604,113 stalled-cycles-frontend # 57.45% frontend cycles idle [83.34%] - 4,012,801,621 stalled-cycles-backend # 13.19% backend cycles idle [66.68%] - 50,157,234,655 instructions # 1.65 insns per cycle - # 0.35 stalled cycles per insn [83.34%] - 1,037,647,811 branches # 104.395 M/sec [83.34%] - 1,069,193 branch-misses # 0.10% of all branches [83.32%] + 3,075 page-faults # 0.000 M/sec + 30,268,249,154 cycles # 3.062 GHz [83.34%] + 17,314,471,270 stalled-cycles-frontend # 57.20% frontend cycles idle [83.33%] + 4,082,457,570 stalled-cycles-backend # 13.49% backend cycles idle [66.66%] + 50,154,666,290 instructions # 1.66 insns per cycle + # 0.35 stalled cycles per insn [83.33%] + 1,037,650,375 branches # 104.969 M/sec [83.33%] + 1,085,196 branch-misses # 0.10% of all branches [83.37%] - 9.965459847 seconds time elapsed + 9.910950714 seconds time elapsed Iteration 8 Performance counter stats for './matrixMultiply 1000': - 9920.452250 task-clock # 0.997 CPUs utilized - 11 context-switches # 0.000 M/sec + 9914.407092 task-clock # 0.997 CPUs utilized + 12 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 3,074 page-faults # 0.000 M/sec - 30,376,652,538 cycles # 3.062 GHz [83.31%] - 17,415,976,577 stalled-cycles-frontend # 57.33% frontend cycles idle [83.34%] - 3,998,906,672 stalled-cycles-backend # 13.16% backend cycles idle [66.70%] - 50,156,200,190 instructions # 1.65 insns per cycle - # 0.35 stalled cycles per insn [83.35%] - 1,037,605,358 branches # 104.593 M/sec [83.35%] - 1,072,587 branch-misses # 0.10% of all branches [83.32%] + 30,356,897,007 cycles # 3.062 GHz [83.34%] + 17,403,547,284 stalled-cycles-frontend # 57.33% frontend cycles idle [83.34%] + 4,191,887,997 stalled-cycles-backend # 13.81% backend cycles idle [66.68%] + 50,156,122,104 instructions # 1.65 insns per cycle + # 0.35 stalled cycles per insn [83.34%] + 1,037,539,929 branches # 104.650 M/sec [83.34%] + 1,065,113 branch-misses # 0.10% of all branches [83.31%] - 9.946187560 seconds time elapsed + 9.940317228 seconds time elapsed Iteration 9 Performance counter stats for './matrixMultiply 1000': - 9950.631435 task-clock # 0.997 CPUs utilized - 13 context-switches # 0.000 M/sec + 9937.674711 task-clock # 0.997 CPUs utilized + 12 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 3,075 page-faults # 0.000 M/sec - 30,468,120,896 cycles # 3.062 GHz [83.32%] - 17,513,181,615 stalled-cycles-frontend # 57.48% frontend cycles idle [83.32%] - 4,285,615,016 stalled-cycles-backend # 14.07% backend cycles idle [66.67%] - 50,146,044,213 instructions # 1.65 insns per cycle - # 0.35 stalled cycles per insn [83.36%] - 1,037,530,014 branches # 104.268 M/sec [83.36%] - 1,082,040 branch-misses # 0.10% of all branches [83.35%] + 3,074 page-faults # 0.000 M/sec + 30,427,817,353 cycles # 3.062 GHz [83.34%] + 17,471,897,841 stalled-cycles-frontend # 57.42% frontend cycles idle [83.34%] + 4,027,974,555 stalled-cycles-backend # 13.24% backend cycles idle [66.68%] + 50,158,261,034 instructions # 1.65 insns per cycle + # 0.35 stalled cycles per insn [83.34%] + 1,037,803,159 branches # 104.431 M/sec [83.34%] + 1,088,832 branch-misses # 0.10% of all branches [83.32%] - 9.976606602 seconds time elapsed + 9.963562648 seconds time elapsed Iteration 10 Performance counter stats for './matrixMultiply 1000': - 9850.001597 task-clock # 0.997 CPUs utilized - 12 context-switches # 0.000 M/sec + 9878.683493 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec - 3,075 page-faults # 0.000 M/sec - 30,160,451,883 cycles # 3.062 GHz [83.31%] - 17,209,665,022 stalled-cycles-frontend # 57.06% frontend cycles idle [83.31%] - 3,830,145,916 stalled-cycles-backend # 12.70% backend cycles idle [66.69%] - 50,157,904,839 instructions # 1.66 insns per cycle - # 0.34 stalled cycles per insn [83.35%] - 1,037,634,480 branches # 105.344 M/sec [83.35%] - 1,087,305 branch-misses # 0.10% of all branches [83.35%] - - 9.875574586 seconds time elapsed + 3,074 page-faults # 0.000 M/sec + 30,249,098,352 cycles # 3.062 GHz [83.32%] + 17,298,067,630 stalled-cycles-frontend # 57.19% frontend cycles idle [83.32%] + 3,974,575,999 stalled-cycles-backend # 13.14% backend cycles idle [66.67%] + 50,150,796,611 instructions # 1.66 insns per cycle + # 0.34 stalled cycles per insn [83.36%] + 1,037,562,373 branches # 105.030 M/sec [83.36%] + 1,067,234 branch-misses # 0.10% of all branches [83.34%] + + 9.904228039 seconds time elapsed diff --git a/sankalp/gcc_output_10000 b/sankalp/gcc_output_10000 deleted file mode 100644 index 78f379f..0000000 --- a/sankalp/gcc_output_10000 +++ /dev/null @@ -1 +0,0 @@ -Iteration 1 diff --git a/sankalp/gcc_output_3000 b/sankalp/gcc_output_3000 new file mode 100644 index 0000000..1da4004 --- /dev/null +++ b/sankalp/gcc_output_3000 @@ -0,0 +1,180 @@ +Iteration 1 + + Performance counter stats for './matrixMultiply 3000': + + 475300.257913 task-clock # 0.997 CPUs utilized + 594 context-switches # 0.000 M/sec + 1 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 1,455,206,821,524 cycles # 3.062 GHz [83.33%] + 1,096,016,878,004 stalled-cycles-frontend # 75.32% frontend cycles idle [83.33%] + 590,754,550,647 stalled-cycles-backend # 40.60% backend cycles idle [66.67%] + 1,351,742,482,872 instructions # 0.93 insns per cycle + # 0.81 stalled cycles per insn [83.33%] + 27,393,869,687 branches # 57.635 M/sec [83.33%] + 14,090,789 branch-misses # 0.05% of all branches [83.33%] + + 476.494079626 seconds time elapsed + +Iteration 2 + + Performance counter stats for './matrixMultiply 3000': + + 476018.020595 task-clock # 0.997 CPUs utilized + 596 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 1,457,356,444,320 cycles # 3.062 GHz [83.33%] + 1,097,887,781,677 stalled-cycles-frontend # 75.33% frontend cycles idle [83.33%] + 593,165,290,075 stalled-cycles-backend # 40.70% backend cycles idle [66.67%] + 1,351,747,334,373 instructions # 0.93 insns per cycle + # 0.81 stalled cycles per insn [83.33%] + 27,396,936,581 branches # 57.554 M/sec [83.33%] + 14,020,326 branch-misses # 0.05% of all branches [83.33%] + + 477.213941232 seconds time elapsed + +Iteration 3 + + Performance counter stats for './matrixMultiply 3000': + + 474569.437761 task-clock # 0.997 CPUs utilized + 595 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 1,452,880,580,364 cycles # 3.061 GHz [83.33%] + 1,093,658,008,544 stalled-cycles-frontend # 75.28% frontend cycles idle [83.33%] + 598,953,855,484 stalled-cycles-backend # 41.23% backend cycles idle [66.67%] + 1,351,708,174,318 instructions # 0.93 insns per cycle + # 0.81 stalled cycles per insn [83.33%] + 27,397,137,447 branches # 57.731 M/sec [83.33%] + 13,353,583 branch-misses # 0.05% of all branches [83.33%] + + 475.763890065 seconds time elapsed + +Iteration 4 + + Performance counter stats for './matrixMultiply 3000': + + 475646.500239 task-clock # 0.997 CPUs utilized + 596 context-switches # 0.000 M/sec + 1 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 1,456,163,649,462 cycles # 3.061 GHz [83.33%] + 1,096,827,868,403 stalled-cycles-frontend # 75.32% frontend cycles idle [83.33%] + 604,183,182,642 stalled-cycles-backend # 41.49% backend cycles idle [66.67%] + 1,351,761,637,391 instructions # 0.93 insns per cycle + # 0.81 stalled cycles per insn [83.33%] + 27,396,786,793 branches # 57.599 M/sec [83.33%] + 13,320,251 branch-misses # 0.05% of all branches [83.33%] + + 476.841596270 seconds time elapsed + +Iteration 5 + + Performance counter stats for './matrixMultiply 3000': + + 476211.404942 task-clock # 0.997 CPUs utilized + 597 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 1,457,959,876,543 cycles # 3.062 GHz [83.33%] + 1,098,411,393,007 stalled-cycles-frontend # 75.34% frontend cycles idle [83.33%] + 607,061,107,646 stalled-cycles-backend # 41.64% backend cycles idle [66.67%] + 1,351,758,909,796 instructions # 0.93 insns per cycle + # 0.81 stalled cycles per insn [83.33%] + 27,397,007,312 branches # 57.531 M/sec [83.33%] + 13,916,373 branch-misses # 0.05% of all branches [83.33%] + + 477.407551808 seconds time elapsed + +Iteration 6 + + Performance counter stats for './matrixMultiply 3000': + + 475948.824921 task-clock # 0.997 CPUs utilized + 596 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 1,457,196,407,185 cycles # 3.062 GHz [83.33%] + 1,097,491,418,801 stalled-cycles-frontend # 75.32% frontend cycles idle [83.33%] + 619,424,280,588 stalled-cycles-backend # 42.51% backend cycles idle [66.67%] + 1,351,752,463,467 instructions # 0.93 insns per cycle + # 0.81 stalled cycles per insn [83.33%] + 27,395,156,718 branches # 57.559 M/sec [83.33%] + 13,646,100 branch-misses # 0.05% of all branches [83.33%] + + 477.144006180 seconds time elapsed + +Iteration 7 + + Performance counter stats for './matrixMultiply 3000': + + 476555.496544 task-clock # 0.997 CPUs utilized + 597 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 1,459,055,808,779 cycles # 3.062 GHz [83.33%] + 1,099,359,336,770 stalled-cycles-frontend # 75.35% frontend cycles idle [83.33%] + 605,859,886,112 stalled-cycles-backend # 41.52% backend cycles idle [66.67%] + 1,351,748,188,845 instructions # 0.93 insns per cycle + # 0.81 stalled cycles per insn [83.33%] + 27,396,976,815 branches # 57.490 M/sec [83.33%] + 13,608,298 branch-misses # 0.05% of all branches [83.33%] + + 477.751263190 seconds time elapsed + +Iteration 8 + + Performance counter stats for './matrixMultiply 3000': + + 476550.619236 task-clock # 0.997 CPUs utilized + 596 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 1,458,990,788,545 cycles # 3.062 GHz [83.33%] + 1,099,354,204,996 stalled-cycles-frontend # 75.35% frontend cycles idle [83.33%] + 605,327,241,764 stalled-cycles-backend # 41.49% backend cycles idle [66.67%] + 1,351,757,320,368 instructions # 0.93 insns per cycle + # 0.81 stalled cycles per insn [83.33%] + 27,396,713,068 branches # 57.490 M/sec [83.33%] + 13,874,648 branch-misses # 0.05% of all branches [83.33%] + + 477.765602818 seconds time elapsed + +Iteration 9 + + Performance counter stats for './matrixMultiply 3000': + + 477530.594343 task-clock # 0.997 CPUs utilized + 597 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 1,461,959,826,317 cycles # 3.061 GHz [83.33%] + 1,102,192,822,811 stalled-cycles-frontend # 75.39% frontend cycles idle [83.33%] + 602,765,299,108 stalled-cycles-backend # 41.23% backend cycles idle [66.67%] + 1,351,757,093,932 instructions # 0.92 insns per cycle + # 0.82 stalled cycles per insn [83.33%] + 27,398,383,869 branches # 57.375 M/sec [83.33%] + 13,438,518 branch-misses # 0.05% of all branches [83.33%] + + 478.728380489 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply 3000': + + 477246.288455 task-clock # 0.997 CPUs utilized + 598 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 1,461,142,413,968 cycles # 3.062 GHz [83.33%] + 1,101,548,208,648 stalled-cycles-frontend # 75.39% frontend cycles idle [83.33%] + 610,983,270,807 stalled-cycles-backend # 41.82% backend cycles idle [66.67%] + 1,351,767,194,355 instructions # 0.93 insns per cycle + # 0.81 stalled cycles per insn [83.33%] + 27,397,222,382 branches # 57.407 M/sec [83.33%] + 14,071,841 branch-misses # 0.05% of all branches [83.33%] + + 478.444205849 seconds time elapsed + diff --git a/sankalp/gcc_output_optimized_10 b/sankalp/gcc_output_optimized_10 new file mode 100644 index 0000000..ac95581 --- /dev/null +++ b/sankalp/gcc_output_optimized_10 @@ -0,0 +1,180 @@ +Iteration 10 + + Performance counter stats for './matrixMultiply_gcc_optimized 10': + + 0.248266 task-clock # 0.177 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.512 M/sec + cycles + 456,214 stalled-cycles-frontend # 0.00% frontend cycles idle + 322,343 stalled-cycles-backend # 0.00% backend cycles idle + 525,274 instructions # 0.00 insns per cycle + # 0.87 stalled cycles per insn + 102,761 branches # 413.915 M/sec + 6,305 branch-misses # 6.14% of all branches + + 0.001399414 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_gcc_optimized 10': + + 0.250526 task-clock # 0.158 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 126 page-faults # 0.503 M/sec + 761,374 cycles # 3.039 GHz + 460,669 stalled-cycles-frontend # 60.50% frontend cycles idle + 278,379 stalled-cycles-backend # 36.56% backend cycles idle + 531,203 instructions # 0.70 insns per cycle + # 0.87 stalled cycles per insn + 104,155 branches # 415.745 M/sec + branch-misses + + 0.001584906 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_gcc_optimized 10': + + 0.256776 task-clock # 0.261 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.495 M/sec + 769,126 cycles # 2.995 GHz + 464,343 stalled-cycles-frontend # 60.37% frontend cycles idle + 305,155 stalled-cycles-backend # 39.68% backend cycles idle + 537,950 instructions # 0.70 insns per cycle + # 0.86 stalled cycles per insn + 105,335 branches # 410.221 M/sec + branch-misses + + 0.000984387 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_gcc_optimized 10': + + 0.252672 task-clock # 0.303 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.503 M/sec + 758,151 cycles # 3.001 GHz + 456,805 stalled-cycles-frontend # 60.25% frontend cycles idle + 312,114 stalled-cycles-backend # 41.17% backend cycles idle + 535,170 instructions # 0.71 insns per cycle + # 0.85 stalled cycles per insn + 104,829 branches # 414.882 M/sec + branch-misses + + 0.000834122 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_gcc_optimized 10': + + 0.256107 task-clock # 0.319 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.496 M/sec + 777,635 cycles # 3.036 GHz + 471,077 stalled-cycles-frontend # 60.58% frontend cycles idle + 311,009 stalled-cycles-backend # 39.99% backend cycles idle + 537,745 instructions # 0.69 insns per cycle + # 0.88 stalled cycles per insn + 105,304 branches # 411.172 M/sec + branch-misses + + 0.000803563 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_gcc_optimized 10': + + 0.253460 task-clock # 0.266 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.501 M/sec + 770,539 cycles # 3.040 GHz + 464,693 stalled-cycles-frontend # 60.31% frontend cycles idle + 320,075 stalled-cycles-backend # 41.54% backend cycles idle + 535,462 instructions # 0.69 insns per cycle + # 0.87 stalled cycles per insn + 104,877 branches # 413.781 M/sec + branch-misses + + 0.000953846 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_gcc_optimized 10': + + 0.257419 task-clock # 0.307 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.493 M/sec + 771,110 cycles # 2.996 GHz + 466,926 stalled-cycles-frontend # 60.55% frontend cycles idle + 338,319 stalled-cycles-backend # 43.87% backend cycles idle + 536,400 instructions # 0.70 insns per cycle + # 0.87 stalled cycles per insn + 105,084 branches # 408.222 M/sec + branch-misses + + 0.000837879 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_gcc_optimized 10': + + 0.251920 task-clock # 0.292 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.504 M/sec + 765,830 cycles # 3.040 GHz + 460,303 stalled-cycles-frontend # 60.11% frontend cycles idle + 311,031 stalled-cycles-backend # 40.61% backend cycles idle + 536,633 instructions # 0.70 insns per cycle + # 0.86 stalled cycles per insn + 105,101 branches # 417.200 M/sec + branch-misses + + 0.000862143 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_gcc_optimized 10': + + 0.251167 task-clock # 0.329 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.506 M/sec + 765,608 cycles # 3.048 GHz + 463,671 stalled-cycles-frontend # 60.56% frontend cycles idle + 321,814 stalled-cycles-backend # 42.03% backend cycles idle + 534,394 instructions # 0.70 insns per cycle + # 0.87 stalled cycles per insn + 104,724 branches # 416.950 M/sec + branch-misses + + 0.000763382 seconds time elapsed + +Iteration 10 + + Performance counter stats for './matrixMultiply_gcc_optimized 10': + + 0.257385 task-clock # 0.271 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 127 page-faults # 0.493 M/sec + 781,086 cycles # 3.035 GHz + 474,101 stalled-cycles-frontend # 60.70% frontend cycles idle + 323,675 stalled-cycles-backend # 41.44% backend cycles idle + 537,279 instructions # 0.69 insns per cycle + # 0.88 stalled cycles per insn + 105,124 branches # 408.431 M/sec + branch-misses + + 0.000949143 seconds time elapsed + diff --git a/sankalp/gcc_output_optimized_100 b/sankalp/gcc_output_optimized_100 new file mode 100644 index 0000000..02b8d64 --- /dev/null +++ b/sankalp/gcc_output_optimized_100 @@ -0,0 +1,180 @@ +Iteration 100 + + Performance counter stats for './matrixMultiply_gcc_optimized 100': + + 1.849608 task-clock # 0.769 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.086 M/sec + 5,664,275 cycles # 3.062 GHz + 732,765 stalled-cycles-frontend # 12.94% frontend cycles idle + 415,452 stalled-cycles-backend # 7.33% backend cycles idle + 10,010,514 instructions # 1.77 insns per cycle + # 0.07 stalled cycles per insn + 1,456,645 branches # 787.543 M/sec + branch-misses + + 0.002405004 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_gcc_optimized 100': + + 1.839908 task-clock # 0.666 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.086 M/sec + 5,636,324 cycles # 3.063 GHz + 708,018 stalled-cycles-frontend # 12.56% frontend cycles idle + 1,488,676 stalled-cycles-backend # 26.41% backend cycles idle + 9,999,645 instructions # 1.77 insns per cycle + # 0.15 stalled cycles per insn + 1,455,193 branches # 790.905 M/sec + branch-misses + + 0.002762590 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_gcc_optimized 100': + + 1.894950 task-clock # 0.768 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.084 M/sec + 2,365,633 cycles # 1.248 GHz + 815,107 stalled-cycles-frontend # 34.46% frontend cycles idle + 1,395,279 stalled-cycles-backend # 58.98% backend cycles idle + 10,005,259 instructions # 4.23 insns per cycle + # 0.14 stalled cycles per insn + 1,456,126 branches # 768.424 M/sec + 14,501 branch-misses # 1.00% of all branches [58.57%] + + 0.002468015 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_gcc_optimized 100': + + 1.875375 task-clock # 0.766 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.085 M/sec + 4,882,424 cycles # 2.603 GHz + 752,488 stalled-cycles-frontend # 15.41% frontend cycles idle + 1,341,572 stalled-cycles-backend # 27.48% backend cycles idle + 10,020,521 instructions # 2.05 insns per cycle + # 0.13 stalled cycles per insn + 1,458,408 branches # 777.662 M/sec + 15,896 branch-misses # 1.09% of all branches [14.62%] + + 0.002449323 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_gcc_optimized 100': + + 1.847871 task-clock # 0.723 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.086 M/sec + 5,660,288 cycles # 3.063 GHz + 727,224 stalled-cycles-frontend # 12.85% frontend cycles idle + 1,493,416 stalled-cycles-backend # 26.38% backend cycles idle + 10,014,309 instructions # 1.77 insns per cycle + # 0.15 stalled cycles per insn + 1,457,300 branches # 788.637 M/sec + branch-misses + + 0.002555472 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_gcc_optimized 100': + + 1.879068 task-clock # 0.768 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 158 page-faults # 0.084 M/sec + 3,421,298 cycles # 1.821 GHz + 761,542 stalled-cycles-frontend # 22.26% frontend cycles idle + 1,433,667 stalled-cycles-backend # 41.90% backend cycles idle + 10,018,860 instructions # 2.93 insns per cycle + # 0.14 stalled cycles per insn + 1,458,144 branches # 775.993 M/sec + 14,789 branch-misses # 1.01% of all branches [40.00%] + + 0.002447307 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_gcc_optimized 100': + + 1.883634 task-clock # 0.739 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.084 M/sec + 321,967 cycles # 0.171 GHz + 796,732 stalled-cycles-frontend # 247.46% frontend cycles idle + 443,941 stalled-cycles-backend # 137.88% backend cycles idle + 10,018,569 instructions # 31.12 insns per cycle + # 0.08 stalled cycles per insn + 1,458,017 branches # 774.045 M/sec + 16,126 branch-misses # 1.11% of all branches [93.59%] + + 0.002549813 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_gcc_optimized 100': + + 1.869718 task-clock # 0.709 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.085 M/sec + 1,144,239 cycles # 0.612 GHz + 746,887 stalled-cycles-frontend # 65.27% frontend cycles idle + 1,506,395 stalled-cycles-backend # 131.65% backend cycles idle + 10,012,630 instructions # 8.75 insns per cycle + # 0.15 stalled cycles per insn + 1,457,200 branches # 779.369 M/sec + 13,662 branch-misses # 0.94% of all branches [79.35%] + + 0.002638744 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_gcc_optimized 100': + + 1.919213 task-clock # 0.588 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 159 page-faults # 0.083 M/sec + 1,076,319 cycles # 0.561 GHz + 895,237 stalled-cycles-frontend # 83.18% frontend cycles idle + 1,645,401 stalled-cycles-backend # 152.87% backend cycles idle + 10,022,421 instructions # 9.31 insns per cycle + # 0.16 stalled cycles per insn + 1,458,721 branches # 760.062 M/sec + 13,434 branch-misses # 0.92% of all branches [80.97%] + + 0.003264739 seconds time elapsed + +Iteration 100 + + Performance counter stats for './matrixMultiply_gcc_optimized 100': + + 1.883476 task-clock # 0.690 CPUs utilized + 0 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 158 page-faults # 0.084 M/sec + 4,552,284 cycles # 2.417 GHz + 783,434 stalled-cycles-frontend # 17.21% frontend cycles idle + 1,317,945 stalled-cycles-backend # 28.95% backend cycles idle + 10,008,721 instructions # 2.20 insns per cycle + # 0.13 stalled cycles per insn + 1,456,761 branches # 773.443 M/sec + 14,949 branch-misses # 1.03% of all branches [20.70%] + + 0.002728343 seconds time elapsed + diff --git a/sankalp/gcc_output_optimized_1000 b/sankalp/gcc_output_optimized_1000 new file mode 100644 index 0000000..4da73bd --- /dev/null +++ b/sankalp/gcc_output_optimized_1000 @@ -0,0 +1,180 @@ +Iteration 1000 + + Performance counter stats for './matrixMultiply_gcc_optimized 1000': + + 8524.007146 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,101,094,636 cycles # 3.062 GHz [83.34%] + 21,174,291,573 stalled-cycles-frontend # 81.12% frontend cycles idle [83.34%] + 8,086,059,447 stalled-cycles-backend # 30.98% backend cycles idle [66.67%] + 8,147,023,966 instructions # 0.31 insns per cycle + # 2.60 stalled cycles per insn [83.34%] + 1,034,584,832 branches # 121.373 M/sec [83.34%] + 1,056,203 branch-misses # 0.10% of all branches [83.35%] + + 8.546169904 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_gcc_optimized 1000': + + 8480.788072 task-clock # 0.997 CPUs utilized + 12 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 25,968,524,213 cycles # 3.062 GHz [83.30%] + 21,026,920,732 stalled-cycles-frontend # 80.97% frontend cycles idle [83.35%] + 8,372,590,975 stalled-cycles-backend # 32.24% backend cycles idle [66.69%] + 8,146,964,730 instructions # 0.31 insns per cycle + # 2.58 stalled cycles per insn [83.35%] + 1,034,591,652 branches # 121.992 M/sec [83.34%] + 1,061,216 branch-misses # 0.10% of all branches [83.32%] + + 8.502870304 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_gcc_optimized 1000': + + 8499.518119 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,025,909,327 cycles # 3.062 GHz [83.34%] + 21,094,624,277 stalled-cycles-frontend # 81.05% frontend cycles idle [83.33%] + 7,597,054,786 stalled-cycles-backend # 29.19% backend cycles idle [66.67%] + 8,146,986,115 instructions # 0.31 insns per cycle + # 2.59 stalled cycles per insn [83.34%] + 1,034,616,479 branches # 121.726 M/sec [83.33%] + 1,055,439 branch-misses # 0.10% of all branches [83.34%] + + 8.521753033 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_gcc_optimized 1000': + + 8567.572363 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,233,696,728 cycles # 3.062 GHz [83.33%] + 21,318,293,997 stalled-cycles-frontend # 81.26% frontend cycles idle [83.33%] + 8,013,091,628 stalled-cycles-backend # 30.55% backend cycles idle [66.66%] + 8,147,236,780 instructions # 0.31 insns per cycle + # 2.62 stalled cycles per insn [83.33%] + 1,033,816,017 branches # 120.666 M/sec [83.37%] + 1,057,271 branch-misses # 0.10% of all branches [83.35%] + + 8.589968438 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_gcc_optimized 1000': + + 8498.048583 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,020,348,975 cycles # 3.062 GHz [83.33%] + 21,088,844,072 stalled-cycles-frontend # 81.05% frontend cycles idle [83.33%] + 7,943,621,197 stalled-cycles-backend # 30.53% backend cycles idle [66.66%] + 8,146,770,884 instructions # 0.31 insns per cycle + # 2.59 stalled cycles per insn [83.33%] + 1,034,364,119 branches # 121.718 M/sec [83.33%] + 1,057,797 branch-misses # 0.10% of all branches [83.35%] + + 8.520236784 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_gcc_optimized 1000': + + 8523.980835 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 26,099,105,396 cycles # 3.062 GHz [83.34%] + 21,174,457,333 stalled-cycles-frontend # 81.13% frontend cycles idle [83.34%] + 7,971,768,233 stalled-cycles-backend # 30.54% backend cycles idle [66.67%] + 8,147,071,539 instructions # 0.31 insns per cycle + # 2.60 stalled cycles per insn [83.34%] + 1,034,599,564 branches # 121.375 M/sec [83.34%] + 1,067,177 branch-misses # 0.10% of all branches [83.33%] + + 8.546194472 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_gcc_optimized 1000': + + 8539.281197 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 26,147,787,605 cycles # 3.062 GHz [83.32%] + 21,232,515,363 stalled-cycles-frontend # 81.20% frontend cycles idle [83.32%] + 7,997,377,449 stalled-cycles-backend # 30.59% backend cycles idle [66.67%] + 8,142,040,940 instructions # 0.31 insns per cycle + # 2.61 stalled cycles per insn [83.36%] + 1,034,582,094 branches # 121.156 M/sec [83.37%] + 1,067,037 branch-misses # 0.10% of all branches [83.35%] + + 8.561577755 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_gcc_optimized 1000': + + 8483.737252 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 25,976,418,040 cycles # 3.062 GHz [83.30%] + 21,031,895,833 stalled-cycles-frontend # 80.97% frontend cycles idle [83.35%] + 7,905,370,309 stalled-cycles-backend # 30.43% backend cycles idle [66.70%] + 8,147,671,922 instructions # 0.31 insns per cycle + # 2.58 stalled cycles per insn [83.35%] + 1,034,722,307 branches # 121.965 M/sec [83.35%] + 1,058,817 branch-misses # 0.10% of all branches [83.31%] + + 8.505817558 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_gcc_optimized 1000': + + 8487.683539 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,075 page-faults # 0.000 M/sec + 25,989,385,640 cycles # 3.062 GHz [83.31%] + 21,054,478,049 stalled-cycles-frontend # 81.01% frontend cycles idle [83.31%] + 7,474,887,009 stalled-cycles-backend # 28.76% backend cycles idle [66.71%] + 8,146,958,745 instructions # 0.31 insns per cycle + # 2.58 stalled cycles per insn [83.36%] + 1,034,611,204 branches # 121.896 M/sec [83.36%] + 1,063,850 branch-misses # 0.10% of all branches [83.34%] + + 8.509833793 seconds time elapsed + +Iteration 1000 + + Performance counter stats for './matrixMultiply_gcc_optimized 1000': + + 8485.279076 task-clock # 0.997 CPUs utilized + 11 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 3,074 page-faults # 0.000 M/sec + 25,980,743,845 cycles # 3.062 GHz [83.31%] + 21,043,200,703 stalled-cycles-frontend # 81.00% frontend cycles idle [83.31%] + 7,855,243,129 stalled-cycles-backend # 30.23% backend cycles idle [66.70%] + 8,147,134,514 instructions # 0.31 insns per cycle + # 2.58 stalled cycles per insn [83.35%] + 1,034,632,398 branches # 121.933 M/sec [83.35%] + 1,068,154 branch-misses # 0.10% of all branches [83.34%] + + 8.507968113 seconds time elapsed + diff --git a/sankalp/gcc_output_optimized_3000 b/sankalp/gcc_output_optimized_3000 new file mode 100644 index 0000000..bed3fc6 --- /dev/null +++ b/sankalp/gcc_output_optimized_3000 @@ -0,0 +1,180 @@ +Iteration 3000 + + Performance counter stats for './matrixMultiply_gcc_optimized 3000': + + 307990.722698 task-clock # 0.997 CPUs utilized + 385 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 942,885,267,154 cycles # 3.061 GHz [83.33%] + 813,151,718,128 stalled-cycles-frontend # 86.24% frontend cycles idle [83.33%] + 479,110,721,206 stalled-cycles-backend # 50.81% backend cycles idle [66.67%] + 217,537,751,166 instructions # 0.23 insns per cycle + # 3.74 stalled cycles per insn [83.33%] + 27,350,281,815 branches # 88.802 M/sec [83.33%] + 13,416,834 branch-misses # 0.05% of all branches [83.33%] + + 308.764947876 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_gcc_optimized 3000': + + 309095.950256 task-clock # 0.997 CPUs utilized + 388 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 946,223,423,681 cycles # 3.061 GHz [83.33%] + 816,700,149,527 stalled-cycles-frontend # 86.31% frontend cycles idle [83.33%] + 478,184,484,905 stalled-cycles-backend # 50.54% backend cycles idle [66.67%] + 217,549,241,887 instructions # 0.23 insns per cycle + # 3.75 stalled cycles per insn [83.33%] + 27,352,134,829 branches # 88.491 M/sec [83.33%] + 12,496,243 branch-misses # 0.05% of all branches [83.33%] + + 309.874694599 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_gcc_optimized 3000': + + 308456.151399 task-clock # 0.997 CPUs utilized + 387 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 944,147,394,731 cycles # 3.061 GHz [83.33%] + 815,004,329,682 stalled-cycles-frontend # 86.32% frontend cycles idle [83.33%] + 492,153,218,620 stalled-cycles-backend # 52.13% backend cycles idle [66.67%] + 217,542,001,519 instructions # 0.23 insns per cycle + # 3.75 stalled cycles per insn [83.33%] + 27,351,929,959 branches # 88.674 M/sec [83.33%] + 12,737,640 branch-misses # 0.05% of all branches [83.33%] + + 309.231466001 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_gcc_optimized 3000': + + 308648.298096 task-clock # 0.997 CPUs utilized + 387 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 944,897,614,537 cycles # 3.061 GHz [83.33%] + 815,188,834,965 stalled-cycles-frontend # 86.27% frontend cycles idle [83.33%] + 485,379,940,869 stalled-cycles-backend # 51.37% backend cycles idle [66.67%] + 217,534,979,891 instructions # 0.23 insns per cycle + # 3.75 stalled cycles per insn [83.33%] + 27,351,527,278 branches # 88.617 M/sec [83.33%] + 13,388,415 branch-misses # 0.05% of all branches [83.33%] + + 309.423862273 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_gcc_optimized 3000': + + 309909.768801 task-clock # 0.997 CPUs utilized + 388 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 948,763,312,383 cycles # 3.061 GHz [83.33%] + 819,052,696,950 stalled-cycles-frontend # 86.33% frontend cycles idle [83.33%] + 482,990,679,317 stalled-cycles-backend # 50.91% backend cycles idle [66.67%] + 217,539,864,459 instructions # 0.23 insns per cycle + # 3.77 stalled cycles per insn [83.33%] + 27,352,050,126 branches # 88.258 M/sec [83.33%] + 13,177,647 branch-misses # 0.05% of all branches [83.33%] + + 310.688240232 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_gcc_optimized 3000': + + 309327.733528 task-clock # 0.997 CPUs utilized + 388 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 947,010,787,729 cycles # 3.062 GHz [83.33%] + 817,412,412,659 stalled-cycles-frontend # 86.32% frontend cycles idle [83.33%] + 488,015,320,106 stalled-cycles-backend # 51.53% backend cycles idle [66.67%] + 217,532,974,150 instructions # 0.23 insns per cycle + # 3.76 stalled cycles per insn [83.33%] + 27,349,693,081 branches # 88.417 M/sec [83.33%] + 13,212,984 branch-misses # 0.05% of all branches [83.33%] + + 310.105193532 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_gcc_optimized 3000': + + 310282.492791 task-clock # 0.997 CPUs utilized + 389 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 949,907,449,960 cycles # 3.061 GHz [83.33%] + 820,459,518,182 stalled-cycles-frontend # 86.37% frontend cycles idle [83.33%] + 471,778,705,376 stalled-cycles-backend # 49.67% backend cycles idle [66.67%] + 217,524,558,295 instructions # 0.23 insns per cycle + # 3.77 stalled cycles per insn [83.33%] + 27,351,609,383 branches # 88.151 M/sec [83.33%] + 13,344,766 branch-misses # 0.05% of all branches [83.33%] + + 311.104012259 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_gcc_optimized 3000': + + 310349.330476 task-clock # 0.997 CPUs utilized + 389 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,356 page-faults # 0.000 M/sec + 950,012,415,697 cycles # 3.061 GHz [83.33%] + 820,923,300,577 stalled-cycles-frontend # 86.41% frontend cycles idle [83.33%] + 468,957,637,855 stalled-cycles-backend # 49.36% backend cycles idle [66.67%] + 217,545,884,095 instructions # 0.23 insns per cycle + # 3.77 stalled cycles per insn [83.33%] + 27,353,566,509 branches # 88.138 M/sec [83.33%] + 12,597,801 branch-misses # 0.05% of all branches [83.33%] + + 311.146848527 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_gcc_optimized 3000': + + 310848.446781 task-clock # 0.997 CPUs utilized + 389 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 951,637,386,126 cycles # 3.061 GHz [83.33%] + 822,546,224,447 stalled-cycles-frontend # 86.43% frontend cycles idle [83.33%] + 495,897,697,137 stalled-cycles-backend # 52.11% backend cycles idle [66.67%] + 217,546,544,859 instructions # 0.23 insns per cycle + # 3.78 stalled cycles per insn [83.33%] + 27,353,133,771 branches # 87.995 M/sec [83.33%] + 13,283,333 branch-misses # 0.05% of all branches [83.33%] + + 311.629074931 seconds time elapsed + +Iteration 3000 + + Performance counter stats for './matrixMultiply_gcc_optimized 3000': + + 312178.283971 task-clock # 0.997 CPUs utilized + 391 context-switches # 0.000 M/sec + 0 CPU-migrations # 0.000 M/sec + 32,357 page-faults # 0.000 M/sec + 955,712,667,148 cycles # 3.061 GHz [83.33%] + 826,044,644,959 stalled-cycles-frontend # 86.43% frontend cycles idle [83.33%] + 499,061,986,949 stalled-cycles-backend # 52.22% backend cycles idle [66.67%] + 217,558,514,147 instructions # 0.23 insns per cycle + # 3.80 stalled cycles per insn [83.33%] + 27,352,241,903 branches # 87.617 M/sec [83.33%] + 13,193,839 branch-misses # 0.05% of all branches [83.33%] + + 312.962113204 seconds time elapsed + diff --git a/sankalp/java_output_10 b/sankalp/java_output_10 index 4bde1f1..e2ed670 100644 --- a/sankalp/java_output_10 +++ b/sankalp/java_output_10 @@ -2,179 +2,179 @@ Iteration 1 Performance counter stats for 'java MatrixMultiplier 10': - 73.814756 task-clock # 0.948 CPUs utilized - 157 context-switches # 0.002 M/sec - 44 CPU-migrations # 0.001 M/sec - 4,254 page-faults # 0.058 M/sec - 211,869,891 cycles # 2.870 GHz [83.29%] - 118,128,398 stalled-cycles-frontend # 55.76% frontend cycles idle [79.91%] - 79,711,925 stalled-cycles-backend # 37.62% backend cycles idle [71.12%] - 182,350,254 instructions # 0.86 insns per cycle - # 0.65 stalled cycles per insn [84.54%] - 35,144,284 branches # 476.115 M/sec [87.90%] - 2,037,718 branch-misses # 5.80% of all branches [80.29%] + 72.815106 task-clock # 0.984 CPUs utilized + 139 context-switches # 0.002 M/sec + 49 CPU-migrations # 0.001 M/sec + 4,253 page-faults # 0.058 M/sec + 212,160,742 cycles # 2.914 GHz [83.64%] + 114,430,847 stalled-cycles-frontend # 53.94% frontend cycles idle [83.36%] + 80,787,912 stalled-cycles-backend # 38.08% backend cycles idle [62.53%] + 182,410,561 instructions # 0.86 insns per cycle + # 0.63 stalled cycles per insn [79.34%] + 34,469,054 branches # 473.378 M/sec [86.37%] + 2,054,782 branch-misses # 5.96% of all branches [86.95%] - 0.077889679 seconds time elapsed + 0.073987229 seconds time elapsed Iteration 2 Performance counter stats for 'java MatrixMultiplier 10': - 75.426543 task-clock # 0.977 CPUs utilized - 153 context-switches # 0.002 M/sec - 46 CPU-migrations # 0.001 M/sec - 4,254 page-faults # 0.056 M/sec - 220,055,780 cycles # 2.917 GHz [81.76%] - 122,029,915 stalled-cycles-frontend # 55.45% frontend cycles idle [87.61%] - 83,438,109 stalled-cycles-backend # 37.92% backend cycles idle [63.25%] - 184,216,236 instructions # 0.84 insns per cycle - # 0.66 stalled cycles per insn [80.25%] - 34,523,087 branches # 457.705 M/sec [86.17%] - 2,081,771 branch-misses # 6.03% of all branches [84.51%] + 74.167240 task-clock # 0.981 CPUs utilized + 146 context-switches # 0.002 M/sec + 44 CPU-migrations # 0.001 M/sec + 4,255 page-faults # 0.057 M/sec + 217,762,691 cycles # 2.936 GHz [79.75%] + 119,247,170 stalled-cycles-frontend # 54.76% frontend cycles idle [82.58%] + 81,455,743 stalled-cycles-backend # 37.41% backend cycles idle [68.10%] + 183,508,037 instructions # 0.84 insns per cycle + # 0.65 stalled cycles per insn [84.40%] + 35,369,835 branches # 476.893 M/sec [85.66%] + 2,040,142 branch-misses # 5.77% of all branches [86.07%] - 0.077238487 seconds time elapsed + 0.075620599 seconds time elapsed Iteration 3 Performance counter stats for 'java MatrixMultiplier 10': - 74.246438 task-clock # 0.981 CPUs utilized - 145 context-switches # 0.002 M/sec - 46 CPU-migrations # 0.001 M/sec - 4,255 page-faults # 0.057 M/sec - 214,968,414 cycles # 2.895 GHz [83.94%] - 119,504,714 stalled-cycles-frontend # 55.59% frontend cycles idle [84.70%] - 82,353,595 stalled-cycles-backend # 38.31% backend cycles idle [66.94%] - 184,093,021 instructions # 0.86 insns per cycle - # 0.65 stalled cycles per insn [84.65%] - 34,534,006 branches # 465.127 M/sec [83.49%] - 2,022,065 branch-misses # 5.86% of all branches [83.13%] + 75.588706 task-clock # 0.981 CPUs utilized + 148 context-switches # 0.002 M/sec + 47 CPU-migrations # 0.001 M/sec + 4,233 page-faults # 0.056 M/sec + 219,952,171 cycles # 2.910 GHz [83.32%] + 123,320,418 stalled-cycles-frontend # 56.07% frontend cycles idle [85.04%] + 82,988,638 stalled-cycles-backend # 37.73% backend cycles idle [68.62%] + 183,258,921 instructions # 0.83 insns per cycle + # 0.67 stalled cycles per insn [85.04%] + 34,664,677 branches # 458.596 M/sec [83.94%] + 2,046,911 branch-misses # 5.90% of all branches [81.13%] - 0.075715403 seconds time elapsed + 0.077076913 seconds time elapsed Iteration 4 Performance counter stats for 'java MatrixMultiplier 10': - 75.395700 task-clock # 0.977 CPUs utilized - 138 context-switches # 0.002 M/sec - 41 CPU-migrations # 0.001 M/sec - 4,255 page-faults # 0.056 M/sec - 219,917,317 cycles # 2.917 GHz [78.13%] - 121,004,839 stalled-cycles-frontend # 55.02% frontend cycles idle [85.20%] - 81,215,365 stalled-cycles-backend # 36.93% backend cycles idle [68.88%] - 185,224,171 instructions # 0.84 insns per cycle - # 0.65 stalled cycles per insn [84.46%] - 34,656,814 branches # 459.666 M/sec [84.82%] - 2,077,280 branch-misses # 5.99% of all branches [85.34%] + 73.785766 task-clock # 0.983 CPUs utilized + 156 context-switches # 0.002 M/sec + 44 CPU-migrations # 0.001 M/sec + 4,255 page-faults # 0.058 M/sec + 213,783,464 cycles # 2.897 GHz [83.60%] + 116,799,942 stalled-cycles-frontend # 54.63% frontend cycles idle [78.27%] + 77,769,074 stalled-cycles-backend # 36.38% backend cycles idle [68.20%] + 182,198,452 instructions # 0.85 insns per cycle + # 0.64 stalled cycles per insn [84.73%] + 35,172,437 branches # 476.683 M/sec [84.90%] + 2,002,118 branch-misses # 5.69% of all branches [87.69%] - 0.077180782 seconds time elapsed + 0.075044359 seconds time elapsed Iteration 5 Performance counter stats for 'java MatrixMultiplier 10': - 72.910525 task-clock # 0.962 CPUs utilized - 139 context-switches # 0.002 M/sec - 39 CPU-migrations # 0.001 M/sec - 4,253 page-faults # 0.058 M/sec - 214,855,398 cycles # 2.947 GHz [68.96%] - 117,804,486 stalled-cycles-frontend # 54.83% frontend cycles idle [84.19%] - 78,255,771 stalled-cycles-backend # 36.42% backend cycles idle [71.08%] - 182,706,403 instructions # 0.85 insns per cycle - # 0.64 stalled cycles per insn [89.11%] - 34,836,400 branches # 477.797 M/sec [89.82%] - 2,026,736 branch-misses # 5.82% of all branches [87.97%] + 75.848184 task-clock # 0.963 CPUs utilized + 177 context-switches # 0.002 M/sec + 49 CPU-migrations # 0.001 M/sec + 4,256 page-faults # 0.056 M/sec + 222,550,625 cycles # 2.934 GHz [75.92%] + 122,058,241 stalled-cycles-frontend # 54.85% frontend cycles idle [83.66%] + 82,982,505 stalled-cycles-backend # 37.29% backend cycles idle [68.71%] + 183,204,443 instructions # 0.82 insns per cycle + # 0.67 stalled cycles per insn [84.97%] + 35,265,600 branches # 464.950 M/sec [85.01%] + 2,010,327 branch-misses # 5.70% of all branches [88.69%] - 0.075795774 seconds time elapsed + 0.078736540 seconds time elapsed Iteration 6 Performance counter stats for 'java MatrixMultiplier 10': - 75.648039 task-clock # 0.965 CPUs utilized - 168 context-switches # 0.002 M/sec - 46 CPU-migrations # 0.001 M/sec - 4,235 page-faults # 0.056 M/sec - 218,965,621 cycles # 2.895 GHz [85.82%] - 121,200,611 stalled-cycles-frontend # 55.35% frontend cycles idle [79.76%] - 84,094,158 stalled-cycles-backend # 38.41% backend cycles idle [68.13%] - 185,778,627 instructions # 0.85 insns per cycle - # 0.65 stalled cycles per insn [83.98%] - 34,528,645 branches # 456.438 M/sec [85.01%] - 2,049,866 branch-misses # 5.94% of all branches [83.50%] + 74.552674 task-clock # 0.978 CPUs utilized + 146 context-switches # 0.002 M/sec + 45 CPU-migrations # 0.001 M/sec + 4,252 page-faults # 0.057 M/sec + 220,109,964 cycles # 2.952 GHz [78.21%] + 119,486,246 stalled-cycles-frontend # 54.28% frontend cycles idle [84.59%] + 80,845,000 stalled-cycles-backend # 36.73% backend cycles idle [68.08%] + 184,366,056 instructions # 0.84 insns per cycle + # 0.65 stalled cycles per insn [84.04%] + 34,909,548 branches # 468.253 M/sec [84.14%] + 2,006,308 branch-misses # 5.75% of all branches [87.11%] - 0.078417437 seconds time elapsed + 0.076255589 seconds time elapsed Iteration 7 Performance counter stats for 'java MatrixMultiplier 10': - 74.829798 task-clock # 0.981 CPUs utilized - 157 context-switches # 0.002 M/sec - 41 CPU-migrations # 0.001 M/sec - 4,254 page-faults # 0.057 M/sec - 216,250,673 cycles # 2.890 GHz [84.46%] - 118,790,030 stalled-cycles-frontend # 54.93% frontend cycles idle [77.62%] - 78,834,710 stalled-cycles-backend # 36.46% backend cycles idle [68.42%] - 185,004,511 instructions # 0.86 insns per cycle - # 0.64 stalled cycles per insn [83.84%] - 35,127,218 branches # 469.428 M/sec [85.23%] - 2,014,756 branch-misses # 5.74% of all branches [86.76%] + 74.919822 task-clock # 0.945 CPUs utilized + 136 context-switches # 0.002 M/sec + 49 CPU-migrations # 0.001 M/sec + 4,232 page-faults # 0.056 M/sec + 215,665,200 cycles # 2.879 GHz [79.71%] + 122,784,131 stalled-cycles-frontend # 56.93% frontend cycles idle [86.01%] + 83,019,995 stalled-cycles-backend # 38.49% backend cycles idle [68.15%] + 184,014,109 instructions # 0.85 insns per cycle + # 0.67 stalled cycles per insn [84.43%] + 34,335,055 branches # 458.291 M/sec [85.30%] + 2,017,824 branch-misses # 5.88% of all branches [83.15%] - 0.076248085 seconds time elapsed + 0.079275197 seconds time elapsed Iteration 8 Performance counter stats for 'java MatrixMultiplier 10': - 74.739696 task-clock # 0.980 CPUs utilized - 149 context-switches # 0.002 M/sec - 56 CPU-migrations # 0.001 M/sec - 4,252 page-faults # 0.057 M/sec - 220,490,767 cycles # 2.950 GHz [82.19%] - 121,004,653 stalled-cycles-frontend # 54.88% frontend cycles idle [85.74%] - 81,908,614 stalled-cycles-backend # 37.15% backend cycles idle [67.71%] - 183,789,555 instructions # 0.83 insns per cycle - # 0.66 stalled cycles per insn [84.33%] - 34,221,793 branches # 457.880 M/sec [84.40%] - 2,058,395 branch-misses # 6.01% of all branches [81.97%] + 72.325490 task-clock # 0.978 CPUs utilized + 136 context-switches # 0.002 M/sec + 48 CPU-migrations # 0.001 M/sec + 4,255 page-faults # 0.059 M/sec + 211,331,070 cycles # 2.922 GHz [83.78%] + 115,877,185 stalled-cycles-frontend # 54.83% frontend cycles idle [77.28%] + 75,513,035 stalled-cycles-backend # 35.73% backend cycles idle [67.46%] + 182,895,864 instructions # 0.87 insns per cycle + # 0.63 stalled cycles per insn [83.79%] + 35,178,485 branches # 486.391 M/sec [85.16%] + 2,003,949 branch-misses # 5.70% of all branches [88.08%] - 0.076228013 seconds time elapsed + 0.073979011 seconds time elapsed Iteration 9 Performance counter stats for 'java MatrixMultiplier 10': - 75.068830 task-clock # 0.977 CPUs utilized - 162 context-switches # 0.002 M/sec - 47 CPU-migrations # 0.001 M/sec - 4,255 page-faults # 0.057 M/sec - 215,412,368 cycles # 2.870 GHz [80.58%] - 121,493,936 stalled-cycles-frontend # 56.40% frontend cycles idle [81.92%] - 81,065,739 stalled-cycles-backend # 37.63% backend cycles idle [71.00%] - 182,646,249 instructions # 0.85 insns per cycle - # 0.67 stalled cycles per insn [84.17%] - 34,559,117 branches # 460.366 M/sec [84.07%] - 1,927,007 branch-misses # 5.58% of all branches [84.32%] + 74.615253 task-clock # 0.984 CPUs utilized + 136 context-switches # 0.002 M/sec + 41 CPU-migrations # 0.001 M/sec + 4,233 page-faults # 0.057 M/sec + 221,217,036 cycles # 2.965 GHz [78.81%] + 120,264,263 stalled-cycles-frontend # 54.36% frontend cycles idle [83.70%] + 80,040,282 stalled-cycles-backend # 36.18% backend cycles idle [68.39%] + 183,237,659 instructions # 0.83 insns per cycle + # 0.66 stalled cycles per insn [85.00%] + 35,439,125 branches # 474.958 M/sec [85.72%] + 2,039,792 branch-misses # 5.76% of all branches [85.90%] - 0.076856148 seconds time elapsed + 0.075848324 seconds time elapsed Iteration 10 Performance counter stats for 'java MatrixMultiplier 10': - 74.789082 task-clock # 0.983 CPUs utilized - 140 context-switches # 0.002 M/sec - 43 CPU-migrations # 0.001 M/sec - 4,233 page-faults # 0.057 M/sec - 221,271,457 cycles # 2.959 GHz [81.80%] - 120,519,135 stalled-cycles-frontend # 54.47% frontend cycles idle [84.80%] - 83,902,857 stalled-cycles-backend # 37.92% backend cycles idle [68.31%] - 183,981,689 instructions # 0.83 insns per cycle - # 0.66 stalled cycles per insn [84.58%] - 34,659,362 branches # 463.428 M/sec [84.13%] - 2,036,761 branch-misses # 5.88% of all branches [82.59%] - - 0.076072401 seconds time elapsed + 74.492689 task-clock # 0.973 CPUs utilized + 150 context-switches # 0.002 M/sec + 41 CPU-migrations # 0.001 M/sec + 4,255 page-faults # 0.057 M/sec + 216,573,426 cycles # 2.907 GHz [84.20%] + 120,115,088 stalled-cycles-frontend # 55.46% frontend cycles idle [79.15%] + 81,505,498 stalled-cycles-backend # 37.63% backend cycles idle [67.75%] + 184,181,901 instructions # 0.85 insns per cycle + # 0.65 stalled cycles per insn [84.02%] + 35,341,758 branches # 474.433 M/sec [84.88%] + 2,031,210 branch-misses # 5.75% of all branches [86.54%] + + 0.076539126 seconds time elapsed diff --git a/sankalp/java_output_100 b/sankalp/java_output_100 index e48a203..5ff2003 100644 --- a/sankalp/java_output_100 +++ b/sankalp/java_output_100 @@ -2,179 +2,179 @@ Iteration 1 Performance counter stats for 'java MatrixMultiplier 100': - 104.746489 task-clock # 0.826 CPUs utilized - 169 context-switches # 0.002 M/sec - 44 CPU-migrations # 0.000 M/sec - 4,911 page-faults # 0.047 M/sec - 308,572,871 cycles # 2.946 GHz [85.84%] - 163,849,227 stalled-cycles-frontend # 53.10% frontend cycles idle [74.67%] - 110,314,966 stalled-cycles-backend # 35.75% backend cycles idle [65.15%] - 271,187,853 instructions # 0.88 insns per cycle - # 0.60 stalled cycles per insn [88.30%] - 55,037,045 branches # 525.431 M/sec [88.79%] - 2,958,690 branch-misses # 5.38% of all branches [86.56%] - - 0.126846272 seconds time elapsed + 103.321799 task-clock # 1.117 CPUs utilized + 175 context-switches # 0.002 M/sec + 49 CPU-migrations # 0.000 M/sec + 4,909 page-faults # 0.048 M/sec + 301,216,233 cycles # 2.915 GHz [84.32%] + 163,477,231 stalled-cycles-frontend # 54.27% frontend cycles idle [75.05%] + 103,759,590 stalled-cycles-backend # 34.45% backend cycles idle [66.56%] + 297,736,573 instructions # 0.99 insns per cycle + # 0.55 stalled cycles per insn [88.51%] + 55,643,691 branches # 538.547 M/sec [88.33%] + 2,889,288 branch-misses # 5.19% of all branches [87.38%] + + 0.092533164 seconds time elapsed Iteration 2 Performance counter stats for 'java MatrixMultiplier 100': - 105.073416 task-clock # 0.870 CPUs utilized - 162 context-switches # 0.002 M/sec - 42 CPU-migrations # 0.000 M/sec - 4,910 page-faults # 0.047 M/sec - 311,169,627 cycles # 2.961 GHz [82.20%] - 166,008,039 stalled-cycles-frontend # 53.35% frontend cycles idle [86.19%] - 102,128,765 stalled-cycles-backend # 32.82% backend cycles idle [59.47%] - 284,206,036 instructions # 0.91 insns per cycle - # 0.58 stalled cycles per insn [80.84%] - 53,250,065 branches # 506.789 M/sec [86.90%] - 3,065,376 branch-misses # 5.76% of all branches [86.93%] + 103.759715 task-clock # 1.117 CPUs utilized + 160 context-switches # 0.002 M/sec + 47 CPU-migrations # 0.000 M/sec + 4,901 page-faults # 0.047 M/sec + 305,125,075 cycles # 2.941 GHz [84.89%] + 160,369,545 stalled-cycles-frontend # 52.56% frontend cycles idle [74.82%] + 101,259,462 stalled-cycles-backend # 33.19% backend cycles idle [66.10%] + 287,525,333 instructions # 0.94 insns per cycle + # 0.56 stalled cycles per insn [89.35%] + 54,463,632 branches # 524.902 M/sec [88.47%] + 3,052,971 branch-misses # 5.61% of all branches [87.52%] - 0.120828804 seconds time elapsed + 0.092870741 seconds time elapsed Iteration 3 Performance counter stats for 'java MatrixMultiplier 100': - 104.806678 task-clock # 0.827 CPUs utilized - 158 context-switches # 0.002 M/sec + 104.375659 task-clock # 1.059 CPUs utilized + 159 context-switches # 0.002 M/sec 44 CPU-migrations # 0.000 M/sec - 4,905 page-faults # 0.047 M/sec - 307,136,933 cycles # 2.931 GHz [83.58%] - 161,088,688 stalled-cycles-frontend # 52.45% frontend cycles idle [85.57%] - 110,906,935 stalled-cycles-backend # 36.11% backend cycles idle [60.44%] - 284,480,493 instructions # 0.93 insns per cycle - # 0.57 stalled cycles per insn [81.51%] - 53,693,355 branches # 512.309 M/sec [84.70%] - 2,776,226 branch-misses # 5.17% of all branches [87.10%] + 4,899 page-faults # 0.047 M/sec + 312,875,028 cycles # 2.998 GHz [77.06%] + 168,819,560 stalled-cycles-frontend # 53.96% frontend cycles idle [78.79%] + 101,445,345 stalled-cycles-backend # 32.42% backend cycles idle [71.36%] + 301,146,158 instructions # 0.96 insns per cycle + # 0.56 stalled cycles per insn [88.93%] + 55,088,281 branches # 527.789 M/sec [88.62%] + 2,998,976 branch-misses # 5.44% of all branches [85.92%] - 0.126797557 seconds time elapsed + 0.098543800 seconds time elapsed Iteration 4 Performance counter stats for 'java MatrixMultiplier 100': - 102.669223 task-clock # 0.804 CPUs utilized - 174 context-switches # 0.002 M/sec - 43 CPU-migrations # 0.000 M/sec - 4,902 page-faults # 0.048 M/sec - 302,708,684 cycles # 2.948 GHz [84.85%] - 162,212,866 stalled-cycles-frontend # 53.59% frontend cycles idle [75.03%] - 102,355,542 stalled-cycles-backend # 33.81% backend cycles idle [66.93%] - 291,250,018 instructions # 0.96 insns per cycle - # 0.56 stalled cycles per insn [88.35%] - 53,438,847 branches # 520.495 M/sec [88.52%] - 3,076,528 branch-misses # 5.76% of all branches [85.95%] + 106.506564 task-clock # 1.098 CPUs utilized + 150 context-switches # 0.001 M/sec + 44 CPU-migrations # 0.000 M/sec + 4,908 page-faults # 0.046 M/sec + 314,107,537 cycles # 2.949 GHz [80.00%] + 168,129,079 stalled-cycles-frontend # 53.53% frontend cycles idle [83.52%] + 110,233,792 stalled-cycles-backend # 35.09% backend cycles idle [61.34%] + 267,033,663 instructions # 0.85 insns per cycle + # 0.63 stalled cycles per insn [88.69%] + 54,684,767 branches # 513.440 M/sec [89.14%] + 3,034,135 branch-misses # 5.55% of all branches [87.76%] - 0.127664616 seconds time elapsed + 0.096957055 seconds time elapsed Iteration 5 Performance counter stats for 'java MatrixMultiplier 100': - 103.803424 task-clock # 0.776 CPUs utilized - 155 context-switches # 0.001 M/sec - 42 CPU-migrations # 0.000 M/sec - 4,904 page-faults # 0.047 M/sec - 305,255,769 cycles # 2.941 GHz [86.83%] - 162,227,950 stalled-cycles-frontend # 53.14% frontend cycles idle [76.48%] - 107,322,939 stalled-cycles-backend # 35.16% backend cycles idle [64.30%] - 288,359,451 instructions # 0.94 insns per cycle - # 0.56 stalled cycles per insn [87.61%] - 54,283,919 branches # 522.949 M/sec [88.61%] - 3,101,270 branch-misses # 5.71% of all branches [85.20%] + 103.919871 task-clock # 1.123 CPUs utilized + 172 context-switches # 0.002 M/sec + 44 CPU-migrations # 0.000 M/sec + 4,916 page-faults # 0.047 M/sec + 302,840,737 cycles # 2.914 GHz [85.86%] + 156,296,630 stalled-cycles-frontend # 51.61% frontend cycles idle [83.16%] + 112,184,665 stalled-cycles-backend # 37.04% backend cycles idle [58.29%] + 266,462,000 instructions # 0.88 insns per cycle + # 0.59 stalled cycles per insn [80.55%] + 54,001,529 branches # 519.646 M/sec [87.11%] + 2,916,764 branch-misses # 5.40% of all branches [87.46%] - 0.133713233 seconds time elapsed + 0.092571453 seconds time elapsed Iteration 6 Performance counter stats for 'java MatrixMultiplier 100': - 104.215729 task-clock # 0.819 CPUs utilized - 176 context-switches # 0.002 M/sec - 43 CPU-migrations # 0.000 M/sec - 4,912 page-faults # 0.047 M/sec - 302,453,171 cycles # 2.902 GHz [87.65%] - 161,623,945 stalled-cycles-frontend # 53.44% frontend cycles idle [79.60%] - 111,110,021 stalled-cycles-backend # 36.74% backend cycles idle [60.70%] - 271,333,536 instructions # 0.90 insns per cycle - # 0.60 stalled cycles per insn [87.65%] - 53,593,260 branches # 514.253 M/sec [88.57%] - 3,160,617 branch-misses # 5.90% of all branches [85.11%] + 104.129244 task-clock # 1.110 CPUs utilized + 167 context-switches # 0.002 M/sec + 40 CPU-migrations # 0.000 M/sec + 4,901 page-faults # 0.047 M/sec + 305,068,140 cycles # 2.930 GHz [84.38%] + 160,683,594 stalled-cycles-frontend # 52.67% frontend cycles idle [83.04%] + 108,136,607 stalled-cycles-backend # 35.45% backend cycles idle [58.36%] + 262,531,379 instructions # 0.86 insns per cycle + # 0.61 stalled cycles per insn [84.17%] + 52,184,320 branches # 501.150 M/sec [87.66%] + 3,073,796 branch-misses # 5.89% of all branches [88.33%] - 0.127218799 seconds time elapsed + 0.093834708 seconds time elapsed Iteration 7 Performance counter stats for 'java MatrixMultiplier 100': - 104.762398 task-clock # 0.829 CPUs utilized - 156 context-switches # 0.001 M/sec - 46 CPU-migrations # 0.000 M/sec - 4,917 page-faults # 0.047 M/sec - 314,196,799 cycles # 2.999 GHz [70.14%] - 162,936,333 stalled-cycles-frontend # 51.86% frontend cycles idle [79.89%] - 104,204,007 stalled-cycles-backend # 33.17% backend cycles idle [76.48%] - 303,203,042 instructions # 0.97 insns per cycle - # 0.54 stalled cycles per insn [88.72%] - 55,501,074 branches # 529.780 M/sec [88.95%] - 2,874,787 branch-misses # 5.18% of all branches [86.15%] + 104.108239 task-clock # 1.125 CPUs utilized + 157 context-switches # 0.002 M/sec + 42 CPU-migrations # 0.000 M/sec + 4,898 page-faults # 0.047 M/sec + 307,017,870 cycles # 2.949 GHz [84.90%] + 164,306,170 stalled-cycles-frontend # 53.52% frontend cycles idle [77.93%] + 103,125,565 stalled-cycles-backend # 33.59% backend cycles idle [65.45%] + 288,379,192 instructions # 0.94 insns per cycle + # 0.57 stalled cycles per insn [85.16%] + 53,702,811 branches # 515.836 M/sec [86.05%] + 3,044,773 branch-misses # 5.67% of all branches [87.57%] - 0.126417299 seconds time elapsed + 0.092529815 seconds time elapsed Iteration 8 Performance counter stats for 'java MatrixMultiplier 100': - 104.286359 task-clock # 0.818 CPUs utilized - 159 context-switches # 0.002 M/sec - 41 CPU-migrations # 0.000 M/sec - 4,897 page-faults # 0.047 M/sec - 307,665,208 cycles # 2.950 GHz [82.64%] - 161,525,319 stalled-cycles-frontend # 52.50% frontend cycles idle [83.76%] - 104,838,283 stalled-cycles-backend # 34.08% backend cycles idle [60.68%] - 270,038,666 instructions # 0.88 insns per cycle - # 0.60 stalled cycles per insn [84.04%] - 53,152,819 branches # 509.681 M/sec [88.08%] - 3,059,602 branch-misses # 5.76% of all branches [86.23%] - - 0.127504121 seconds time elapsed + 104.765434 task-clock # 1.108 CPUs utilized + 169 context-switches # 0.002 M/sec + 45 CPU-migrations # 0.000 M/sec + 4,901 page-faults # 0.047 M/sec + 307,535,192 cycles # 2.935 GHz [84.82%] + 164,207,959 stalled-cycles-frontend # 53.39% frontend cycles idle [79.74%] + 106,454,679 stalled-cycles-backend # 34.62% backend cycles idle [60.71%] + 268,169,951 instructions # 0.87 insns per cycle + # 0.61 stalled cycles per insn [88.10%] + 54,860,528 branches # 523.651 M/sec [88.40%] + 2,968,193 branch-misses # 5.41% of all branches [87.39%] + + 0.094565311 seconds time elapsed Iteration 9 Performance counter stats for 'java MatrixMultiplier 100': - 103.018370 task-clock # 0.802 CPUs utilized - 172 context-switches # 0.002 M/sec - 47 CPU-migrations # 0.000 M/sec - 4,901 page-faults # 0.048 M/sec - 310,294,538 cycles # 3.012 GHz [75.30%] - 164,104,338 stalled-cycles-frontend # 52.89% frontend cycles idle [78.12%] - 99,758,390 stalled-cycles-backend # 32.15% backend cycles idle [74.32%] - 294,822,518 instructions # 0.95 insns per cycle - # 0.56 stalled cycles per insn [88.55%] - 54,467,883 branches # 528.720 M/sec [88.57%] - 3,033,202 branch-misses # 5.57% of all branches [85.17%] + 105.879995 task-clock # 1.108 CPUs utilized + 181 context-switches # 0.002 M/sec + 49 CPU-migrations # 0.000 M/sec + 4,910 page-faults # 0.046 M/sec + 309,713,266 cycles # 2.925 GHz [86.13%] + 164,906,341 stalled-cycles-frontend # 53.24% frontend cycles idle [79.10%] + 111,500,921 stalled-cycles-backend # 36.00% backend cycles idle [61.51%] + 272,166,057 instructions # 0.88 insns per cycle + # 0.61 stalled cycles per insn [87.40%] + 53,079,383 branches # 501.316 M/sec [88.80%] + 3,046,810 branch-misses # 5.74% of all branches [86.44%] - 0.128460788 seconds time elapsed + 0.095518615 seconds time elapsed Iteration 10 Performance counter stats for 'java MatrixMultiplier 100': - 104.261775 task-clock # 0.826 CPUs utilized - 177 context-switches # 0.002 M/sec - 45 CPU-migrations # 0.000 M/sec - 4,899 page-faults # 0.047 M/sec - 308,568,212 cycles # 2.960 GHz [74.85%] - 166,638,943 stalled-cycles-frontend # 54.00% frontend cycles idle [77.71%] - 106,033,306 stalled-cycles-backend # 34.36% backend cycles idle [74.27%] - 294,497,423 instructions # 0.95 insns per cycle - # 0.57 stalled cycles per insn [88.55%] - 54,219,007 branches # 520.028 M/sec [88.63%] - 3,099,068 branch-misses # 5.72% of all branches [86.05%] - - 0.126175195 seconds time elapsed + 104.498642 task-clock # 1.120 CPUs utilized + 166 context-switches # 0.002 M/sec + 43 CPU-migrations # 0.000 M/sec + 4,912 page-faults # 0.047 M/sec + 306,585,731 cycles # 2.934 GHz [86.11%] + 157,952,036 stalled-cycles-frontend # 51.52% frontend cycles idle [76.53%] + 109,596,785 stalled-cycles-backend # 35.75% backend cycles idle [65.42%] + 284,918,343 instructions # 0.93 insns per cycle + # 0.55 stalled cycles per insn [88.44%] + 54,016,381 branches # 516.910 M/sec [88.80%] + 3,056,357 branch-misses # 5.66% of all branches [85.71%] + + 0.093262045 seconds time elapsed diff --git a/sankalp/java_output_1000 b/sankalp/java_output_1000 index 2676cb0..0e72482 100644 --- a/sankalp/java_output_1000 +++ b/sankalp/java_output_1000 @@ -2,179 +2,179 @@ Iteration 1 Performance counter stats for 'java MatrixMultiplier 1000': - 8189.705765 task-clock # 0.996 CPUs utilized - 345 context-switches # 0.000 M/sec - 52 CPU-migrations # 0.000 M/sec - 7,923 page-faults # 0.001 M/sec - 25,046,613,587 cycles # 3.058 GHz [83.37%] - 19,626,824,372 stalled-cycles-frontend # 78.36% frontend cycles idle [83.30%] - 5,625,217,098 stalled-cycles-backend # 22.46% backend cycles idle [66.65%] - 11,498,998,059 instructions # 0.46 insns per cycle - # 1.71 stalled cycles per insn [83.32%] - 1,584,699,622 branches # 193.499 M/sec [83.30%] - 5,035,668 branch-misses # 0.32% of all branches [83.41%] + 8230.895013 task-clock # 1.000 CPUs utilized + 349 context-switches # 0.000 M/sec + 51 CPU-migrations # 0.000 M/sec + 7,924 page-faults # 0.001 M/sec + 25,179,610,041 cycles # 3.059 GHz [83.31%] + 19,733,923,171 stalled-cycles-frontend # 78.37% frontend cycles idle [83.35%] + 5,763,803,893 stalled-cycles-backend # 22.89% backend cycles idle [66.59%] + 11,489,775,504 instructions # 0.46 insns per cycle + # 1.72 stalled cycles per insn [83.39%] + 1,586,764,019 branches # 192.781 M/sec [83.37%] + 5,033,764 branch-misses # 0.32% of all branches [83.39%] - 8.224508473 seconds time elapsed + 8.231041043 seconds time elapsed Iteration 2 Performance counter stats for 'java MatrixMultiplier 1000': - 8201.366801 task-clock # 1.000 CPUs utilized - 356 context-switches # 0.000 M/sec - 59 CPU-migrations # 0.000 M/sec - 7,926 page-faults # 0.001 M/sec - 25,086,666,257 cycles # 3.059 GHz [83.37%] - 19,653,613,981 stalled-cycles-frontend # 78.34% frontend cycles idle [83.34%] - 6,044,554,119 stalled-cycles-backend # 24.09% backend cycles idle [66.54%] - 11,489,727,013 instructions # 0.46 insns per cycle - # 1.71 stalled cycles per insn [83.27%] - 1,583,699,875 branches # 193.102 M/sec [83.36%] - 4,928,257 branch-misses # 0.31% of all branches [83.41%] + 8259.814170 task-clock # 1.000 CPUs utilized + 348 context-switches # 0.000 M/sec + 49 CPU-migrations # 0.000 M/sec + 7,921 page-faults # 0.001 M/sec + 25,267,638,160 cycles # 3.059 GHz [83.34%] + 19,841,583,037 stalled-cycles-frontend # 78.53% frontend cycles idle [83.21%] + 6,527,775,541 stalled-cycles-backend # 25.83% backend cycles idle [66.69%] + 11,513,813,855 instructions # 0.46 insns per cycle + # 1.72 stalled cycles per insn [83.38%] + 1,587,852,465 branches # 192.238 M/sec [83.37%] + 5,136,172 branch-misses # 0.32% of all branches [83.43%] - 8.201853253 seconds time elapsed + 8.260232738 seconds time elapsed Iteration 3 Performance counter stats for 'java MatrixMultiplier 1000': - 8226.593365 task-clock # 1.000 CPUs utilized - 340 context-switches # 0.000 M/sec - 54 CPU-migrations # 0.000 M/sec + 8194.756476 task-clock # 1.000 CPUs utilized + 375 context-switches # 0.000 M/sec + 52 CPU-migrations # 0.000 M/sec 7,917 page-faults # 0.001 M/sec - 25,167,840,017 cycles # 3.059 GHz [83.36%] - 19,742,382,690 stalled-cycles-frontend # 78.44% frontend cycles idle [83.21%] - 5,920,768,984 stalled-cycles-backend # 23.53% backend cycles idle [66.67%] - 11,483,707,296 instructions # 0.46 insns per cycle - # 1.72 stalled cycles per insn [83.41%] - 1,588,778,433 branches # 193.127 M/sec [83.42%] - 5,038,893 branch-misses # 0.32% of all branches [83.37%] + 25,071,710,073 cycles # 3.059 GHz [83.35%] + 19,634,941,038 stalled-cycles-frontend # 78.32% frontend cycles idle [83.36%] + 5,481,936,492 stalled-cycles-backend # 21.87% backend cycles idle [66.57%] + 11,497,785,971 instructions # 0.46 insns per cycle + # 1.71 stalled cycles per insn [83.26%] + 1,585,354,953 branches # 193.460 M/sec [83.36%] + 4,624,130 branch-misses # 0.29% of all branches [83.37%] - 8.226387546 seconds time elapsed + 8.195429601 seconds time elapsed Iteration 4 Performance counter stats for 'java MatrixMultiplier 1000': - 8260.429550 task-clock # 0.999 CPUs utilized - 344 context-switches # 0.000 M/sec - 46 CPU-migrations # 0.000 M/sec - 7,927 page-faults # 0.001 M/sec - 25,262,443,048 cycles # 3.058 GHz [83.37%] - 19,840,033,810 stalled-cycles-frontend # 78.54% frontend cycles idle [83.39%] - 6,387,603,184 stalled-cycles-backend # 25.28% backend cycles idle [66.64%] - 11,499,888,815 instructions # 0.46 insns per cycle - # 1.73 stalled cycles per insn [83.25%] - 1,583,378,720 branches # 191.682 M/sec [83.26%] - 4,695,896 branch-misses # 0.30% of all branches [83.36%] - - 8.265790251 seconds time elapsed + 8289.424086 task-clock # 1.000 CPUs utilized + 351 context-switches # 0.000 M/sec + 50 CPU-migrations # 0.000 M/sec + 7,924 page-faults # 0.001 M/sec + 25,357,184,443 cycles # 3.059 GHz [83.40%] + 19,924,099,338 stalled-cycles-frontend # 78.57% frontend cycles idle [83.30%] + 5,665,731,789 stalled-cycles-backend # 22.34% backend cycles idle [66.62%] + 11,497,881,177 instructions # 0.45 insns per cycle + # 1.73 stalled cycles per insn [83.36%] + 1,586,295,230 branches # 191.364 M/sec [83.35%] + 4,864,960 branch-misses # 0.31% of all branches [83.36%] + + 8.289548645 seconds time elapsed Iteration 5 Performance counter stats for 'java MatrixMultiplier 1000': - 8232.352704 task-clock # 1.000 CPUs utilized - 355 context-switches # 0.000 M/sec - 46 CPU-migrations # 0.000 M/sec - 7,920 page-faults # 0.001 M/sec - 25,181,505,621 cycles # 3.059 GHz [83.38%] - 19,745,669,605 stalled-cycles-frontend # 78.41% frontend cycles idle [83.37%] - 5,924,168,083 stalled-cycles-backend # 23.53% backend cycles idle [66.59%] - 11,492,938,290 instructions # 0.46 insns per cycle - # 1.72 stalled cycles per insn [83.24%] - 1,584,816,005 branches # 192.511 M/sec [83.28%] - 4,682,747 branch-misses # 0.30% of all branches [83.40%] - - 8.233134328 seconds time elapsed + 8295.510219 task-clock # 1.000 CPUs utilized + 344 context-switches # 0.000 M/sec + 45 CPU-migrations # 0.000 M/sec + 7,914 page-faults # 0.001 M/sec + 25,375,983,015 cycles # 3.059 GHz [83.35%] + 19,927,717,481 stalled-cycles-frontend # 78.53% frontend cycles idle [83.38%] + 5,775,388,326 stalled-cycles-backend # 22.76% backend cycles idle [66.64%] + 11,510,303,341 instructions # 0.45 insns per cycle + # 1.73 stalled cycles per insn [83.34%] + 1,584,048,659 branches # 190.953 M/sec [83.33%] + 4,757,923 branch-misses # 0.30% of all branches [83.34%] + + 8.295597912 seconds time elapsed Iteration 6 Performance counter stats for 'java MatrixMultiplier 1000': - 8234.639802 task-clock # 1.000 CPUs utilized - 334 context-switches # 0.000 M/sec - 52 CPU-migrations # 0.000 M/sec - 7,915 page-faults # 0.001 M/sec - 25,205,280,543 cycles # 3.061 GHz [83.31%] - 19,763,735,705 stalled-cycles-frontend # 78.41% frontend cycles idle [83.26%] - 5,702,847,993 stalled-cycles-backend # 22.63% backend cycles idle [66.67%] - 11,500,159,783 instructions # 0.46 insns per cycle - # 1.72 stalled cycles per insn [83.38%] - 1,590,468,625 branches # 193.144 M/sec [83.43%] - 4,924,546 branch-misses # 0.31% of all branches [83.36%] + 8279.033166 task-clock # 1.000 CPUs utilized + 338 context-switches # 0.000 M/sec + 49 CPU-migrations # 0.000 M/sec + 7,921 page-faults # 0.001 M/sec + 25,325,475,010 cycles # 3.059 GHz [83.38%] + 19,889,516,972 stalled-cycles-frontend # 78.54% frontend cycles idle [83.24%] + 6,028,939,780 stalled-cycles-backend # 23.81% backend cycles idle [66.57%] + 11,486,000,036 instructions # 0.45 insns per cycle + # 1.73 stalled cycles per insn [83.42%] + 1,588,599,157 branches # 191.882 M/sec [83.43%] + 4,920,256 branch-misses # 0.31% of all branches [83.40%] - 8.235079008 seconds time elapsed + 8.280647843 seconds time elapsed Iteration 7 Performance counter stats for 'java MatrixMultiplier 1000': - 8194.075299 task-clock # 1.000 CPUs utilized - 342 context-switches # 0.000 M/sec - 52 CPU-migrations # 0.000 M/sec - 7,930 page-faults # 0.001 M/sec - 25,069,438,902 cycles # 3.059 GHz [83.31%] - 19,660,679,181 stalled-cycles-frontend # 78.42% frontend cycles idle [83.31%] - 5,702,572,352 stalled-cycles-backend # 22.75% backend cycles idle [66.64%] - 11,484,198,067 instructions # 0.46 insns per cycle - # 1.71 stalled cycles per insn [83.35%] - 1,587,837,333 branches # 193.779 M/sec [83.38%] - 5,038,709 branch-misses # 0.32% of all branches [83.39%] + 8238.691565 task-clock # 0.979 CPUs utilized + 350 context-switches # 0.000 M/sec + 60 CPU-migrations # 0.000 M/sec + 7,908 page-faults # 0.001 M/sec + 25,200,142,210 cycles # 3.059 GHz [83.36%] + 19,774,075,612 stalled-cycles-frontend # 78.47% frontend cycles idle [83.35%] + 5,690,178,267 stalled-cycles-backend # 22.58% backend cycles idle [66.56%] + 11,493,062,103 instructions # 0.46 insns per cycle + # 1.72 stalled cycles per insn [83.30%] + 1,590,593,567 branches # 193.064 M/sec [83.37%] + 4,824,786 branch-misses # 0.30% of all branches [83.40%] - 8.193395991 seconds time elapsed + 8.418598063 seconds time elapsed Iteration 8 Performance counter stats for 'java MatrixMultiplier 1000': - 8198.617619 task-clock # 1.000 CPUs utilized - 346 context-switches # 0.000 M/sec - 57 CPU-migrations # 0.000 M/sec - 7,921 page-faults # 0.001 M/sec - 25,080,914,449 cycles # 3.059 GHz [83.34%] - 19,639,873,745 stalled-cycles-frontend # 78.31% frontend cycles idle [83.40%] - 5,508,518,414 stalled-cycles-backend # 21.96% backend cycles idle [66.56%] - 11,499,212,780 instructions # 0.46 insns per cycle - # 1.71 stalled cycles per insn [83.33%] - 1,588,578,794 branches # 193.762 M/sec [83.36%] - 5,123,825 branch-misses # 0.32% of all branches [83.37%] + 8261.292085 task-clock # 1.000 CPUs utilized + 363 context-switches # 0.000 M/sec + 47 CPU-migrations # 0.000 M/sec + 7,924 page-faults # 0.001 M/sec + 25,267,234,464 cycles # 3.059 GHz [83.38%] + 19,828,388,563 stalled-cycles-frontend # 78.47% frontend cycles idle [83.37%] + 5,913,581,026 stalled-cycles-backend # 23.40% backend cycles idle [66.54%] + 11,491,742,369 instructions # 0.45 insns per cycle + # 1.73 stalled cycles per insn [83.26%] + 1,584,928,928 branches # 191.850 M/sec [83.35%] + 4,710,317 branch-misses # 0.30% of all branches [83.39%] - 8.199200929 seconds time elapsed + 8.263920402 seconds time elapsed Iteration 9 Performance counter stats for 'java MatrixMultiplier 1000': - 8309.905416 task-clock # 0.994 CPUs utilized - 342 context-switches # 0.000 M/sec - 55 CPU-migrations # 0.000 M/sec + 8159.608685 task-clock # 1.000 CPUs utilized + 352 context-switches # 0.000 M/sec + 44 CPU-migrations # 0.000 M/sec 7,922 page-faults # 0.001 M/sec - 25,422,985,224 cycles # 3.059 GHz [83.40%] - 19,982,855,266 stalled-cycles-frontend # 78.60% frontend cycles idle [83.28%] - 5,648,979,964 stalled-cycles-backend # 22.22% backend cycles idle [66.57%] - 11,501,679,926 instructions # 0.45 insns per cycle - # 1.74 stalled cycles per insn [83.39%] - 1,587,673,401 branches # 191.058 M/sec [83.39%] - 4,705,502 branch-misses # 0.30% of all branches [83.38%] + 24,966,244,868 cycles # 3.060 GHz [83.38%] + 19,528,618,282 stalled-cycles-frontend # 78.22% frontend cycles idle [83.38%] + 5,211,081,380 stalled-cycles-backend # 20.87% backend cycles idle [66.60%] + 11,505,569,740 instructions # 0.46 insns per cycle + # 1.70 stalled cycles per insn [83.31%] + 1,582,342,745 branches # 193.924 M/sec [83.29%] + 4,717,778 branch-misses # 0.30% of all branches [83.38%] - 8.363947305 seconds time elapsed + 8.161167189 seconds time elapsed Iteration 10 Performance counter stats for 'java MatrixMultiplier 1000': - 8272.681053 task-clock # 1.000 CPUs utilized - 364 context-switches # 0.000 M/sec - 53 CPU-migrations # 0.000 M/sec - 7,924 page-faults # 0.001 M/sec - 25,307,211,479 cycles # 3.059 GHz [83.31%] - 19,865,790,949 stalled-cycles-frontend # 78.50% frontend cycles idle [83.33%] - 6,055,880,576 stalled-cycles-backend # 23.93% backend cycles idle [66.64%] - 11,504,664,919 instructions # 0.45 insns per cycle - # 1.73 stalled cycles per insn [83.37%] - 1,586,728,612 branches # 191.803 M/sec [83.35%] - 4,551,682 branch-misses # 0.29% of all branches [83.37%] - - 8.273447669 seconds time elapsed + 8325.978080 task-clock # 1.000 CPUs utilized + 336 context-switches # 0.000 M/sec + 51 CPU-migrations # 0.000 M/sec + 7,920 page-faults # 0.001 M/sec + 25,465,415,040 cycles # 3.059 GHz [83.39%] + 20,014,451,796 stalled-cycles-frontend # 78.59% frontend cycles idle [83.35%] + 5,716,481,850 stalled-cycles-backend # 22.45% backend cycles idle [66.57%] + 11,496,878,405 instructions # 0.45 insns per cycle + # 1.74 stalled cycles per insn [83.29%] + 1,583,971,382 branches # 190.244 M/sec [83.35%] + 4,654,654 branch-misses # 0.29% of all branches [83.37%] + + 8.327184267 seconds time elapsed diff --git a/sankalp/java_output_10000 b/sankalp/java_output_10000 deleted file mode 100644 index 78f379f..0000000 --- a/sankalp/java_output_10000 +++ /dev/null @@ -1 +0,0 @@ -Iteration 1 diff --git a/sankalp/java_output_3000 b/sankalp/java_output_3000 new file mode 100644 index 0000000..bcb0278 --- /dev/null +++ b/sankalp/java_output_3000 @@ -0,0 +1,180 @@ +Iteration 1 + + Performance counter stats for 'java MatrixMultiplier 3000': + + 570246.408556 task-clock # 0.998 CPUs utilized + 13,594 context-switches # 0.000 M/sec + 67 CPU-migrations # 0.000 M/sec + 55,723 page-faults # 0.000 M/sec + 1,745,855,585,043 cycles # 3.062 GHz [83.33%] + 1,586,556,006,830 stalled-cycles-frontend # 90.88% frontend cycles idle [83.33%] + 667,253,690,712 stalled-cycles-backend # 38.22% backend cycles idle [66.66%] + 299,663,523,851 instructions # 0.17 insns per cycle + # 5.29 stalled cycles per insn [83.33%] + 40,925,007,356 branches # 71.767 M/sec [83.34%] + 21,839,815 branch-misses # 0.05% of all branches [83.34%] + + 571.200174838 seconds time elapsed + +Iteration 2 + + Performance counter stats for 'java MatrixMultiplier 3000': + + 526618.685030 task-clock # 0.998 CPUs utilized + 12,618 context-switches # 0.000 M/sec + 65 CPU-migrations # 0.000 M/sec + 54,849 page-faults # 0.000 M/sec + 1,612,309,554,009 cycles # 3.062 GHz [83.33%] + 1,453,911,024,610 stalled-cycles-frontend # 90.18% frontend cycles idle [83.33%] + 604,419,114,381 stalled-cycles-backend # 37.49% backend cycles idle [66.67%] + 299,550,354,418 instructions # 0.19 insns per cycle + # 4.85 stalled cycles per insn [83.34%] + 40,909,432,652 branches # 77.683 M/sec [83.33%] + 21,245,062 branch-misses # 0.05% of all branches [83.33%] + + 527.548551694 seconds time elapsed + +Iteration 3 + + Performance counter stats for 'java MatrixMultiplier 3000': + + 535466.680458 task-clock # 0.998 CPUs utilized + 12,846 context-switches # 0.000 M/sec + 62 CPU-migrations # 0.000 M/sec + 54,956 page-faults # 0.000 M/sec + 1,639,046,450,367 cycles # 3.061 GHz [83.34%] + 1,480,430,768,242 stalled-cycles-frontend # 90.32% frontend cycles idle [83.33%] + 601,908,499,977 stalled-cycles-backend # 36.72% backend cycles idle [66.66%] + 299,553,327,633 instructions # 0.18 insns per cycle + # 4.94 stalled cycles per insn [83.34%] + 40,910,210,705 branches # 76.401 M/sec [83.33%] + 20,911,209 branch-misses # 0.05% of all branches [83.34%] + + 536.698723828 seconds time elapsed + +Iteration 4 + + Performance counter stats for 'java MatrixMultiplier 3000': + + 552650.707551 task-clock # 0.997 CPUs utilized + 12,976 context-switches # 0.000 M/sec + 77 CPU-migrations # 0.000 M/sec + 55,114 page-faults # 0.000 M/sec + 1,691,511,828,074 cycles # 3.061 GHz [83.33%] + 1,532,208,381,110 stalled-cycles-frontend # 90.58% frontend cycles idle [83.33%] + 621,744,344,523 stalled-cycles-backend # 36.76% backend cycles idle [66.67%] + 299,846,020,001 instructions # 0.18 insns per cycle + # 5.11 stalled cycles per insn [83.34%] + 40,958,238,817 branches # 74.112 M/sec [83.33%] + 21,459,929 branch-misses # 0.05% of all branches [83.33%] + + 554.276915012 seconds time elapsed + +Iteration 5 + + Performance counter stats for 'java MatrixMultiplier 3000': + + 556814.234121 task-clock # 0.998 CPUs utilized + 13,237 context-switches # 0.000 M/sec + 70 CPU-migrations # 0.000 M/sec + 55,552 page-faults # 0.000 M/sec + 1,704,664,130,406 cycles # 3.061 GHz [83.34%] + 1,545,675,582,807 stalled-cycles-frontend # 90.67% frontend cycles idle [83.33%] + 633,094,029,679 stalled-cycles-backend # 37.14% backend cycles idle [66.67%] + 299,667,160,749 instructions # 0.18 insns per cycle + # 5.16 stalled cycles per insn [83.33%] + 40,929,141,875 branches # 73.506 M/sec [83.33%] + 21,024,237 branch-misses # 0.05% of all branches [83.33%] + + 557.952865517 seconds time elapsed + +Iteration 6 + + Performance counter stats for 'java MatrixMultiplier 3000': + + 560832.876628 task-clock # 0.998 CPUs utilized + 12,793 context-switches # 0.000 M/sec + 77 CPU-migrations # 0.000 M/sec + 55,513 page-faults # 0.000 M/sec + 1,713,840,185,302 cycles # 3.056 GHz [83.34%] + 1,555,002,421,280 stalled-cycles-frontend # 90.73% frontend cycles idle [83.33%] + 637,113,380,734 stalled-cycles-backend # 37.17% backend cycles idle [66.66%] + 299,715,387,470 instructions # 0.17 insns per cycle + # 5.19 stalled cycles per insn [83.33%] + 40,932,284,030 branches # 72.985 M/sec [83.33%] + 21,650,917 branch-misses # 0.05% of all branches [83.34%] + + 561.996199196 seconds time elapsed + +Iteration 7 + + Performance counter stats for 'java MatrixMultiplier 3000': + + 550567.500826 task-clock # 0.998 CPUs utilized + 12,798 context-switches # 0.000 M/sec + 82 CPU-migrations # 0.000 M/sec + 55,289 page-faults # 0.000 M/sec + 1,684,502,992,534 cycles # 3.060 GHz [83.34%] + 1,525,773,333,416 stalled-cycles-frontend # 90.58% frontend cycles idle [83.34%] + 624,056,379,930 stalled-cycles-backend # 37.05% backend cycles idle [66.66%] + 299,717,851,374 instructions # 0.18 insns per cycle + # 5.09 stalled cycles per insn [83.33%] + 40,933,551,765 branches # 74.348 M/sec [83.33%] + 21,857,346 branch-misses # 0.05% of all branches [83.33%] + + 551.751018706 seconds time elapsed + +Iteration 8 + + Performance counter stats for 'java MatrixMultiplier 3000': + + 555494.391825 task-clock # 0.998 CPUs utilized + 12,852 context-switches # 0.000 M/sec + 58 CPU-migrations # 0.000 M/sec + 55,583 page-faults # 0.000 M/sec + 1,698,991,939,785 cycles # 3.059 GHz [83.34%] + 1,540,286,255,055 stalled-cycles-frontend # 90.66% frontend cycles idle [83.33%] + 640,738,068,956 stalled-cycles-backend # 37.71% backend cycles idle [66.66%] + 299,551,757,727 instructions # 0.18 insns per cycle + # 5.14 stalled cycles per insn [83.33%] + 40,913,471,509 branches # 73.652 M/sec [83.33%] + 20,264,283 branch-misses # 0.05% of all branches [83.34%] + + 556.645024828 seconds time elapsed + +Iteration 9 + + Performance counter stats for 'java MatrixMultiplier 3000': + + 546762.798329 task-clock # 0.998 CPUs utilized + 13,058 context-switches # 0.000 M/sec + 64 CPU-migrations # 0.000 M/sec + 55,257 page-faults # 0.000 M/sec + 1,673,716,383,400 cycles # 3.061 GHz [83.33%] + 1,514,986,220,797 stalled-cycles-frontend # 90.52% frontend cycles idle [83.33%] + 625,080,656,461 stalled-cycles-backend # 37.35% backend cycles idle [66.66%] + 299,613,800,760 instructions # 0.18 insns per cycle + # 5.06 stalled cycles per insn [83.33%] + 40,915,862,013 branches # 74.833 M/sec [83.34%] + 20,910,287 branch-misses # 0.05% of all branches [83.34%] + + 547.893249032 seconds time elapsed + +Iteration 10 + + Performance counter stats for 'java MatrixMultiplier 3000': + + 548287.545724 task-clock # 0.998 CPUs utilized + 12,824 context-switches # 0.000 M/sec + 78 CPU-migrations # 0.000 M/sec + 55,174 page-faults # 0.000 M/sec + 1,678,345,846,454 cycles # 3.061 GHz [83.33%] + 1,519,483,980,886 stalled-cycles-frontend # 90.53% frontend cycles idle [83.34%] + 617,643,582,895 stalled-cycles-backend # 36.80% backend cycles idle [66.66%] + 299,920,288,315 instructions # 0.18 insns per cycle + # 5.07 stalled cycles per insn [83.33%] + 40,977,709,387 branches # 74.738 M/sec [83.33%] + 22,271,141 branch-misses # 0.05% of all branches [83.34%] + + 549.477799232 seconds time elapsed + diff --git a/sankalp/matrixMultiply_clang_optimized b/sankalp/matrixMultiply_clang_optimized new file mode 100755 index 0000000..cec7923 Binary files /dev/null and b/sankalp/matrixMultiply_clang_optimized differ diff --git a/sankalp/matrixMultiply_gcc_optimized b/sankalp/matrixMultiply_gcc_optimized new file mode 100755 index 0000000..42919cf Binary files /dev/null and b/sankalp/matrixMultiply_gcc_optimized differ diff --git a/sankalp/matrixMultiply_mod.c b/sankalp/matrixMultiply_mod.c new file mode 100644 index 0000000..99c3b69 --- /dev/null +++ b/sankalp/matrixMultiply_mod.c @@ -0,0 +1,157 @@ +#include +#include +#include +#include +#include + +static int **matrixA; +static int **matrixB; +static int **matrixC; +static int N; + +static int randomMaxValue = 100; + +void setUpMatrices() { + + int i = 0; + + matrixA = malloc(sizeof(int *) * N); + matrixB = malloc(sizeof(int *) * N); + matrixC = malloc(sizeof(int *) * N); + + + + for (i=0 ; i " +if [ "$#" -ne 7 ]; then + echo "Usage: bash $0 <# of integers> " exit fi @@ -11,6 +11,9 @@ STR_C_GCC="gcc_output_$1" STR_JAVA="java_output_$1" STR_PYTHON="python_output_$1" STR_C_CLANG="clang_output_$1" +STR_C_GCC_OPTIMIZED="gcc_output_optimized_$1" +STR_C_CLANG_OPTIMIZED="clang_output_optimized_$1" + for i in {1..10} do @@ -18,10 +21,15 @@ do echo "Iteration $i" >> $STR_JAVA echo "Iteration $i" >> $STR_PYTHON echo "Iteration $i" >> $STR_C_CLANG + echo "Iteration $1" >> $STR_C_GCC_OPTIMIZED + echo "Iteration $1" >> $STR_C_CLANG_OPTIMIZED + 3>>$STR_C_GCC perf stat --log-fd 3 ./$2 $1 > /dev/null 3>>$STR_JAVA perf stat --log-fd 3 java $3 $1 > /dev/null 3>>$STR_PYTHON perf stat --log-fd 3 python $4 $1 > /dev/null 3>>$STR_C_CLANG perf stat --log-fd 3 ./$5 $1 > /dev/null + 3>>$STR_C_GCC_OPTIMIZED perf stat --log-fd 3 ./$6 $1 > /dev/null + 3>>$STR_C_CLANG_OPTIMIZED perf stat --log-fd 3 ./$7 $1 > /dev/null done diff --git a/sankalp/threading/matrixMultiply_clang b/sankalp/threading/matrixMultiply_clang new file mode 100755 index 0000000..fc4eb79 Binary files /dev/null and b/sankalp/threading/matrixMultiply_clang differ diff --git a/script.sh b/script.sh new file mode 100644 index 0000000..d780eec --- /dev/null +++ b/script.sh @@ -0,0 +1,24 @@ +#! /bin/bash + +if [ "$#" -ne 4 ]; then + echo "Usage: bash $0 <# of integers> " + exit +fi + + + +STR_C_GCC="gcc_output_$1" +STR_JAVA="java_output_$1" +#STR_PYTHON="python_output_$1" +STR_C_CLANG="clang_output_$1" +for i in {1..10} +do + + echo "Iteration $i" >> $STR_C_GCC + echo "Iteration $i" >> $STR_JAVA + #echo "Iteration $i" >> $STR_PYTHON + echo "Iteration $i" >> $STR_C_CLANG + 3>>$STR_C_GCC perf stat --log-fd 3 ./$2 $1 > /dev/null + 3>>$STR_JAVA perf stat --log-fd 3 java $3 $1 > /dev/null + 3>>$STR_C_CLANG perf stat --log-fd 3 ./$4 $1 > /dev/null +done