Page MenuHomec4science

No OneTemporary

File Metadata

Created
Thu, Nov 21, 18:01
diff --git a/src/performance_measurement/figures/bandwidth.pdf b/src/performance_measurement/figures/bandwidth.pdf
new file mode 100644
index 0000000..1a397f4
Binary files /dev/null and b/src/performance_measurement/figures/bandwidth.pdf differ
diff --git a/src/performance_measurement/figures/bandwidth.svg b/src/performance_measurement/figures/bandwidth.svg
new file mode 100644
index 0000000..1a9451c
--- /dev/null
+++ b/src/performance_measurement/figures/bandwidth.svg
@@ -0,0 +1,1141 @@
+<?xml version="1.0" encoding="utf-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+ "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<svg xmlns:xlink="http://www.w3.org/1999/xlink" width="460.8pt" height="345.6pt" viewBox="0 0 460.8 345.6" xmlns="http://www.w3.org/2000/svg" version="1.1">
+ <metadata>
+ <rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+ <cc:Work>
+ <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>
+ <dc:date>2022-11-14T11:41:32.643432</dc:date>
+ <dc:format>image/svg+xml</dc:format>
+ <dc:creator>
+ <cc:Agent>
+ <dc:title>Matplotlib v3.5.2, https://matplotlib.org/</dc:title>
+ </cc:Agent>
+ </dc:creator>
+ </cc:Work>
+ </rdf:RDF>
+ </metadata>
+ <defs>
+ <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>
+ </defs>
+ <g id="figure_1">
+ <g id="patch_1">
+ <path d="M 0 345.6
+L 460.8 345.6
+L 460.8 0
+L 0 0
+z
+" style="fill: #ffffff"/>
+ </g>
+ <g id="axes_1">
+ <g id="patch_2">
+ <path d="M 57.6 307.584
+L 414.72 307.584
+L 414.72 41.472
+L 57.6 41.472
+z
+" style="fill: #ffffff"/>
+ </g>
+ <g id="matplotlib.axis_1">
+ <g id="xtick_1">
+ <g id="line2d_1">
+ <defs>
+ <path id="m397accf60b" d="M 0 0
+L 0 3.5
+" style="stroke: #000000; stroke-width: 0.8"/>
+ </defs>
+ <g>
+ <use xlink:href="#m397accf60b" x="112.02738" y="307.584" style="stroke: #000000; stroke-width: 0.8"/>
+ </g>
+ </g>
+ <g id="text_1">
+ <!-- $\mathdefault{2^{3}}$ -->
+ <g transform="translate(106.42738 322.182437)scale(0.1 -0.1)">
+ <defs>
+ <path id="DejaVuSans-32" d="M 1228 531
+L 3431 531
+L 3431 0
+L 469 0
+L 469 531
+Q 828 903 1448 1529
+Q 2069 2156 2228 2338
+Q 2531 2678 2651 2914
+Q 2772 3150 2772 3378
+Q 2772 3750 2511 3984
+Q 2250 4219 1831 4219
+Q 1534 4219 1204 4116
+Q 875 4013 500 3803
+L 500 4441
+Q 881 4594 1212 4672
+Q 1544 4750 1819 4750
+Q 2544 4750 2975 4387
+Q 3406 4025 3406 3419
+Q 3406 3131 3298 2873
+Q 3191 2616 2906 2266
+Q 2828 2175 2409 1742
+Q 1991 1309 1228 531
+z
+" transform="scale(0.015625)"/>
+ <path id="DejaVuSans-33" d="M 2597 2516
+Q 3050 2419 3304 2112
+Q 3559 1806 3559 1356
+Q 3559 666 3084 287
+Q 2609 -91 1734 -91
+Q 1441 -91 1130 -33
+Q 819 25 488 141
+L 488 750
+Q 750 597 1062 519
+Q 1375 441 1716 441
+Q 2309 441 2620 675
+Q 2931 909 2931 1356
+Q 2931 1769 2642 2001
+Q 2353 2234 1838 2234
+L 1294 2234
+L 1294 2753
+L 1863 2753
+Q 2328 2753 2575 2939
+Q 2822 3125 2822 3475
+Q 2822 3834 2567 4026
+Q 2313 4219 1838 4219
+Q 1578 4219 1281 4162
+Q 984 4106 628 3988
+L 628 4550
+Q 988 4650 1302 4700
+Q 1616 4750 1894 4750
+Q 2613 4750 3031 4423
+Q 3450 4097 3450 3541
+Q 3450 3153 3228 2886
+Q 3006 2619 2597 2516
+z
+" transform="scale(0.015625)"/>
+ </defs>
+ <use xlink:href="#DejaVuSans-32" transform="translate(0 0.765625)"/>
+ <use xlink:href="#DejaVuSans-33" transform="translate(64.580078 39.046875)scale(0.7)"/>
+ </g>
+ </g>
+ </g>
+ <g id="xtick_2">
+ <g id="line2d_2">
+ <g>
+ <use xlink:href="#m397accf60b" x="169.319358" y="307.584" style="stroke: #000000; stroke-width: 0.8"/>
+ </g>
+ </g>
+ <g id="text_2">
+ <!-- $\mathdefault{2^{6}}$ -->
+ <g transform="translate(163.719358 322.182437)scale(0.1 -0.1)">
+ <defs>
+ <path id="DejaVuSans-36" d="M 2113 2584
+Q 1688 2584 1439 2293
+Q 1191 2003 1191 1497
+Q 1191 994 1439 701
+Q 1688 409 2113 409
+Q 2538 409 2786 701
+Q 3034 994 3034 1497
+Q 3034 2003 2786 2293
+Q 2538 2584 2113 2584
+z
+M 3366 4563
+L 3366 3988
+Q 3128 4100 2886 4159
+Q 2644 4219 2406 4219
+Q 1781 4219 1451 3797
+Q 1122 3375 1075 2522
+Q 1259 2794 1537 2939
+Q 1816 3084 2150 3084
+Q 2853 3084 3261 2657
+Q 3669 2231 3669 1497
+Q 3669 778 3244 343
+Q 2819 -91 2113 -91
+Q 1303 -91 875 529
+Q 447 1150 447 2328
+Q 447 3434 972 4092
+Q 1497 4750 2381 4750
+Q 2619 4750 2861 4703
+Q 3103 4656 3366 4563
+z
+" transform="scale(0.015625)"/>
+ </defs>
+ <use xlink:href="#DejaVuSans-32" transform="translate(0 0.765625)"/>
+ <use xlink:href="#DejaVuSans-36" transform="translate(64.580078 39.046875)scale(0.7)"/>
+ </g>
+ </g>
+ </g>
+ <g id="xtick_3">
+ <g id="line2d_3">
+ <g>
+ <use xlink:href="#m397accf60b" x="226.611337" y="307.584" style="stroke: #000000; stroke-width: 0.8"/>
+ </g>
+ </g>
+ <g id="text_3">
+ <!-- $\mathdefault{2^{9}}$ -->
+ <g transform="translate(221.011337 322.182437)scale(0.1 -0.1)">
+ <defs>
+ <path id="DejaVuSans-39" d="M 703 97
+L 703 672
+Q 941 559 1184 500
+Q 1428 441 1663 441
+Q 2288 441 2617 861
+Q 2947 1281 2994 2138
+Q 2813 1869 2534 1725
+Q 2256 1581 1919 1581
+Q 1219 1581 811 2004
+Q 403 2428 403 3163
+Q 403 3881 828 4315
+Q 1253 4750 1959 4750
+Q 2769 4750 3195 4129
+Q 3622 3509 3622 2328
+Q 3622 1225 3098 567
+Q 2575 -91 1691 -91
+Q 1453 -91 1209 -44
+Q 966 3 703 97
+z
+M 1959 2075
+Q 2384 2075 2632 2365
+Q 2881 2656 2881 3163
+Q 2881 3666 2632 3958
+Q 2384 4250 1959 4250
+Q 1534 4250 1286 3958
+Q 1038 3666 1038 3163
+Q 1038 2656 1286 2365
+Q 1534 2075 1959 2075
+z
+" transform="scale(0.015625)"/>
+ </defs>
+ <use xlink:href="#DejaVuSans-32" transform="translate(0 0.765625)"/>
+ <use xlink:href="#DejaVuSans-39" transform="translate(64.580078 39.046875)scale(0.7)"/>
+ </g>
+ </g>
+ </g>
+ <g id="xtick_4">
+ <g id="line2d_4">
+ <g>
+ <use xlink:href="#m397accf60b" x="283.903316" y="307.584" style="stroke: #000000; stroke-width: 0.8"/>
+ </g>
+ </g>
+ <g id="text_4">
+ <!-- $\mathdefault{2^{12}}$ -->
+ <g transform="translate(276.053316 322.182437)scale(0.1 -0.1)">
+ <defs>
+ <path id="DejaVuSans-31" d="M 794 531
+L 1825 531
+L 1825 4091
+L 703 3866
+L 703 4441
+L 1819 4666
+L 2450 4666
+L 2450 531
+L 3481 531
+L 3481 0
+L 794 0
+L 794 531
+z
+" transform="scale(0.015625)"/>
+ </defs>
+ <use xlink:href="#DejaVuSans-32" transform="translate(0 0.765625)"/>
+ <use xlink:href="#DejaVuSans-31" transform="translate(64.580078 39.046875)scale(0.7)"/>
+ <use xlink:href="#DejaVuSans-32" transform="translate(109.116211 39.046875)scale(0.7)"/>
+ </g>
+ </g>
+ </g>
+ <g id="xtick_5">
+ <g id="line2d_5">
+ <g>
+ <use xlink:href="#m397accf60b" x="341.195294" y="307.584" style="stroke: #000000; stroke-width: 0.8"/>
+ </g>
+ </g>
+ <g id="text_5">
+ <!-- $\mathdefault{2^{15}}$ -->
+ <g transform="translate(333.345294 322.182437)scale(0.1 -0.1)">
+ <defs>
+ <path id="DejaVuSans-35" d="M 691 4666
+L 3169 4666
+L 3169 4134
+L 1269 4134
+L 1269 2991
+Q 1406 3038 1543 3061
+Q 1681 3084 1819 3084
+Q 2600 3084 3056 2656
+Q 3513 2228 3513 1497
+Q 3513 744 3044 326
+Q 2575 -91 1722 -91
+Q 1428 -91 1123 -41
+Q 819 9 494 109
+L 494 744
+Q 775 591 1075 516
+Q 1375 441 1709 441
+Q 2250 441 2565 725
+Q 2881 1009 2881 1497
+Q 2881 1984 2565 2268
+Q 2250 2553 1709 2553
+Q 1456 2553 1204 2497
+Q 953 2441 691 2322
+L 691 4666
+z
+" transform="scale(0.015625)"/>
+ </defs>
+ <use xlink:href="#DejaVuSans-32" transform="translate(0 0.684375)"/>
+ <use xlink:href="#DejaVuSans-31" transform="translate(64.580078 38.965625)scale(0.7)"/>
+ <use xlink:href="#DejaVuSans-35" transform="translate(109.116211 38.965625)scale(0.7)"/>
+ </g>
+ </g>
+ </g>
+ <g id="xtick_6">
+ <g id="line2d_6">
+ <g>
+ <use xlink:href="#m397accf60b" x="398.487273" y="307.584" style="stroke: #000000; stroke-width: 0.8"/>
+ </g>
+ </g>
+ <g id="text_6">
+ <!-- $\mathdefault{2^{18}}$ -->
+ <g transform="translate(390.637273 322.182437)scale(0.1 -0.1)">
+ <defs>
+ <path id="DejaVuSans-38" d="M 2034 2216
+Q 1584 2216 1326 1975
+Q 1069 1734 1069 1313
+Q 1069 891 1326 650
+Q 1584 409 2034 409
+Q 2484 409 2743 651
+Q 3003 894 3003 1313
+Q 3003 1734 2745 1975
+Q 2488 2216 2034 2216
+z
+M 1403 2484
+Q 997 2584 770 2862
+Q 544 3141 544 3541
+Q 544 4100 942 4425
+Q 1341 4750 2034 4750
+Q 2731 4750 3128 4425
+Q 3525 4100 3525 3541
+Q 3525 3141 3298 2862
+Q 3072 2584 2669 2484
+Q 3125 2378 3379 2068
+Q 3634 1759 3634 1313
+Q 3634 634 3220 271
+Q 2806 -91 2034 -91
+Q 1263 -91 848 271
+Q 434 634 434 1313
+Q 434 1759 690 2068
+Q 947 2378 1403 2484
+z
+M 1172 3481
+Q 1172 3119 1398 2916
+Q 1625 2713 2034 2713
+Q 2441 2713 2670 2916
+Q 2900 3119 2900 3481
+Q 2900 3844 2670 4047
+Q 2441 4250 2034 4250
+Q 1625 4250 1398 4047
+Q 1172 3844 1172 3481
+z
+" transform="scale(0.015625)"/>
+ </defs>
+ <use xlink:href="#DejaVuSans-32" transform="translate(0 0.765625)"/>
+ <use xlink:href="#DejaVuSans-31" transform="translate(64.580078 39.046875)scale(0.7)"/>
+ <use xlink:href="#DejaVuSans-38" transform="translate(109.116211 39.046875)scale(0.7)"/>
+ </g>
+ </g>
+ </g>
+ <g id="text_7">
+ <!-- array size [kiB] -->
+ <g transform="translate(198.56 335.860562)scale(0.1 -0.1)">
+ <defs>
+ <path id="DejaVuSans-61" d="M 2194 1759
+Q 1497 1759 1228 1600
+Q 959 1441 959 1056
+Q 959 750 1161 570
+Q 1363 391 1709 391
+Q 2188 391 2477 730
+Q 2766 1069 2766 1631
+L 2766 1759
+L 2194 1759
+z
+M 3341 1997
+L 3341 0
+L 2766 0
+L 2766 531
+Q 2569 213 2275 61
+Q 1981 -91 1556 -91
+Q 1019 -91 701 211
+Q 384 513 384 1019
+Q 384 1609 779 1909
+Q 1175 2209 1959 2209
+L 2766 2209
+L 2766 2266
+Q 2766 2663 2505 2880
+Q 2244 3097 1772 3097
+Q 1472 3097 1187 3025
+Q 903 2953 641 2809
+L 641 3341
+Q 956 3463 1253 3523
+Q 1550 3584 1831 3584
+Q 2591 3584 2966 3190
+Q 3341 2797 3341 1997
+z
+" transform="scale(0.015625)"/>
+ <path id="DejaVuSans-72" d="M 2631 2963
+Q 2534 3019 2420 3045
+Q 2306 3072 2169 3072
+Q 1681 3072 1420 2755
+Q 1159 2438 1159 1844
+L 1159 0
+L 581 0
+L 581 3500
+L 1159 3500
+L 1159 2956
+Q 1341 3275 1631 3429
+Q 1922 3584 2338 3584
+Q 2397 3584 2469 3576
+Q 2541 3569 2628 3553
+L 2631 2963
+z
+" transform="scale(0.015625)"/>
+ <path id="DejaVuSans-79" d="M 2059 -325
+Q 1816 -950 1584 -1140
+Q 1353 -1331 966 -1331
+L 506 -1331
+L 506 -850
+L 844 -850
+Q 1081 -850 1212 -737
+Q 1344 -625 1503 -206
+L 1606 56
+L 191 3500
+L 800 3500
+L 1894 763
+L 2988 3500
+L 3597 3500
+L 2059 -325
+z
+" transform="scale(0.015625)"/>
+ <path id="DejaVuSans-20" transform="scale(0.015625)"/>
+ <path id="DejaVuSans-73" d="M 2834 3397
+L 2834 2853
+Q 2591 2978 2328 3040
+Q 2066 3103 1784 3103
+Q 1356 3103 1142 2972
+Q 928 2841 928 2578
+Q 928 2378 1081 2264
+Q 1234 2150 1697 2047
+L 1894 2003
+Q 2506 1872 2764 1633
+Q 3022 1394 3022 966
+Q 3022 478 2636 193
+Q 2250 -91 1575 -91
+Q 1294 -91 989 -36
+Q 684 19 347 128
+L 347 722
+Q 666 556 975 473
+Q 1284 391 1588 391
+Q 1994 391 2212 530
+Q 2431 669 2431 922
+Q 2431 1156 2273 1281
+Q 2116 1406 1581 1522
+L 1381 1569
+Q 847 1681 609 1914
+Q 372 2147 372 2553
+Q 372 3047 722 3315
+Q 1072 3584 1716 3584
+Q 2034 3584 2315 3537
+Q 2597 3491 2834 3397
+z
+" transform="scale(0.015625)"/>
+ <path id="DejaVuSans-69" d="M 603 3500
+L 1178 3500
+L 1178 0
+L 603 0
+L 603 3500
+z
+M 603 4863
+L 1178 4863
+L 1178 4134
+L 603 4134
+L 603 4863
+z
+" transform="scale(0.015625)"/>
+ <path id="DejaVuSans-7a" d="M 353 3500
+L 3084 3500
+L 3084 2975
+L 922 459
+L 3084 459
+L 3084 0
+L 275 0
+L 275 525
+L 2438 3041
+L 353 3041
+L 353 3500
+z
+" transform="scale(0.015625)"/>
+ <path id="DejaVuSans-65" d="M 3597 1894
+L 3597 1613
+L 953 1613
+Q 991 1019 1311 708
+Q 1631 397 2203 397
+Q 2534 397 2845 478
+Q 3156 559 3463 722
+L 3463 178
+Q 3153 47 2828 -22
+Q 2503 -91 2169 -91
+Q 1331 -91 842 396
+Q 353 884 353 1716
+Q 353 2575 817 3079
+Q 1281 3584 2069 3584
+Q 2775 3584 3186 3129
+Q 3597 2675 3597 1894
+z
+M 3022 2063
+Q 3016 2534 2758 2815
+Q 2500 3097 2075 3097
+Q 1594 3097 1305 2825
+Q 1016 2553 972 2059
+L 3022 2063
+z
+" transform="scale(0.015625)"/>
+ <path id="DejaVuSans-5b" d="M 550 4863
+L 1875 4863
+L 1875 4416
+L 1125 4416
+L 1125 -397
+L 1875 -397
+L 1875 -844
+L 550 -844
+L 550 4863
+z
+" transform="scale(0.015625)"/>
+ <path id="DejaVuSans-6b" d="M 581 4863
+L 1159 4863
+L 1159 1991
+L 2875 3500
+L 3609 3500
+L 1753 1863
+L 3688 0
+L 2938 0
+L 1159 1709
+L 1159 0
+L 581 0
+L 581 4863
+z
+" transform="scale(0.015625)"/>
+ <path id="DejaVuSans-42" d="M 1259 2228
+L 1259 519
+L 2272 519
+Q 2781 519 3026 730
+Q 3272 941 3272 1375
+Q 3272 1813 3026 2020
+Q 2781 2228 2272 2228
+L 1259 2228
+z
+M 1259 4147
+L 1259 2741
+L 2194 2741
+Q 2656 2741 2882 2914
+Q 3109 3088 3109 3444
+Q 3109 3797 2882 3972
+Q 2656 4147 2194 4147
+L 1259 4147
+z
+M 628 4666
+L 2241 4666
+Q 2963 4666 3353 4366
+Q 3744 4066 3744 3513
+Q 3744 3084 3544 2831
+Q 3344 2578 2956 2516
+Q 3422 2416 3680 2098
+Q 3938 1781 3938 1306
+Q 3938 681 3513 340
+Q 3088 0 2303 0
+L 628 0
+L 628 4666
+z
+" transform="scale(0.015625)"/>
+ <path id="DejaVuSans-5d" d="M 1947 4863
+L 1947 -844
+L 622 -844
+L 622 -397
+L 1369 -397
+L 1369 4416
+L 622 4416
+L 622 4863
+L 1947 4863
+z
+" transform="scale(0.015625)"/>
+ </defs>
+ <use xlink:href="#DejaVuSans-61"/>
+ <use xlink:href="#DejaVuSans-72" x="61.279297"/>
+ <use xlink:href="#DejaVuSans-72" x="100.642578"/>
+ <use xlink:href="#DejaVuSans-61" x="141.755859"/>
+ <use xlink:href="#DejaVuSans-79" x="203.035156"/>
+ <use xlink:href="#DejaVuSans-20" x="262.214844"/>
+ <use xlink:href="#DejaVuSans-73" x="294.001953"/>
+ <use xlink:href="#DejaVuSans-69" x="346.101562"/>
+ <use xlink:href="#DejaVuSans-7a" x="373.884766"/>
+ <use xlink:href="#DejaVuSans-65" x="426.375"/>
+ <use xlink:href="#DejaVuSans-20" x="487.898438"/>
+ <use xlink:href="#DejaVuSans-5b" x="519.685547"/>
+ <use xlink:href="#DejaVuSans-6b" x="558.699219"/>
+ <use xlink:href="#DejaVuSans-69" x="616.609375"/>
+ <use xlink:href="#DejaVuSans-42" x="644.392578"/>
+ <use xlink:href="#DejaVuSans-5d" x="712.996094"/>
+ </g>
+ </g>
+ </g>
+ <g id="matplotlib.axis_2">
+ <g id="ytick_1">
+ <g id="line2d_7">
+ <defs>
+ <path id="md7cf236cc3" d="M 0 0
+L -3.5 0
+" style="stroke: #000000; stroke-width: 0.8"/>
+ </defs>
+ <g>
+ <use xlink:href="#md7cf236cc3" x="57.6" y="295.488" style="stroke: #000000; stroke-width: 0.8"/>
+ </g>
+ </g>
+ <g id="text_8">
+ <!-- 0 -->
+ <g transform="translate(44.2375 299.287219)scale(0.1 -0.1)">
+ <defs>
+ <path id="DejaVuSans-30" d="M 2034 4250
+Q 1547 4250 1301 3770
+Q 1056 3291 1056 2328
+Q 1056 1369 1301 889
+Q 1547 409 2034 409
+Q 2525 409 2770 889
+Q 3016 1369 3016 2328
+Q 3016 3291 2770 3770
+Q 2525 4250 2034 4250
+z
+M 2034 4750
+Q 2819 4750 3233 4129
+Q 3647 3509 3647 2328
+Q 3647 1150 3233 529
+Q 2819 -91 2034 -91
+Q 1250 -91 836 529
+Q 422 1150 422 2328
+Q 422 3509 836 4129
+Q 1250 4750 2034 4750
+z
+" transform="scale(0.015625)"/>
+ </defs>
+ <use xlink:href="#DejaVuSans-30"/>
+ </g>
+ </g>
+ </g>
+ <g id="ytick_2">
+ <g id="line2d_8">
+ <g>
+ <use xlink:href="#md7cf236cc3" x="57.6" y="255.168" style="stroke: #000000; stroke-width: 0.8"/>
+ </g>
+ </g>
+ <g id="text_9">
+ <!-- 20 -->
+ <g transform="translate(37.875 258.967219)scale(0.1 -0.1)">
+ <use xlink:href="#DejaVuSans-32"/>
+ <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+ </g>
+ </g>
+ </g>
+ <g id="ytick_3">
+ <g id="line2d_9">
+ <g>
+ <use xlink:href="#md7cf236cc3" x="57.6" y="214.848" style="stroke: #000000; stroke-width: 0.8"/>
+ </g>
+ </g>
+ <g id="text_10">
+ <!-- 40 -->
+ <g transform="translate(37.875 218.647219)scale(0.1 -0.1)">
+ <defs>
+ <path id="DejaVuSans-34" d="M 2419 4116
+L 825 1625
+L 2419 1625
+L 2419 4116
+z
+M 2253 4666
+L 3047 4666
+L 3047 1625
+L 3713 1625
+L 3713 1100
+L 3047 1100
+L 3047 0
+L 2419 0
+L 2419 1100
+L 313 1100
+L 313 1709
+L 2253 4666
+z
+" transform="scale(0.015625)"/>
+ </defs>
+ <use xlink:href="#DejaVuSans-34"/>
+ <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+ </g>
+ </g>
+ </g>
+ <g id="ytick_4">
+ <g id="line2d_10">
+ <g>
+ <use xlink:href="#md7cf236cc3" x="57.6" y="174.528" style="stroke: #000000; stroke-width: 0.8"/>
+ </g>
+ </g>
+ <g id="text_11">
+ <!-- 60 -->
+ <g transform="translate(37.875 178.327219)scale(0.1 -0.1)">
+ <use xlink:href="#DejaVuSans-36"/>
+ <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+ </g>
+ </g>
+ </g>
+ <g id="ytick_5">
+ <g id="line2d_11">
+ <g>
+ <use xlink:href="#md7cf236cc3" x="57.6" y="134.208" style="stroke: #000000; stroke-width: 0.8"/>
+ </g>
+ </g>
+ <g id="text_12">
+ <!-- 80 -->
+ <g transform="translate(37.875 138.007219)scale(0.1 -0.1)">
+ <use xlink:href="#DejaVuSans-38"/>
+ <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+ </g>
+ </g>
+ </g>
+ <g id="ytick_6">
+ <g id="line2d_12">
+ <g>
+ <use xlink:href="#md7cf236cc3" x="57.6" y="93.888" style="stroke: #000000; stroke-width: 0.8"/>
+ </g>
+ </g>
+ <g id="text_13">
+ <!-- 100 -->
+ <g transform="translate(31.5125 97.687219)scale(0.1 -0.1)">
+ <use xlink:href="#DejaVuSans-31"/>
+ <use xlink:href="#DejaVuSans-30" x="63.623047"/>
+ <use xlink:href="#DejaVuSans-30" x="127.246094"/>
+ </g>
+ </g>
+ </g>
+ <g id="ytick_7">
+ <g id="line2d_13">
+ <g>
+ <use xlink:href="#md7cf236cc3" x="57.6" y="53.568" style="stroke: #000000; stroke-width: 0.8"/>
+ </g>
+ </g>
+ <g id="text_14">
+ <!-- 120 -->
+ <g transform="translate(31.5125 57.367219)scale(0.1 -0.1)">
+ <use xlink:href="#DejaVuSans-31"/>
+ <use xlink:href="#DejaVuSans-32" x="63.623047"/>
+ <use xlink:href="#DejaVuSans-30" x="127.246094"/>
+ </g>
+ </g>
+ </g>
+ <g id="text_15">
+ <!-- bandwidth [GiB/s] -->
+ <g transform="translate(25.432812 219.363937)rotate(-90)scale(0.1 -0.1)">
+ <defs>
+ <path id="DejaVuSans-62" d="M 3116 1747
+Q 3116 2381 2855 2742
+Q 2594 3103 2138 3103
+Q 1681 3103 1420 2742
+Q 1159 2381 1159 1747
+Q 1159 1113 1420 752
+Q 1681 391 2138 391
+Q 2594 391 2855 752
+Q 3116 1113 3116 1747
+z
+M 1159 2969
+Q 1341 3281 1617 3432
+Q 1894 3584 2278 3584
+Q 2916 3584 3314 3078
+Q 3713 2572 3713 1747
+Q 3713 922 3314 415
+Q 2916 -91 2278 -91
+Q 1894 -91 1617 61
+Q 1341 213 1159 525
+L 1159 0
+L 581 0
+L 581 4863
+L 1159 4863
+L 1159 2969
+z
+" transform="scale(0.015625)"/>
+ <path id="DejaVuSans-6e" d="M 3513 2113
+L 3513 0
+L 2938 0
+L 2938 2094
+Q 2938 2591 2744 2837
+Q 2550 3084 2163 3084
+Q 1697 3084 1428 2787
+Q 1159 2491 1159 1978
+L 1159 0
+L 581 0
+L 581 3500
+L 1159 3500
+L 1159 2956
+Q 1366 3272 1645 3428
+Q 1925 3584 2291 3584
+Q 2894 3584 3203 3211
+Q 3513 2838 3513 2113
+z
+" transform="scale(0.015625)"/>
+ <path id="DejaVuSans-64" d="M 2906 2969
+L 2906 4863
+L 3481 4863
+L 3481 0
+L 2906 0
+L 2906 525
+Q 2725 213 2448 61
+Q 2172 -91 1784 -91
+Q 1150 -91 751 415
+Q 353 922 353 1747
+Q 353 2572 751 3078
+Q 1150 3584 1784 3584
+Q 2172 3584 2448 3432
+Q 2725 3281 2906 2969
+z
+M 947 1747
+Q 947 1113 1208 752
+Q 1469 391 1925 391
+Q 2381 391 2643 752
+Q 2906 1113 2906 1747
+Q 2906 2381 2643 2742
+Q 2381 3103 1925 3103
+Q 1469 3103 1208 2742
+Q 947 2381 947 1747
+z
+" transform="scale(0.015625)"/>
+ <path id="DejaVuSans-77" d="M 269 3500
+L 844 3500
+L 1563 769
+L 2278 3500
+L 2956 3500
+L 3675 769
+L 4391 3500
+L 4966 3500
+L 4050 0
+L 3372 0
+L 2619 2869
+L 1863 0
+L 1184 0
+L 269 3500
+z
+" transform="scale(0.015625)"/>
+ <path id="DejaVuSans-74" d="M 1172 4494
+L 1172 3500
+L 2356 3500
+L 2356 3053
+L 1172 3053
+L 1172 1153
+Q 1172 725 1289 603
+Q 1406 481 1766 481
+L 2356 481
+L 2356 0
+L 1766 0
+Q 1100 0 847 248
+Q 594 497 594 1153
+L 594 3053
+L 172 3053
+L 172 3500
+L 594 3500
+L 594 4494
+L 1172 4494
+z
+" transform="scale(0.015625)"/>
+ <path id="DejaVuSans-68" d="M 3513 2113
+L 3513 0
+L 2938 0
+L 2938 2094
+Q 2938 2591 2744 2837
+Q 2550 3084 2163 3084
+Q 1697 3084 1428 2787
+Q 1159 2491 1159 1978
+L 1159 0
+L 581 0
+L 581 4863
+L 1159 4863
+L 1159 2956
+Q 1366 3272 1645 3428
+Q 1925 3584 2291 3584
+Q 2894 3584 3203 3211
+Q 3513 2838 3513 2113
+z
+" transform="scale(0.015625)"/>
+ <path id="DejaVuSans-47" d="M 3809 666
+L 3809 1919
+L 2778 1919
+L 2778 2438
+L 4434 2438
+L 4434 434
+Q 4069 175 3628 42
+Q 3188 -91 2688 -91
+Q 1594 -91 976 548
+Q 359 1188 359 2328
+Q 359 3472 976 4111
+Q 1594 4750 2688 4750
+Q 3144 4750 3555 4637
+Q 3966 4525 4313 4306
+L 4313 3634
+Q 3963 3931 3569 4081
+Q 3175 4231 2741 4231
+Q 1884 4231 1454 3753
+Q 1025 3275 1025 2328
+Q 1025 1384 1454 906
+Q 1884 428 2741 428
+Q 3075 428 3337 486
+Q 3600 544 3809 666
+z
+" transform="scale(0.015625)"/>
+ <path id="DejaVuSans-2f" d="M 1625 4666
+L 2156 4666
+L 531 -594
+L 0 -594
+L 1625 4666
+z
+" transform="scale(0.015625)"/>
+ </defs>
+ <use xlink:href="#DejaVuSans-62"/>
+ <use xlink:href="#DejaVuSans-61" x="63.476562"/>
+ <use xlink:href="#DejaVuSans-6e" x="124.755859"/>
+ <use xlink:href="#DejaVuSans-64" x="188.134766"/>
+ <use xlink:href="#DejaVuSans-77" x="251.611328"/>
+ <use xlink:href="#DejaVuSans-69" x="333.398438"/>
+ <use xlink:href="#DejaVuSans-64" x="361.181641"/>
+ <use xlink:href="#DejaVuSans-74" x="424.658203"/>
+ <use xlink:href="#DejaVuSans-68" x="463.867188"/>
+ <use xlink:href="#DejaVuSans-20" x="527.246094"/>
+ <use xlink:href="#DejaVuSans-5b" x="559.033203"/>
+ <use xlink:href="#DejaVuSans-47" x="598.046875"/>
+ <use xlink:href="#DejaVuSans-69" x="675.537109"/>
+ <use xlink:href="#DejaVuSans-42" x="703.320312"/>
+ <use xlink:href="#DejaVuSans-2f" x="771.923828"/>
+ <use xlink:href="#DejaVuSans-73" x="805.615234"/>
+ <use xlink:href="#DejaVuSans-5d" x="857.714844"/>
+ </g>
+ </g>
+ </g>
+ <g id="line2d_14">
+ <path d="M 73.832727 78.913033
+L 92.930053 90.379208
+L 112.02738 82.596779
+L 131.124706 78.732789
+L 150.222032 147.689056
+L 169.319358 138.002232
+L 188.416684 107.389778
+L 207.514011 147.96299
+L 226.611337 209.133074
+L 245.708663 245.912141
+L 264.805989 246.017096
+L 283.903316 246.107218
+L 303.000642 250.228056
+L 322.097968 267.767531
+L 341.195294 269.465771
+L 360.29262 270.09719
+L 379.389947 270.541981
+L 398.487273 270.805213
+" clip-path="url(#p9bc6bf51ba)" style="fill: none; stroke: #1f77b4; stroke-width: 1.5; stroke-linecap: square"/>
+ <defs>
+ <path id="mba65a8f57e" d="M 0 1.5
+C 0.397805 1.5 0.77937 1.341951 1.06066 1.06066
+C 1.341951 0.77937 1.5 0.397805 1.5 0
+C 1.5 -0.397805 1.341951 -0.77937 1.06066 -1.06066
+C 0.77937 -1.341951 0.397805 -1.5 0 -1.5
+C -0.397805 -1.5 -0.77937 -1.341951 -1.06066 -1.06066
+C -1.341951 -0.77937 -1.5 -0.397805 -1.5 0
+C -1.5 0.397805 -1.341951 0.77937 -1.06066 1.06066
+C -0.77937 1.341951 -0.397805 1.5 0 1.5
+z
+" style="stroke: #1f77b4"/>
+ </defs>
+ <g clip-path="url(#p9bc6bf51ba)">
+ <use xlink:href="#mba65a8f57e" x="73.832727" y="78.913033" style="fill: #1f77b4; stroke: #1f77b4"/>
+ <use xlink:href="#mba65a8f57e" x="92.930053" y="90.379208" style="fill: #1f77b4; stroke: #1f77b4"/>
+ <use xlink:href="#mba65a8f57e" x="112.02738" y="82.596779" style="fill: #1f77b4; stroke: #1f77b4"/>
+ <use xlink:href="#mba65a8f57e" x="131.124706" y="78.732789" style="fill: #1f77b4; stroke: #1f77b4"/>
+ <use xlink:href="#mba65a8f57e" x="150.222032" y="147.689056" style="fill: #1f77b4; stroke: #1f77b4"/>
+ <use xlink:href="#mba65a8f57e" x="169.319358" y="138.002232" style="fill: #1f77b4; stroke: #1f77b4"/>
+ <use xlink:href="#mba65a8f57e" x="188.416684" y="107.389778" style="fill: #1f77b4; stroke: #1f77b4"/>
+ <use xlink:href="#mba65a8f57e" x="207.514011" y="147.96299" style="fill: #1f77b4; stroke: #1f77b4"/>
+ <use xlink:href="#mba65a8f57e" x="226.611337" y="209.133074" style="fill: #1f77b4; stroke: #1f77b4"/>
+ <use xlink:href="#mba65a8f57e" x="245.708663" y="245.912141" style="fill: #1f77b4; stroke: #1f77b4"/>
+ <use xlink:href="#mba65a8f57e" x="264.805989" y="246.017096" style="fill: #1f77b4; stroke: #1f77b4"/>
+ <use xlink:href="#mba65a8f57e" x="283.903316" y="246.107218" style="fill: #1f77b4; stroke: #1f77b4"/>
+ <use xlink:href="#mba65a8f57e" x="303.000642" y="250.228056" style="fill: #1f77b4; stroke: #1f77b4"/>
+ <use xlink:href="#mba65a8f57e" x="322.097968" y="267.767531" style="fill: #1f77b4; stroke: #1f77b4"/>
+ <use xlink:href="#mba65a8f57e" x="341.195294" y="269.465771" style="fill: #1f77b4; stroke: #1f77b4"/>
+ <use xlink:href="#mba65a8f57e" x="360.29262" y="270.09719" style="fill: #1f77b4; stroke: #1f77b4"/>
+ <use xlink:href="#mba65a8f57e" x="379.389947" y="270.541981" style="fill: #1f77b4; stroke: #1f77b4"/>
+ <use xlink:href="#mba65a8f57e" x="398.487273" y="270.805213" style="fill: #1f77b4; stroke: #1f77b4"/>
+ </g>
+ </g>
+ <g id="LineCollection_1">
+ <path d="M 150.222032 295.488
+L 150.222032 53.568
+" clip-path="url(#p9bc6bf51ba)" style="fill: none; stroke: #808080; stroke-width: 1.5"/>
+ <path d="M 245.708663 295.488
+L 245.708663 53.568
+" clip-path="url(#p9bc6bf51ba)" style="fill: none; stroke: #808080; stroke-width: 1.5"/>
+ <path d="M 327.192878 295.488
+L 327.192878 53.568
+" clip-path="url(#p9bc6bf51ba)" style="fill: none; stroke: #808080; stroke-width: 1.5"/>
+ </g>
+ <g id="patch_3">
+ <path d="M 57.6 307.584
+L 57.6 41.472
+" style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
+ </g>
+ <g id="patch_4">
+ <path d="M 414.72 307.584
+L 414.72 41.472
+" style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
+ </g>
+ <g id="patch_5">
+ <path d="M 57.6 307.584
+L 414.72 307.584
+" style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
+ </g>
+ <g id="patch_6">
+ <path d="M 57.6 41.472
+L 414.72 41.472
+" style="fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square"/>
+ </g>
+ <g id="text_16">
+ <!-- L1 -->
+ <g transform="translate(131.124706 63.648)scale(0.1 -0.1)">
+ <defs>
+ <path id="DejaVuSans-4c" d="M 628 4666
+L 1259 4666
+L 1259 531
+L 3531 531
+L 3531 0
+L 628 0
+L 628 4666
+z
+" transform="scale(0.015625)"/>
+ </defs>
+ <use xlink:href="#DejaVuSans-4c"/>
+ <use xlink:href="#DejaVuSans-31" x="55.712891"/>
+ </g>
+ </g>
+ <g id="text_17">
+ <!-- L2 -->
+ <g transform="translate(226.611337 63.648)scale(0.1 -0.1)">
+ <use xlink:href="#DejaVuSans-4c"/>
+ <use xlink:href="#DejaVuSans-32" x="55.712891"/>
+ </g>
+ </g>
+ <g id="text_18">
+ <!-- L3 -->
+ <g transform="translate(308.095551 63.648)scale(0.1 -0.1)">
+ <use xlink:href="#DejaVuSans-4c"/>
+ <use xlink:href="#DejaVuSans-33" x="55.712891"/>
+ </g>
+ </g>
+ <g id="legend_1">
+ <g id="patch_7">
+ <path d="M 351.835625 64.150125
+L 407.72 64.150125
+Q 409.72 64.150125 409.72 62.150125
+L 409.72 48.472
+Q 409.72 46.472 407.72 46.472
+L 351.835625 46.472
+Q 349.835625 46.472 349.835625 48.472
+L 349.835625 62.150125
+Q 349.835625 64.150125 351.835625 64.150125
+z
+" style="fill: #ffffff; opacity: 0.8; stroke: #cccccc; stroke-linejoin: miter"/>
+ </g>
+ <g id="line2d_15">
+ <path d="M 353.835625 54.570438
+L 363.835625 54.570438
+L 373.835625 54.570438
+" style="fill: none; stroke: #1f77b4; stroke-width: 1.5; stroke-linecap: square"/>
+ <g>
+ <use xlink:href="#mba65a8f57e" x="363.835625" y="54.570438" style="fill: #1f77b4; stroke: #1f77b4"/>
+ </g>
+ </g>
+ <g id="text_19">
+ <!-- copy -->
+ <g transform="translate(381.835625 58.070438)scale(0.1 -0.1)">
+ <defs>
+ <path id="DejaVuSans-63" d="M 3122 3366
+L 3122 2828
+Q 2878 2963 2633 3030
+Q 2388 3097 2138 3097
+Q 1578 3097 1268 2742
+Q 959 2388 959 1747
+Q 959 1106 1268 751
+Q 1578 397 2138 397
+Q 2388 397 2633 464
+Q 2878 531 3122 666
+L 3122 134
+Q 2881 22 2623 -34
+Q 2366 -91 2075 -91
+Q 1284 -91 818 406
+Q 353 903 353 1747
+Q 353 2603 823 3093
+Q 1294 3584 2113 3584
+Q 2378 3584 2631 3529
+Q 2884 3475 3122 3366
+z
+" transform="scale(0.015625)"/>
+ <path id="DejaVuSans-6f" d="M 1959 3097
+Q 1497 3097 1228 2736
+Q 959 2375 959 1747
+Q 959 1119 1226 758
+Q 1494 397 1959 397
+Q 2419 397 2687 759
+Q 2956 1122 2956 1747
+Q 2956 2369 2687 2733
+Q 2419 3097 1959 3097
+z
+M 1959 3584
+Q 2709 3584 3137 3096
+Q 3566 2609 3566 1747
+Q 3566 888 3137 398
+Q 2709 -91 1959 -91
+Q 1206 -91 779 398
+Q 353 888 353 1747
+Q 353 2609 779 3096
+Q 1206 3584 1959 3584
+z
+" transform="scale(0.015625)"/>
+ <path id="DejaVuSans-70" d="M 1159 525
+L 1159 -1331
+L 581 -1331
+L 581 3500
+L 1159 3500
+L 1159 2969
+Q 1341 3281 1617 3432
+Q 1894 3584 2278 3584
+Q 2916 3584 3314 3078
+Q 3713 2572 3713 1747
+Q 3713 922 3314 415
+Q 2916 -91 2278 -91
+Q 1894 -91 1617 61
+Q 1341 213 1159 525
+z
+M 3116 1747
+Q 3116 2381 2855 2742
+Q 2594 3103 2138 3103
+Q 1681 3103 1420 2742
+Q 1159 2381 1159 1747
+Q 1159 1113 1420 752
+Q 1681 391 2138 391
+Q 2594 391 2855 752
+Q 3116 1113 3116 1747
+z
+" transform="scale(0.015625)"/>
+ </defs>
+ <use xlink:href="#DejaVuSans-63"/>
+ <use xlink:href="#DejaVuSans-6f" x="54.980469"/>
+ <use xlink:href="#DejaVuSans-70" x="116.162109"/>
+ <use xlink:href="#DejaVuSans-79" x="179.638672"/>
+ </g>
+ </g>
+ </g>
+ </g>
+ </g>
+ <defs>
+ <clipPath id="p9bc6bf51ba">
+ <rect x="57.6" y="41.472" width="357.12" height="266.112"/>
+ </clipPath>
+ </defs>
+</svg>
diff --git a/src/performance_measurement/figures/plot.py b/src/performance_measurement/figures/plot.py
index b89cb94..56b9318 100644
--- a/src/performance_measurement/figures/plot.py
+++ b/src/performance_measurement/figures/plot.py
@@ -1,164 +1,177 @@
#!/usr/bin/env python
"""Script to visualize google-benchmark output"""
from __future__ import print_function
import argparse
import sys
import logging
import json
import pandas as pd
import matplotlib.pyplot as plt
import pathlib
# Log records are rendered as "[LEVEL] message".
logging.basicConfig(format="[%(levelname)s] %(message)s")

# Benchmark result columns that may be selected with -m; the first entry is
# the one used when -m is not given.
METRICS = [
    "real_time",
    "cpu_time",
    "bytes_per_second",
    "items_per_second",
    "iterations",
]

# Functions selectable with -t and applied to every value of the chosen
# metric; the empty key is the identity (no transform).
TRANSFORMS = {"": lambda x: x, "inverse": lambda x: 1.0 / x}
def get_default_ylabel(args):
    """Build the fallback y-axis label from the parsed command-line options.

    The label is the metric name, written as ``transform(metric)`` when a
    transform was requested, and suffixed with `` relative to <label>`` when
    ``args.relative_to`` is set.
    """
    if args.transform == "":
        base = args.metric
    else:
        base = args.transform + "(" + args.metric + ")"
    if args.relative_to is None:
        return base
    return base + " relative to %s" % args.relative_to
def parse_args():
    """Define the command-line interface and return the parsed options.

    When ``--ylabel`` is not supplied, it is filled in from the other options
    via ``get_default_ylabel`` before the namespace is returned.
    """
    metric_names = ", ".join(METRICS)
    transform_names = ", ".join(list(TRANSFORMS))

    cli = argparse.ArgumentParser(description="Visualize google-benchmark output")

    # Data selection.
    cli.add_argument(
        "-f",
        dest="file",
        metavar="FILE",
        type=argparse.FileType("r"),
        default=sys.stdin,
        help="path to file containing the csv or json benchmark data",
    )
    cli.add_argument(
        "-m",
        dest="metric",
        metavar="METRIC",
        choices=METRICS,
        default=METRICS[0],
        help="metric to plot on the y-axis, valid choices are: %s" % metric_names,
    )
    cli.add_argument(
        "-t",
        dest="transform",
        metavar="TRANSFORM",
        choices=TRANSFORMS.keys(),
        default="",
        help="transform to apply to the chosen metric, valid choices are: %s"
        % transform_names,
    )
    cli.add_argument(
        "-r",
        dest="relative_to",
        metavar="RELATIVE_TO",
        type=str,
        default=None,
        help="plot metrics relative to this label",
    )

    # Plot decoration.
    cli.add_argument(
        "--xlabel", type=str, default="input size", help="label of the x-axis"
    )
    cli.add_argument("--ylabel", type=str, help="label of the y-axis")
    cli.add_argument("--title", type=str, default="", help="title of the plot")
    cli.add_argument(
        "--logx", action="store_true", help="plot x-axis on a logarithmic scale"
    )
    cli.add_argument(
        "--logy", action="store_true", help="plot y-axis on a logarithmic scale"
    )

    # Output destination.
    cli.add_argument(
        "--output", type=str, default="", help="File in which to save the graph"
    )

    options = cli.parse_args()
    if options.ylabel is None:
        options.ylabel = get_default_ylabel(options)
    return options
def parse_input_size(name):
    """Extract the input size from a benchmark name.

    The size is the integer that follows the last "/" in ``name``; a name
    containing no "/" is reported as size 1.
    """
    _, slash, size = name.rpartition("/")
    return int(size) if slash else 1
def read_data(args):
    """Load benchmark results into a DataFrame and derive the plotting columns.

    The input format is chosen from the extension of ``args.file.name``:
    ``.csv`` is read with pandas (only the ``name`` and ``args.metric``
    columns), ``.json`` is loaded and its top-level ``"benchmarks"`` entry is
    turned into a DataFrame. An unsupported extension, or a ``ValueError``
    while parsing, logs an error and exits the process with status 1.

    Columns added or rewritten on the returned DataFrame:
      * ``label``: the path component just before the last "/" of the
        benchmark name, or the whole name when it contains no "/".
      * ``input``: the result of ``parse_input_size`` on the benchmark name.
      * ``args.metric``: passed through ``TRANSFORMS[args.transform]``.
    """
    extension = pathlib.Path(args.file.name).suffix
    if extension not in (".csv", ".json"):
        logging.error("Unsupported file extension '{}'".format(extension))
        sys.exit(1)

    try:
        if extension == ".csv":
            data = pd.read_csv(args.file, usecols=["name", args.metric])
        else:
            json_data = json.load(args.file)
            data = pd.DataFrame(json_data["benchmarks"])
    except ValueError:
        logging.error(
            'Could not parse the benchmark data. Did you forget "--benchmark_format=[csv|json] when running the benchmark"?'
        )
        sys.exit(1)

    def label_of(name):
        parts = name.split("/")
        # parse_input_size accepts names without a "/" (size 1), so the label
        # lookup must accept them too instead of failing on parts[-2].
        return parts[-2] if len(parts) > 1 else name

    data["label"] = data["name"].apply(label_of)
    data["input"] = data["name"].apply(parse_input_size)
    data[args.metric] = data[args.metric].apply(TRANSFORMS[args.transform])
    return data
def plot_groups(label_groups, args):
"""Display the processed data

Draws one line per entry of ``label_groups`` (x = the group's "input"
column, y = its ``args.metric`` column), applies the axis scales, labels
and title requested in ``args``, then saves the figure to ``args.output``
when it is non-empty and shows it interactively otherwise.
"""
+ fig, ax = plt.subplots()
+
for label, group in label_groups.items():
- plt.plot(group["input"], group[args.metric], label=label, marker=".")
# NOTE(review): the y-values are divided by 1024**3 for every metric,
# presumably to display a byte-based metric in GiB; confirm this is
# intended when another metric is selected with -m.
+ ax.plot(
+ group["input"],
+ group[args.metric] / 1024 / 1024 / 1024,
+ label=label,
+ marker=".",
+ )
if args.logx:
- plt.xscale("log", base=2)
+ ax.set_xscale("log", base=2)
if args.logy:
- plt.yscale("log")
- plt.xlabel(args.xlabel)
- plt.ylabel(args.ylabel)
- plt.title(args.title)
- plt.legend()
+ ax.set_yscale("log")
+ ax.set_xlabel(args.xlabel)
+ ax.set_ylabel(args.ylabel)
+ ax.set_title(args.title)
+ ax.legend()
+
# Hard-coded vertical guide lines at x = 32, 1024 and 19712, labelled
# L1/L2/L3: presumably the cache capacities of the benchmarked machine
# (TODO confirm units). The y-range 0-120 is fixed regardless of the data.
+ ax.vlines([32, 1024, 19712], 0, 120, color="gray")
+ ax.text(16, 115, "L1")
+ ax.text(512, 115, "L2")
+ ax.text(19712 / 2, 115, "L3")
+
if args.output:
logging.info("Saving to %s" % args.output)
plt.savefig(args.output)
else:
plt.show()
def main():
    """Entry point of the program.

    Parses the command line, loads the benchmark data, splits it into one
    frame per label (indexed by input size, keeping the "input" column),
    optionally normalises every group's metric by the group named with
    ``-r``, and hands the result to ``plot_groups``.
    """
    args = parse_args()
    data = read_data(args)

    label_groups = {
        label: group.set_index("input", drop=False)
        for label, group in data.groupby("label")
    }

    if args.relative_to is not None:
        try:
            # Copy the baseline first so that dividing the baseline group
            # itself does not change the divisor used for later groups.
            baseline = label_groups[args.relative_to][args.metric].copy()
        except KeyError as missing:
            msg = "Key %s is not present in the benchmark output"
            logging.error(msg, str(missing))
            exit(1)
        for frame in label_groups.values():
            frame[args.metric] /= baseline

    plot_groups(label_groups, args)


if __name__ == "__main__":
    main()
diff --git a/src/performance_measurement/performance_measurement.tex b/src/performance_measurement/performance_measurement.tex
index ba7fa24..2446979 100644
--- a/src/performance_measurement/performance_measurement.tex
+++ b/src/performance_measurement/performance_measurement.tex
@@ -1,693 +1,701 @@
\renewcommand{\FIGREP}{src/performance_measurement/figures}
\section{Performance measurement}
\label{sec:performance_measurement}
\intersec{helvetios}
\begin{frame}
\frametitle{Goal of this section}
\framesubtitle{}
\begin{itemize}
\item Key concepts to quantify performance
\begin{itemize}
\item Metrics
\item Using a profiler
\item Scalings, speedup, efficiency
\end{itemize}
\item Roofline model
\end{itemize}
\end{frame}
\subsection{Performance metrics}
\label{sec:metrics}
\begin{frame}
\frametitle{Performance metrics}
\framesubtitle{}
\begin{itemize}
\item How can we quantify performance?
\item We need to define a means to measure it
\item We will focus on the most interesting metrics for HPC
\end{itemize}
\vfill
\pause
\begin{itemize}
\item The first that comes to mind is \textit{time}, e.g. time-to-solution
\item Derived metrics: speedup and efficiency
\end{itemize}
\vfill
\pause
\begin{itemize}
\item Scientific codes do computations on floating point numbers
\item A second metric is the number of \textit{floating-point operations per second}
(\si{\flops})
\end{itemize}
\vfill
\pause
\begin{itemize}
\item Finally, the \textit{memory bandwidth} indicates how much data your code
transfers per unit of time
\end{itemize}
\end{frame}
\note{
\begin{itemize}
\item My code is super fast, it runs in \SI{2.5}{\ns}!
\item It seems fast, but is it? How fast can your hardware go?
\item To really understand how much your code exploits the hardware, we use
the \si{\flops} and memory BW
\item Your hardware has theoretical maximum values for those
\item You can compare the values from your code to the max to see how well
you use the hardware
\end{itemize}
}
\subsection{Profiling}
\label{sec:profiling}
\begin{frame}
\frametitle{Profiling}
\framesubtitle{A tool to measure various timings}
\begin{itemize}
\item Where is my application spending most of its time?
\begin{itemize}
\item (bad) measure time ``by hand'' using timings and prints
\item (good) use a tool made for this, e.g. Intel Amplifier, Score-P,
gprof
\end{itemize}
\end{itemize}
\vfill
\begin{itemize}
\item There are two types of profiling techniques
\begin{itemize}
\item Sampling: you stop the code every now and then and check in
which function you are
\item Code instrumentation: instructions are added at compile time
to trigger measurements
\end{itemize}
\end{itemize}
\vfill
\begin{itemize}
\item In addition to timings, profilers give you a lot more information on
\begin{itemize}
\item Memory usage
\item Hardware counters
\item CPU activity
\item MPI communications
\item etc.
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}[fragile,exercise]
\frametitle{Profiling}
\framesubtitle{Interactive demonstration}
\begin{itemize}
\item For the purpose of this exercise, we will use MiniFE
\begin{itemize}
\item 3D implicit finite-elements on an unstructured mesh
\item C++ mini application
\item \url{https://github.com/Mantevo/miniFE}
\item You don't need to understand what the code does!
\end{itemize}
\item We will use Intel VTune, part of the \href{https://www.intel.com/content/www/us/en/developer/tools/oneapi/toolkits.html\#base-kit}{OneAPI Base toolkit (free)}
\end{itemize}
\vfill
\begin{itemize}
\item Download miniFE
\item Compile the basic version found in \cmd{ref/src}
\item Profile the code using the hotspot analysis
\item Open Intel VTune and select your timings
\item Play around and find the 5 most time-consuming functions
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{Profiling}
\framesubtitle{Compile MiniFE}
\begin{itemize}
\item Download miniFE
\begin{bashcode}
$> git clone https://github.com/Mantevo/miniFE.git
$> cd miniFE
\end{bashcode}
\item Compile the basic version found in \code{ref/src}
\begin{itemize}
\item You will need to load a compiler and an MPI library
\begin{bashcode}
$> module load intel intel-mpi intel-vtune
\end{bashcode}%$
\item Change the \cmd{Makefile} to set \cmd{CXX=mpiicpc} and \cmd{CC=mpiicc} and compile
\begin{bashcode}
$> make
\end{bashcode}%$
\item Make sure to compile your code with \cmd{-g -O3}
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{Profiling}
\framesubtitle{Profile MiniFE}
\begin{itemize}
\item Profile the code using
\begin{bashcode}
$> srun -n 1 amplxe-cl -collect hotspots -r prof_results -- ./miniFE.x -nx 128 -ny 128 -nz 128
\end{bashcode}%$
\item This will profile for the ``hotspots'' and store the timings in \cmd{prof\_results}
\item You can have more info on the types of analysis with
\begin{bashcode}
$> amplxe-cl -h collect
\end{bashcode}%$
\item Open Intel VTune and select your timings
\begin{bashcode}
$> amplxe-gui prof_results/prof_results.amplxe
\end{bashcode}%$
\item Play around and find the 5 most time-consuming functions
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{Profiling}
\framesubtitle{What do we learn?}
\begin{itemize}
\item 50.0\% of the time spent in matrix/vector multiplications
\item 12.5\% of time spent imposing boundary conditions
\item etc.
\item Does the problem size influence the timings?
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{Profiling}
\framesubtitle{Smaller problem}
\begin{itemize}
\item This time, we profile a problem of size $(16, 16, 16)$
\item 13.6\% of the time is spent opening libraries
\item 13.6\% of the time is spent initializing MPI
\item etc.
\item Depending on the problem size, different parts of the code will dominate
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{Profiling}
\framesubtitle{Some tips and tricks}
\begin{itemize}
\item Profile a code without bugs!
\item Choose the right problem size (representative of your simulations)
\item Focus on the functions taking the most time first
\item If the profile is not explicit, try refactoring into smaller functions
\begin{itemize}
\item Some profilers, e.g. Score-P, let you define custom regions
\end{itemize}
\end{itemize}
\end{frame}
\subsection{Scalings, speedup and efficiency}
\label{sec:scalings}
\begin{frame}
\frametitle{Speedup and efficiency}
\framesubtitle{}
\begin{itemize}
\item Two important metrics are derived from timings
\item Compare timings with $n$ processes, $T_{n}$, against the reference
timing, $T_\text{ref}$
\end{itemize}
\vfill
\begin{minipage}{0.3\linewidth}
\begin{center}
\textbf{Speedup}
\end{center}
\begin{equation*}
S(n) = \frac{T_\text{ref}}{T_{n}}
\end{equation*}
\end{minipage}
\hspace{0.5cm}
\begin{minipage}{0.3\linewidth}
\begin{center}
\textbf{Efficiency}
\end{center}
\begin{equation*}
E(n) = \frac{S(n)}{n}
\end{equation*}
\end{minipage}
\vfill
\begin{itemize}
\item We want $S(n)$ as close to $n$ and $E(n)$ as close to 1 (100\%) as possible
\end{itemize}
\end{frame}
\begin{frame}[t]
\frametitle{Strong scaling}
\framesubtitle{}
\begin{itemize}
\item Scalings are a way to assess how well a program performs when adding
computational resources
\item Strong scaling: add resources, keep total amount of work
constant
\begin{equation*}
S(n) = \frac{T_{1}}{T_{n}}, \qquad E(n) = \frac{S(n)}{n} = \frac{T_{1}}{nT_{n}}
\end{equation*}
\item Strong scaling is an indication of how profitable it is to
add resources to solve your problem
\end{itemize}
\addimage[width=6cm]{\FIGREP/strong_scaling}{5cm}{1cm}
\end{frame}
\begin{frame}[t]
\frametitle{Weak scaling}
\framesubtitle{}
\begin{itemize}
\item Weak scaling: add resources and maintain amount of work per resource
constant
\begin{equation*}
S(n) = \frac{nT_{1}}{T_{n}}, \qquad E(n) = \frac{S(n)}{n} = \frac{T_{1}}{T_{n}}
\end{equation*}
\item Weak scalings are an indication of how well your code will perform on
a bigger machine (and with a bigger problem)
\item These scalings are always required for a proposal
\begin{itemize}
\item For strong scalings the metric is speedup (how do I improve performance)
\item For weak scalings the metric is efficiency (how well
performance is kept)
\end{itemize}
\end{itemize}
\addimage[width=6cm]{\FIGREP/weak_scaling}{5cm}{1cm}
\end{frame}
\subsection{Amdahl's law}
\label{sec:amdahl}
\begin{frame}[t]
\frametitle{Amdahl's law}
\framesubtitle{}
\begin{itemize}
\item Amdahl's law gives you an upper bound to the achievable speedup for a
fixed problem size
\item By definition it is a strong scaling analysis
\vfill
\pause
\item Assume a fraction $p$ of your code is (perfectly) parallel and timing with 1 process
is $T_{1}$
\item Timing with $n$ processes is
\begin{equation*}
T_{n} = (1-p) T_{1} + \frac{p}{n}T_{1} = \left[ (1-p) + \frac{p}{n}\right] T_{1}
\end{equation*}
\pause
\item Speedup becomes
\begin{equation*}
S(n) = \frac{T_{1}}{T_{n}} = \frac{1}{(1-p) + \frac{p}{n}}
\end{equation*}
\vfill
\pause
\item In the limit of infinite resources
\begin{equation*}
\lim_{n\rightarrow\infty}S(n) = \frac{1}{1-p}
\end{equation*}
\end{itemize}
\onslide<2->\addimage[width=3cm]{\FIGREP/amdahl_illustration}{12.5cm}{1.0cm}
\end{frame}
\begin{frame}[b]
\frametitle{Amdahl's law}
\framesubtitle{}
\begin{itemize}
\item Limited by the serial part (very sensitive)!
\item Does this mean we cannot exploit large HPC machines?
\pause
\item No, in general with more resources, we simulate larger systems
$\Rightarrow$ weak scaling (see
\href{https://en.wikipedia.org/wiki/Gustafson\%27s_law}{Gustafson's law})
\end{itemize}
\onslide<1->\addimage[width=8.cm]{\FIGREP/amdahl_speedup}{4cm}{2cm}
\end{frame}
\begin{frame}
\frametitle{\si{\flops} and memory bandwidth}
\framesubtitle{}
\begin{itemize}
\item FLOPs are floating point operations, e.g. $+, -, \times, \div$
\item Can be evaluated by hand, dividing the number of operations by the running time
\vfill
\item Memory bandwidth measures the amount of data transferred per unit of
time [\si{\byte\per\second}, \si{\kibi\byte\per\second},
\si{\mebi\byte\per\second}, \si{\gibi\byte\per\second}, ...]
\item Can be measured by hand dividing the amount of data transferred by the
running time
\vfill
\item In both cases, generally use tools such as PAPI, Tau, likwid, Intel
Amplxe, STREAM, etc.
\end{itemize}
\end{frame}
\begin{frame}[t,fragile]
\frametitle{Performance measurement}
\framesubtitle{A simple DAXPY example}
\begin{itemize}
\item Assume \href{https://en.wikichip.org/wiki/intel/xeon_gold/6132}{Intel Xeon Gold 6132} (Gacrux)
\end{itemize}
\cxxfile[%
title={optimization/daxpy.cc},
minted options app={
% highlightlines={2, 7},
firstline=25,
lastline=27,
firstnumber=1,
}]{examples/optimization/daxpy.cc}
\begin{itemize}
\item My code runs in \SI{174.25}{\ms}. It is amazingly fast!
\end{itemize}
\pause
\vfill
\begin{itemize}
\item Each iteration has 2 FLOP (1 add and 1 mul) and there are \cmd{N = 1e8}
iterations
\item Our code $\SI{2d8}{\flop} / \SI{174.25d-3}{\second} = \SI{0.001}{\tera\flops}$
\item Our hardware can achieve a theoretical peak performance of $\SI{1.16}{\tera\flops}$...
\end{itemize}
\pause
\vfill
\begin{itemize}
\item Each iteration has 3 memory operations (2 loads and 1 store)
\item Our code $\SI{2.23}{\gibi\byte} / \SI{174.25d-3}{\second} = \SI{12.82}{\gibi\byte\per\second}$
\item Our hardware can achieve a theoretical memory bandwidth of $\SI{119.18}{\gibi\byte\per\second}$...
\end{itemize}
\end{frame}
\subsection{Roofline model}
\label{sec:roofline}
\begin{frame}[t]
\frametitle{Roofline model}
\framesubtitle{}
\begin{itemize}
\item How well am I exploiting the hardware resources?
\item The roofline model is a performance model that provides an estimated
answer to this question
\end{itemize}
\vspace{1cm}
\pause
\begin{itemize}
\item Key concept: the arithmetic intensity, $AI$, of an algorithm is \# \si{\flop\per\byte} of data transferred
\item It measures data reuse
\end{itemize}
\addimage[width=8.cm]{\FIGREP/ai}{4cm}{0.5cm}
\end{frame}
\begin{frame}[t,fragile]
\frametitle{Roofline model}
\framesubtitle{How to find arithmetic intensity}
\begin{itemize}
\item For very simple algorithms, you can compute the AI
\item Let's go back to the DAXPY example
\cxxfile[%
title={optimization/daxpy.cc},
minted options app={
% highlightlines={2, 7},
firstline=25,
lastline=27,
firstnumber=1,
}]{examples/optimization/daxpy.cc}
\item There are 2 operations (1 add and 1 mul)
\item Three 8-byte memory operations (2 loads and 1 store)
\item The AI is then $2/24 = 1/12$
\pause
\item For more complex algorithms, use a tool, e.g. Intel Advisor
\end{itemize}
\end{frame}
\begin{frame}[t]
\frametitle{Roofline model}
\framesubtitle{Building the model}
\begin{itemize}
\item Roofline model is plotted on \textbf{log-log scale}
\begin{itemize}
\item x-axis is the $AI$
\item y-axis is \si{\flops}
\end{itemize}
\pause
\item The hardware limits are defined by
\begin{equation*}
P = \min(P_{\text{max}}, b_{s} \cdot AI)
\end{equation*}
\begin{itemize}
\item $P_{\text{max}}$ is the CPU peak \si{\flops}
\item $AI$ is the arithmetic intensity
\item $b_{s}$ is the memory BW
\end{itemize}
\end{itemize}
\onslide<1>\addimage[width=5cm]{\FIGREP/roofline_1}{5.5cm}{0.5cm}
\onslide<2>\addimage[width=5cm]{\FIGREP/roofline_2}{5.5cm}{0.5cm}
\onslide<3>\addimage[width=5cm]{\FIGREP/roofline_3}{5.5cm}{0.5cm}
\end{frame}
\begin{frame}[t]
\frametitle{Roofline model}
\framesubtitle{Building the model}
\begin{itemize}
\item Refinements can be made to the Roofline model
\item Adding a memory hierarchy with caches
\item Adding different levels of DLP (Data-Level parallelism)
\item They give you hints on what to optimize
\end{itemize}
\addimage[width=7cm]{\FIGREP/roofline_extended}{4.5cm}{0.5cm}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Roofline model}
\framesubtitle{How to find the peak performance}
\begin{itemize}
\item Theoretical peak performance\\
\vspace{-2ex}
\begin{minipage}{.4\linewidth}
\begin{align*}
P_{\text{max}} = & \textcolor{white}{\times} \text{Number of FP ports (ILP)} \\
& \times \text{flops} / \text{cycles (e.g. 2 for FMA)} \\
& \times \text{vector size (DLP)} \\
& \times \text{frequency (in GHz)} \\
& \times \text{number of cores (TLP)}
\end{align*}
\end{minipage}
\vspace{3ex}
\item Example:
\href{https://en.wikichip.org/wiki/intel/xeon_gold/6132}{Intel
Xeon Gold 6132}\\
\vspace{-2ex}
\begin{minipage}{.4\linewidth}
\begin{align*}
P_{\text{max}} = & \textcolor{white}{\times} 2 \text{ (ports)} \\
& \times \SI{2}{\flop\per\cycle} \text{ (2 for FMA)} \\
& \times \frac{\SI{512}{\bit} \text{ (AVX512)} }{\SI{64}{\bit}\text{ (double)}} \\
& \times \SI{2.6}{\giga\hertz} \\
& \times 14 \text{ (cores)} \\
= & \SI{1.16}{\tera\flops}
\end{align*}
\end{minipage}
\addimage[width=6cm]{\FIGREP/skylake_server_block_diagram}{9cm}{0.8cm}
\pause
\vspace{3ex}
\item Or use software that estimates it
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Roofline model}
\framesubtitle{How to find the memory bandwidth}
\begin{itemize}
\item Theoretical bandwidth of the memory
\begin{align*}
\text{BW}_{\text{max}} = &\textcolor{white}{\times} \text{Number of transfers per second} \\
& \times \text{Bus width} \\
& \times \text{Number of interfaces}
\end{align*}
\item In general, we suppose that RAM matches CPU bandwidth (found on the CPU spec. list)
\item Example:
\href{https://en.wikichip.org/wiki/intel/xeon_gold/6132}{Intel
Xeon Gold 6132}
\begin{align*}
\text{BW}_{\text{max}} = &\textcolor{white}{\times} \SI{2666}{\mega\transfer\per\second} \text{ (DDR4 2666)} \\
& \times \SI{8}{\byte\per\transfer} \text{ (64bit bus)}\\
& \times 6
\end{align*}
\begin{itemize}
\item $\SI{19.86}{\gibi\byte\per\second}$ for 1 channel
\item Maximum of $\SI{119.18}{\gibi\byte\per\second}$
\end{itemize}
\pause
\item Or use software that estimates it
\end{itemize}
\begin{itemize}
\item A corollary from ``theoretical'' is that it is not achievable in practice!
\end{itemize}
\end{frame}
\begin{frame}[fragile]
\frametitle{Roofline model}
\framesubtitle{How to measure the actual values}
\begin{itemize}
\item Peak performance measurement
\begin{itemize}
\item Using a compute bound kernel
\item Using dgemm:\\
1 core: $\SI{98.0}{\giga\flops}$\\
14 cores: $\SI{965.0}{\giga\flops}$
\end{itemize}
\item Bandwidth measurement
\begin{itemize}
\item Using a memory bound kernel
\item Using stream (triad):\\
1 core: $\SI{12.7}{\gibi\byte\per\second}$\\
6 cores: $\SI{70.1}{\gibi\byte\per\second}$\\
9 cores: $\SI{82.7}{\gibi\byte\per\second}$
- \addimage[width=6cm]{\FIGREP/skylake_server_block_diagram}{9cm}{0.8cm}
\end{itemize}
\end{itemize}
+ \addimage[width=6cm]{\FIGREP/bandwidth}{9cm}{1.5cm}
\end{frame}
+\begin{frame}[fragile]
+ \frametitle{Roofline model}
+ \framesubtitle{Intel Amplifier}
+ \addimage[width=12cm]{\FIGREP/Roofline}{1cm}{1cm}
+\end{frame}
+
+
+
\begin{frame}[fragile,t]
\frametitle{Optimization}
\framesubtitle{}
\begin{itemize}
\item We now have a pretty good idea of which part of the code to optimize
\item Different options are possible (by order of complexity)
\begin{enumerate}
\item Compiler and linker flags
\item Optimized external libraries
\item Handmade optimization (loop reordering, better data access,
etc.)
\item Algorithmic changes
\end{enumerate}
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Optimization}
\framesubtitle{Compiler flags}
\begin{itemize}
\item Compilers have a set of optimizations they can do (if possible)
\item You can find a list of \href{https://gcc.gnu.org/onlinedocs/gcc/gcc-command-options/options-that-control-optimization.html}{options
for GNU compilers on their doc}
\pause
\item Common options are:
\begin{itemize}
\item \cmd{-O0}, \cmd{-O1}, \cmd{-O2}, \cmd{-O3}: from almost no
optimizations to most optimizations
\pause
\item \cmd{-Ofast}: activate more aggressive options, \eg{}
\cmd{-ffast-math} (but can produce wrong results in some
particular cases)
\end{itemize}
\pause
\item Test your program with different options (\cmd{-O3} does not
necessarily lead to faster programs)
\item Note that the more optimization the longer the compilation time
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Optimization}
\framesubtitle{Optimized libraries}
\begin{itemize}
\item Do not re-invent the wheel!
\item A lot of optimized libraries exist with different purposes (solvers,
data structures, I/O, etc.). A few examples:
\begin{itemize}
\item Solvers: PETSc, MUMPS, LAPACK, scaLAPACK, PARDISO, etc.
\item I/O: HDF5, ADIOS, etc.
\item Math libraries: FFTW, BLAS, etc.
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Optimization}
\framesubtitle{Handmade optimizations}
\begin{itemize}
\item Sometimes, we cannot rely on compiler options or libraries and we must
optimize ``by hand''
\item Usually, the goal is to rewrite the code in such a way that the
compiler can optimize it
\item Start by having a correct program before trying to optimize
\item ``Premature optimization is the root of all evil'', D. Knuth
\end{itemize}
\end{frame}
\subsection{Pareto principle}
\label{sec:pareto}
\begin{frame}
\frametitle{Pareto principle}
\framesubtitle{The 80/20 rule}
\begin{itemize}
\item General principle that states that 80\% of the effect comes from 20\%
of causes
\item Applies in many domains and especially in optimization
\item 80\% of the time is spent in 20\% of your code
\item Concentrate on those 20\% and don't arbitrarily optimize
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Optimization}
\framesubtitle{Algorithmic optimizations}
\begin{itemize}
\item Example of matrix/matrix multiplication. Graph shows complexity ($\mathcal{O}(n^{\omega})$) for
different algorithms
\end{itemize}
\onslide<2>\addimage[width=7cm]{\FIGREP/matmul}{4.5cm}{0.5cm}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Parallelization}
\framesubtitle{When to parallelize}
\begin{itemize}
\item Only when your code has \textit{no bugs} and is \textit{optimized}
\item Are you ready to parallelize?
\begin{enumerate}
\item Is it worth parallelizing my code? Does my algorithm scale?
\item Performance prediction?
\item Profiling?
\item Bottlenecks?
\item Which parallel paradigm should I use? What is the target architecture
(SMP, cluster, GPU, hybrid, etc)?
\end{enumerate}
\end{itemize}
\end{frame}
\begin{frame}[fragile,t]
\frametitle{Parallelization}
\framesubtitle{When to parallelize}
In 1991, David H. Bailey published a famous paper: \href{https://www.davidhbailey.com/dhbpapers/twelve-ways.pdf}{Twelve ways to fool
the masses when giving performance results on parallel computers}
\vspace{1cm}
\textit{6: Compare your results against scalar, unoptimized code on Crays.}
\addimage[width=7cm]{\FIGREP/dhb}{4.5cm}{0.5cm}
\end{frame}
%%% Local Variables:
%%% mode: latex
%%% TeX-master: "../../phys_743_parallel_programming"
%%% End:

Event Timeline