feat(lab05): add ex 1 with report

2026-05-28 15:37:12 +02:00
parent 5d8ef47e0e
commit 6c5c8750b3
4 changed files with 231 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -60,3 +60,5 @@ build
 src/03-led-controller/led-controller
 src/04-multiprocessing/multiprocessing
 src/04-multiprocessing/cgroups
+src/05-optimization/ex01/basic
+src/05-optimization/ex01/optimized
--- a/doc/lab05-optimization/main.typ
+++ b/doc/lab05-optimization/main.typ
@@ -0,0 +1,184 @@
+#import "/doc/metadata.typ": *
+
+= Optimization
+
+In this laboratory, we experiment with the `perf` tool.
+
+
+
+```
+Performance counter stats for './ex1':
+
+          40609.10 msec task-clock                #    1.000 CPUs utilized          
+                22      context-switches          #    0.542 /sec                   
+                 0      cpu-migrations            #    0.000 /sec                   
+             48867      page-faults               #    1.203 K/sec                  
+       33136692484      cycles                    #    0.816 GHz                    
+        1671194529      instructions              #    0.05  insn per cycle         
+         269592231      branches                  #    6.639 M/sec                  
+           1013366      branch-misses             #    0.38% of all branches        
+
+      40.618926728 seconds time elapsed
+
+      39.901620000 seconds user
+       0.296158000 seconds sys
+
+```
+The program performed 22 context switches and ran for 40.6 s of elapsed time.
+
+#task([
+Measure the performance of ex1.
+],[
+```
+Performance counter stats for './ex1':
+
+          40609.10 msec task-clock                #    1.000 CPUs utilized          
+                22      context-switches          #    0.542 /sec                   
+                 0      cpu-migrations            #    0.000 /sec                   
+             48867      page-faults               #    1.203 K/sec                  
+       33136692484      cycles                    #    0.816 GHz                    
+        1671194529      instructions              #    0.05  insn per cycle         
+         269592231      branches                  #    6.639 M/sec                  
+           1013366      branch-misses             #    0.38% of all branches        
+
+      40.618926728 seconds time elapsed
+
+      39.901620000 seconds user
+       0.296158000 seconds sys
+
+```
+The program performed 22 context switches and ran for 40.6 s of elapsed time.
+])
+
+#task([
+What is the error in the ex1 program?
+],[
+The program uses two nested loops to go through the array, but a third loop encloses these two. As a result, the whole array is traversed 10 times just to perform an addition. That is the problem. It can be solved by removing the outer loop and adding 10 directly:
+
+```c
+    int i, j;
+    for (i = 0; i < SIZE; i++)
+    {
+        for (j = 0; j < SIZE; j++)
+        {
+            array[j][i]+= 10;
+        }
+    }
+```
+
+With these modifications, the performance should improve by a factor of about 10.
+
+```
+ Performance counter stats for './optimized':
+
+           4759.07 msec task-clock                #    0.998 CPUs utilized          
+                20      context-switches          #    4.203 /sec                   
+                 0      cpu-migrations            #    0.000 /sec                   
+             48866      page-faults               #   10.268 K/sec                  
+        3883198165      cycles                    #    0.816 GHz                    
+         282691820      instructions              #    0.07  insn per cycle         
+          40234737      branches                  #    8.454 M/sec                  
+            653642      branch-misses             #    1.62% of all branches        
+
+       4.768030627 seconds time elapsed
+
+       4.385881000 seconds user
+       0.320226000 seconds sys
+```
+
+This can be observed by running the same `perf` measurement as before. The elapsed time was around 40 s before and is now about 4.7 s. The same observation can be made for the cache misses:
+- optimized : 42103472
+- basic     : 406627550
+
+
+])
+
+
+#task([
+  Show the L1 cache misses for ex1:
+],[
+  #table(
+    columns: (1.5fr, 1fr),
+    stroke: none,
+    [
+      Not optimized
+      ```
+407036282 L1-dcache-load-misses
+
+39.868545227 seconds time elapsed
+39.115950000 seconds user
+0.347522000 seconds sys
+
+      ```
+    ],[
+      Optimized
+      ```
+42027157 L1-dcache-load-misses
+
+4.132272210 seconds time elapsed
+3.778635000 seconds user
+0.296472000 seconds sys
+      ```
+    ]
+  )
+  As before, there is still a factor of 10 between the L1 cache misses.
+])
+
+
+#task([Events analysed with `perf`:],[
+
+- *Instructions*: Indicates the number of CPU instructions executed while the program is running.
+- *Cache-misses*: This happens when the requested data is not currently stored in the cache. The request is passed on to the next memory level: RAM.
+- *Branch-misses*: This happens on conditional branches: the CPU tries to predict the next instruction and mispredicts.
+- *L1-dcache-load-misses*: This happens when the data is not stored in the L1 cache. The request is passed on to the next memory level, here the L2 cache.
+- *Cpu-migrations*: Indicates the number of times the program has been moved to a different CPU thread.
+- *Context-switches*: The program shares the CPU with other programs. Sometimes it yields the CPU core to another one, which involves a context switch: some registers, such as the PC, have to be changed.
+
+])
+
+
+#task([Timing performance of `perf`], [
+  Here are some executions of the optimized program:
+
+  #figure(table(
+    columns: (1fr, 1fr),
+    // stroke: none,
+    [*Without `perf`*], [*With `perf`*],
+    [
+      ```
+real	0m 4.44s
+user	0m 3.83s
+sys	  0m 0.29s
+
+      ```
+    ],
+    [
+      ```
+real	0m 4.38s
+user	0m 4.05s
+sys	  0m 0.27s
+
+      ```
+    ],[
+      ```
+real	0m 4.75s
+user	0m 4.09s
+sys	  0m 0.34s
+
+      ```
+    ],[
+      ```
+real	0m 4.75s
+user	0m 4.09s
+sys	  0m 0.34s
+
+      ```
+    ],
+  ),
+  caption:[Impact of the tool `perf`]
+  )<impact-perf>
+
+  As shown in @impact-perf, the tool does not significantly affect program execution. This is most likely due to the CPU allocation.
+
+])
+
--- a/src/05-optimization/ex01/basic.c
+++ b/src/05-optimization/ex01/basic.c
@@ -0,0 +1,23 @@
+#include <stdint.h>
+
+#define SIZE 5000
+
+static int32_t array[SIZE][SIZE];
+/* Baseline for exercise 1: makes ten passes over the SIZE x SIZE `array`, incrementing every element once per pass. */
+int main (void)
+{
+    int i, j, k;
+    /* k counts the passes; i is used as the second subscript and j as the first. */
+    for (k = 0; k < 10; k++)
+    {
+        for (i = 0; i < SIZE; i++)
+        {
+            for (j = 0; j < SIZE; j++)
+            {
+                array[j][i]++; /* the innermost loop advances the first subscript, i.e. SIZE elements per step */
+            }
+        }
+    }
+    return 0;
+}
+
--- a/src/05-optimization/ex01/optimized.c
+++ b/src/05-optimization/ex01/optimized.c
@@ -0,0 +1,22 @@
+#include <stdint.h>
+
+#define SIZE 5000
+
+static int32_t array[SIZE][SIZE];
+/* Single-sweep variant of exercise 1: adds 10 to each element of the SIZE x SIZE `array` exactly once. */
+int main (void)
+{
+    int i, j;
+    /* i is the second subscript and j the first, so the inner loop moves SIZE elements per step. */
+    /* NOTE(review): array[i][j] would walk memory contiguously - confirm the strided order is intended. */
+    for (i = 0; i < SIZE; i++)
+    {
+        for (j = 0; j < SIZE; j++)
+        {
+            array[j][i]+= 10;
+        }
+    }
+    /* Nothing is printed; the exit status is always 0. */
+    return 0;
+}
+