1
0

feat(lab05): add ex 2 with report

This commit is contained in:
2026-05-28 17:03:51 +02:00
parent 6c5c8750b3
commit 481a53d0f8
6 changed files with 129 additions and 35 deletions

2
.gitignore vendored
View File

@@ -62,3 +62,5 @@ src/04-multiprocessing/multiprocessing
src/04-multiprocessing/cgroups
src/05-optimization/ex01/basic
src/05-optimization/ex01/optimized
src/05-optimization/ex02/optimized
src/05-optimization/ex02/basic

View File

@@ -5,6 +5,7 @@
In this laboratory, we experiment with `perf` as a profiling tool.
== Exercise 1
```
Performance counter stats for './ex1':
@@ -182,3 +183,58 @@ sys 0m 0.34s
])
== Exercise 2
The program fills an array with random numbers between 0 and 511. It then iterates 10'000 times over the whole array and sums every generated number that is greater than or equal to 256.
#figure(
table(
columns: (1fr),
[Without Optimization],
[
```
26170.47 msec task-clock # 1.000 CPUs utilized
17 context-switches # 0.650 /sec
0 cpu-migrations # 0.000 /sec
74 page-faults # 2.828 /sec
21354981945 cycles # 0.816 GHz
14768657990 instructions # 0.69 insn per cycle
988541451 branches # 37.773 M/sec
327869867 branch-misses # 33.17% of all branches
26.178296596 seconds time elapsed
26.117025000 seconds user
0.003961000 seconds sys
```
], [With "sort" optimization],[
```
23430.74 msec task-clock
17 context-switches # 0.726 /sec
0 cpu-migrations # 0.000 /sec
109 page-faults # 4.652 /sec
19119368029 cycles # 0.816 GHz
14818405467 instructions # 0.78 insn per cycle
997843744 branches # 42.587 M/sec
805002 branch-misses # 0.08% of all branches
23.439504220 seconds time elapsed
23.382177000 seconds user
0.003961000 seconds sys
```
]
),
caption:[Ex02 timing optimization]
)<sort-optimization>
In @sort-optimization, the sorted version gains about 2.7 s of elapsed time. More importantly, there is a large reduction in branch misses: the miss rate decreases from 33.17% to 0.08%.
The same test was done with the `-O1` compiler flag, and there is almost no difference between the two programs. The optimized one takes around 4.12 s and the basic one around 4.6 s. The remaining difference of roughly 0.5 s can be explained by the sort performed in the optimized program, because this is the only difference between the two.
== Exercise 3

View File

@@ -1,8 +1,6 @@
# Base names of the two executables
EXE_BASIC=basic
EXE_OPTI=optimized
# Source files for each version (adjust to your actual file names)
SRCS_BASIC=basic.c
SRCS_OPTI=optimized.c
@@ -12,16 +10,15 @@ endif
# Base compiler flags: -c compiles without linking, -MD writes a .d dependency file next to each object
CFLAGS=-Wall -Wextra -g -c -O1 -MD -std=gnu11 -D_GNU_SOURCE
# Nano configuration: cross-compile with the aarch64-linux- tools located under TOOLCHAIN_PATH
ifeq ($(target),nano)
TOOLCHAIN_PATH=/buildroot/output/host/usr/bin/
TOOLCHAIN=$(TOOLCHAIN_PATH)aarch64-linux-
CFLAGS+=-mcpu=cortex-a53 -funwind-tables
CFLAGS+=-fno-omit-frame-pointer
##CFLAGS+=-O2
EXEC_SUFFIX=
endif
# Host configuration: no toolchain prefix, executables get the _h suffix
ifeq ($(target),host)
TOOLCHAIN=
EXEC_SUFFIX=_h
@@ -35,41 +32,30 @@ OBJDUMP=$(TOOLCHAIN)objdump
# Object files are kept in a per-target directory
OBJDIR=.obj/$(target)
# Lists of object files (.o) for each program
OBJS_BASIC = $(addprefix $(OBJDIR)/, $(SRCS_BASIC:.c=.o))
OBJS_OPTI = $(addprefix $(OBJDIR)/, $(SRCS_OPTI:.c=.o))
# Final executable names, depending on the target (nano or host)
EXEC_BASIC = $(EXE_BASIC)$(EXEC_SUFFIX)
EXEC_OPTI = $(EXE_OPTI)$(EXEC_SUFFIX)
# --- BUILD RULES ---
# Default rule: build both programs
all: $(EXEC_BASIC) $(EXEC_OPTI)
# Rules to build each program individually: "make basic" or "make opti"
basic: $(EXEC_BASIC)
opti: $(EXEC_OPTI)
# Rule compiling .c files into .o files
# The "| $(OBJDIR)" means the directory must exist, but its modification time does not force a recompilation
$(OBJDIR)/%.o: %.c | $(OBJDIR)
$(CC) $(CFLAGS) $< -o $@
# Link step for the basic program
$(EXEC_BASIC): $(OBJS_BASIC)
$(LD) $(OBJS_BASIC) $(LDFLAGS) -o $@
# Link step for the optimized program
$(EXEC_OPTI): $(OBJS_OPTI)
$(LD) $(OBJS_OPTI) $(LDFLAGS) -o $@
# Create the object directory
$(OBJDIR):
mkdir -p $(OBJDIR)
# --- CLEAN AND DUMP RULES ---
# Remove the object directory, both executables, editor backups and the dump text files
clean:
rm -Rf $(OBJDIR) $(EXEC_BASIC) $(EXEC_OPTI) *~ t_*.txt
@@ -83,7 +69,6 @@ dump_basic: $(EXEC_BASIC)
# Disassemble the optimized executable, interleaved with source (-dS), into t_opti.txt
dump_opti: $(EXEC_OPTI)
$(OBJDUMP) -dS $(EXEC_OPTI) > t_opti.txt
# Include the dependency files generated by the -MD option
-include $(OBJS_BASIC:.o=.d) $(OBJS_OPTI:.o=.d)
.PHONY: all basic opti clean clean_all dump_basic dump_opti

View File

@@ -1,5 +1,8 @@
EXE=ex2
SRCS=$(wildcard *.c)
EXE_BASIC=basic
EXE_OPTI=optimized
SRCS_BASIC=basic.c
SRCS_OPTI=optimized.c
ifeq ($(target),)
target=nano
@@ -10,14 +13,15 @@ CFLAGS=-Wall -Wextra -g -c -O0 -MD -std=gnu11 -D_GNU_SOURCE
ifeq ($(target),nano)
TOOLCHAIN_PATH=/buildroot/output/host/usr/bin/
TOOLCHAIN=$(TOOLCHAIN_PATH)aarch64-linux-
CFLAGS+=-mcpu=cortex-a53 -funwind-tables -fno-omit-frame-pointer
##CFLAGS+=-O2
OBJDIR=.obj/nano
EXEC=$(EXE)
CFLAGS+=-mcpu=cortex-a53 -funwind-tables
CFLAGS+=-fno-omit-frame-pointer
#CFLAGS+=-O2
EXEC_SUFFIX=
endif
ifeq ($(target),host)
EXEC=$(EXE)_h
TOOLCHAIN=
EXEC_SUFFIX=_h
endif
CC=$(TOOLCHAIN)gcc
@@ -27,28 +31,44 @@ STRIP=$(TOOLCHAIN)strip
OBJDUMP=$(TOOLCHAIN)objdump
OBJDIR=.obj/$(target)
OBJS= $(addprefix $(OBJDIR)/, $(SRCS:.c=.o))
$(OBJDIR)/%o: %c
OBJS_BASIC = $(addprefix $(OBJDIR)/, $(SRCS_BASIC:.c=.o))
OBJS_OPTI = $(addprefix $(OBJDIR)/, $(SRCS_OPTI:.c=.o))
EXEC_BASIC = $(EXE_BASIC)$(EXEC_SUFFIX)
EXEC_OPTI = $(EXE_OPTI)$(EXEC_SUFFIX)
all: $(EXEC_BASIC) $(EXEC_OPTI)
basic: $(EXEC_BASIC)
opti: $(EXEC_OPTI)
$(OBJDIR)/%.o: %.c | $(OBJDIR)
$(CC) $(CFLAGS) $< -o $@
all: $(OBJDIR)/ $(EXEC)
$(EXEC_BASIC): $(OBJS_BASIC)
$(LD) $(OBJS_BASIC) $(LDFLAGS) -o $@
$(EXEC): $(OBJS) $(LINKER_SCRIPT)
$(LD) $(OBJS) $(LDFLAGS) -o $@
$(EXEC_OPTI): $(OBJS_OPTI)
$(LD) $(OBJS_OPTI) $(LDFLAGS) -o $@
$(OBJDIR)/:
$(OBJDIR):
mkdir -p $(OBJDIR)
clean:
rm -Rf $(OBJDIR) $(EXEC) $(EXEC)_s *~ t.txt
rm -Rf $(OBJDIR) $(EXEC_BASIC) $(EXEC_OPTI) *~ t_*.txt
clean_all: clean
rm -Rf .obj $(EXE) $(EXE)_s $(EXE)_a $(EXE)_a_s $(EXE)_h $(EXE)_h_s
rm -Rf .obj $(EXE_BASIC)* $(EXE_OPTI)*
dump: all
$(OBJDUMP) -dS $(EXEC) > t.txt
dump_basic: $(EXEC_BASIC)
$(OBJDUMP) -dS $(EXEC_BASIC) > t_basic.txt
-include $(OBJS:.o=.d)
dump_opti: $(EXEC_OPTI)
$(OBJDUMP) -dS $(EXEC_OPTI) > t_opti.txt
.PHONY: all clean clean_all dump
-include $(OBJS_BASIC:.o=.d) $(OBJS_OPTI:.o=.d)
.PHONY: all basic opti clean clean_all dump_basic dump_opti

View File

@@ -0,0 +1,31 @@
#include <stdio.h>
#include <stdlib.h>
#define SIZE 65536
/*
 * qsort() comparator for arrays of short.
 *
 * Returns the difference between the two pointed-to values, which is
 * negative, zero or positive when the first element is respectively
 * less than, equal to or greater than the second. Both operands are
 * promoted to int before the subtraction.
 */
static int compare (const void* a, const void* b)
{
    const short *lhs = a;
    const short *rhs = b;

    return *lhs - *rhs;
}
/*
 * Fills a buffer of SIZE shorts with pseudo-random values, sorts it, then
 * walks the whole buffer 10000 times, accumulating every value that is
 * greater than or equal to 256. The final sum is printed on stdout.
 */
int main()
{
    short values[SIZE];
    long long total = 0;
    int idx = 0;

    // Generate the data: each element is rand() reduced modulo 512.
    while (idx < SIZE) {
        values[idx] = rand() % 512;
        idx++;
    }

    // Sort ascending so that all values below the threshold come first.
    qsort(values, SIZE, sizeof(values[0]), compare);

    // Measured loop: repeated conditional accumulation over the buffer.
    for (int pass = 0; pass < 10000; pass++) {
        for (int pos = 0; pos < SIZE; pos++) {
            if (values[pos] >= 256) {
                total += values[pos];
            }
        }
    }

    printf ("sum=%lld\n", total);
}