Lab 05 (optimization): add exercise 2.

  * .gitignore: ignore the ex02 `basic` and `optimized` binaries.
  * doc/lab05-optimization/main.typ: add the "Exercise 1" heading and the
    "Exercise 2" section (perf results without and with the sort).
  * src/05-optimization/ex01/Makefile: drop the French comments and add a
    commented-out -O2 line.
  * src/05-optimization/ex02/Makefile: build two executables (basic and
    optimized) instead of one.
  * src/05-optimization/ex02: rename main.c to basic.c, add optimized.c.

NOTE(review): whitespace-only context lines, indentation and the column
alignment of the perf output were reconstructed; check them against the
working tree before applying. The `index` blob hashes are the ones of the
original patch.

diff --git a/.gitignore b/.gitignore
index bd01306..0341fa9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -62,3 +62,5 @@ src/04-multiprocessing/multiprocessing
 src/04-multiprocessing/cgroups
 src/05-optimization/ex01/basic
 src/05-optimization/ex01/optimized
+src/05-optimization/ex02/optimized
+src/05-optimization/ex02/basic
diff --git a/doc/lab05-optimization/main.typ b/doc/lab05-optimization/main.typ
index 220bc64..3c14c43 100644
--- a/doc/lab05-optimization/main.typ
+++ b/doc/lab05-optimization/main.typ
@@ -5,6 +5,7 @@
 
 In this laboratory, the usage of `perf` as tool is experimented.
 
+== Exercise 1
 ```
 
  Performance counter stats for './ex1':
@@ -182,3 +183,58 @@ sys 0m 0.34s
 
 ])
 
+== Exercise 2
+
+The program fills an array with random numbers between 0 and 511. Then it iterates 10'000 times over the whole array to sum every generated number greater than or equal to 256.
+
+
+#figure(
+  table(
+    columns: (1fr),
+    [Without Optimization],
+    [
+```
+
+       26170.47 msec task-clock       #    1.000 CPUs utilized
+             17      context-switches #    0.650 /sec
+              0      cpu-migrations   #    0.000 /sec
+             74      page-faults      #    2.828 /sec
+    21354981945      cycles           #    0.816 GHz
+    14768657990      instructions     #    0.69  insn per cycle
+      988541451      branches         #   37.773 M/sec
+      327869867      branch-misses    #   33.17% of all branches
+
+26.178296596 seconds time elapsed
+
+26.117025000 seconds user
+ 0.003961000 seconds sys
+```
+    ], [With "sort" optimization],[
+    ```
+       23430.74 msec task-clock
+             17      context-switches #    0.726 /sec
+              0      cpu-migrations   #    0.000 /sec
+            109      page-faults      #    4.652 /sec
+    19119368029      cycles           #    0.816 GHz
+    14818405467      instructions     #    0.78  insn per cycle
+      997843744      branches         #   42.587 M/sec
+         805002      branch-misses    #    0.08% of all branches
+
+23.439504220 seconds time elapsed
+
+23.382177000 seconds user
+ 0.003961000 seconds sys
+```
+    ]
+  ),
+  caption:[Ex02 timing optimization]
+) <sort-optimization>
+
+In @sort-optimization, there is a gain of about 3 s. There is also an important reduction of the branch misses: the miss rate has decreased from 33.17% to 0.08%.
+
+The same test was done with the `-O1` compiler flag and there is almost no difference between the two programs. The optimized one takes around 4.12 s and the basic one around 4.6 s. The difference of about 0.5 s can be explained by the sort performed in the optimized program, because this is the only difference.
+
+
+== Exercise 3
+
+
diff --git a/src/05-optimization/ex01/Makefile b/src/05-optimization/ex01/Makefile
index 62f00f9..5dbb5e4 100644
--- a/src/05-optimization/ex01/Makefile
+++ b/src/05-optimization/ex01/Makefile
@@ -1,8 +1,6 @@
-# Noms de tes deux exécutables
 EXE_BASIC=basic
 EXE_OPTI=optimized
 
-# Fichiers sources pour chaque version (à adapter avec tes vrais noms de fichiers)
 SRCS_BASIC=basic.c
 SRCS_OPTI=optimized.c
 
@@ -12,16 +10,15 @@ endif
 
 CFLAGS=-Wall -Wextra -g -c -O1 -MD -std=gnu11 -D_GNU_SOURCE
 
-# Configuration Nano
 ifeq ($(target),nano)
 TOOLCHAIN_PATH=/buildroot/output/host/usr/bin/
 TOOLCHAIN=$(TOOLCHAIN_PATH)aarch64-linux-
 CFLAGS+=-mcpu=cortex-a53 -funwind-tables
 CFLAGS+=-fno-omit-frame-pointer
+##CFLAGS+=-O2
 EXEC_SUFFIX=
 endif
 
-# Configuration Host
 ifeq ($(target),host)
 TOOLCHAIN=
 EXEC_SUFFIX=_h
@@ -35,41 +32,30 @@ OBJDUMP=$(TOOLCHAIN)objdump
 
 OBJDIR=.obj/$(target)
 
-# Génération des listes de fichiers objets (.o) pour chaque programme
 OBJS_BASIC = $(addprefix $(OBJDIR)/, $(SRCS_BASIC:.c=.o))
 OBJS_OPTI = $(addprefix $(OBJDIR)/, $(SRCS_OPTI:.c=.o))
 
-# Noms finaux des exécutables en fonction de la cible (nano ou host)
 EXEC_BASIC = $(EXE_BASIC)$(EXEC_SUFFIX)
 EXEC_OPTI = $(EXE_OPTI)$(EXEC_SUFFIX)
 
-# --- RÈGLES DE COMPILATION ---
 
-# Règle par défaut : build les deux programmes
 all: $(EXEC_BASIC) $(EXEC_OPTI)
 
-# Règles pour build individuellement : "make basic" ou "make opti"
 basic: $(EXEC_BASIC)
 opti: $(EXEC_OPTI)
 
-# Règle de compilation des .c en .o
-# Le "| $(OBJDIR)" signifie que le dossier doit exister, mais que sa date de modification ne force pas la recompilation
 $(OBJDIR)/%.o: %.c | $(OBJDIR)
 	$(CC) $(CFLAGS) $< -o $@
 
-# Édition de liens (Linker) pour le programme basique
 $(EXEC_BASIC): $(OBJS_BASIC)
 	$(LD) $(OBJS_BASIC) $(LDFLAGS) -o $@
 
-# Édition de liens (Linker) pour le programme optimisé
 $(EXEC_OPTI): $(OBJS_OPTI)
 	$(LD) $(OBJS_OPTI) $(LDFLAGS) -o $@
 
-# Création du dossier d'objets
 $(OBJDIR):
 	mkdir -p $(OBJDIR)
 
-# --- RÈGLES DE NETTOYAGE ET DUMP ---
 
 clean:
 	rm -Rf $(OBJDIR) $(EXEC_BASIC) $(EXEC_OPTI) *~ t_*.txt
@@ -83,7 +69,6 @@ dump_basic: $(EXEC_BASIC)
 dump_opti: $(EXEC_OPTI)
 	$(OBJDUMP) -dS $(EXEC_OPTI) > t_opti.txt
 
-# Inclusion des dépendances générées par l'option -MD
 -include $(OBJS_BASIC:.o=.d) $(OBJS_OPTI:.o=.d)
 
 .PHONY: all basic opti clean clean_all dump_basic dump_opti
\ No newline at end of file
diff --git a/src/05-optimization/ex02/Makefile b/src/05-optimization/ex02/Makefile
index d043773..60bf58f 100644
--- a/src/05-optimization/ex02/Makefile
+++ b/src/05-optimization/ex02/Makefile
@@ -1,5 +1,8 @@
-EXE=ex2
-SRCS=$(wildcard *.c)
+EXE_BASIC=basic
+EXE_OPTI=optimized
+
+SRCS_BASIC=basic.c
+SRCS_OPTI=optimized.c
 
 ifeq ($(target),)
 target=nano
@@ -10,14 +13,15 @@ CFLAGS=-Wall -Wextra -g -c -O0 -MD -std=gnu11 -D_GNU_SOURCE
 ifeq ($(target),nano)
 TOOLCHAIN_PATH=/buildroot/output/host/usr/bin/
 TOOLCHAIN=$(TOOLCHAIN_PATH)aarch64-linux-
-CFLAGS+=-mcpu=cortex-a53 -funwind-tables -fno-omit-frame-pointer
-##CFLAGS+=-O2
-OBJDIR=.obj/nano
-EXEC=$(EXE)
+CFLAGS+=-mcpu=cortex-a53 -funwind-tables
+CFLAGS+=-fno-omit-frame-pointer
+#CFLAGS+=-O2
+EXEC_SUFFIX=
 endif
 
 ifeq ($(target),host)
-EXEC=$(EXE)_h
+TOOLCHAIN=
+EXEC_SUFFIX=_h
 endif
 
 CC=$(TOOLCHAIN)gcc
@@ -27,28 +31,46 @@ STRIP=$(TOOLCHAIN)strip
 OBJDUMP=$(TOOLCHAIN)objdump
 
 OBJDIR=.obj/$(target)
-OBJS= $(addprefix $(OBJDIR)/, $(SRCS:.c=.o))
 
-$(OBJDIR)/%o: %c
+OBJS_BASIC = $(addprefix $(OBJDIR)/, $(SRCS_BASIC:.c=.o))
+OBJS_OPTI = $(addprefix $(OBJDIR)/, $(SRCS_OPTI:.c=.o))
+
+EXEC_BASIC = $(EXE_BASIC)$(EXEC_SUFFIX)
+EXEC_OPTI = $(EXE_OPTI)$(EXEC_SUFFIX)
+
+
+all: $(EXEC_BASIC) $(EXEC_OPTI)
+
+basic: $(EXEC_BASIC)
+opti: $(EXEC_OPTI)
+
+$(OBJDIR)/%.o: %.c | $(OBJDIR)
 	$(CC) $(CFLAGS) $< -o $@
 
-all: $(OBJDIR)/ $(EXEC)
+$(EXEC_BASIC): $(OBJS_BASIC)
+	$(LD) $(OBJS_BASIC) $(LDFLAGS) -o $@
 
-$(EXEC): $(OBJS) $(LINKER_SCRIPT)
-	$(LD) $(OBJS) $(LDFLAGS) -o $@
+$(EXEC_OPTI): $(OBJS_OPTI)
+	$(LD) $(OBJS_OPTI) $(LDFLAGS) -o $@
 
-$(OBJDIR)/:
+$(OBJDIR):
 	mkdir -p $(OBJDIR)
 
+
 clean:
-	rm -Rf $(OBJDIR) $(EXEC) $(EXEC)_s *~ t.txt
+	rm -Rf $(OBJDIR) $(EXEC_BASIC) $(EXEC_OPTI) *~ t_*.txt
 
+# List every binary variant explicitly: a `$(EXE_BASIC)*` / `$(EXE_OPTI)*`
+# glob would also match the sources basic.c and optimized.c and delete them.
 clean_all: clean
-	rm -Rf .obj $(EXE) $(EXE)_s $(EXE)_a $(EXE)_a_s $(EXE)_h $(EXE)_h_s
+	rm -Rf .obj $(EXE_BASIC) $(EXE_BASIC)_h $(EXE_OPTI) $(EXE_OPTI)_h
 
-dump: all
-	$(OBJDUMP) -dS $(EXEC) > t.txt
+dump_basic: $(EXEC_BASIC)
+	$(OBJDUMP) -dS $(EXEC_BASIC) > t_basic.txt
 
--include $(OBJS:.o=.d)
+dump_opti: $(EXEC_OPTI)
+	$(OBJDUMP) -dS $(EXEC_OPTI) > t_opti.txt
 
-.PHONY: all clean clean_all dump
+-include $(OBJS_BASIC:.o=.d) $(OBJS_OPTI:.o=.d)
+
+.PHONY: all basic opti clean clean_all dump_basic dump_opti
\ No newline at end of file
diff --git a/src/05-optimization/ex02/main.c b/src/05-optimization/ex02/basic.c
similarity index 100%
rename from src/05-optimization/ex02/main.c
rename to src/05-optimization/ex02/basic.c
diff --git a/src/05-optimization/ex02/optimized.c b/src/05-optimization/ex02/optimized.c
new file mode 100644
index 0000000..82518c7
--- /dev/null
+++ b/src/05-optimization/ex02/optimized.c
@@ -0,0 +1,34 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#define SIZE 65536
+
+/* qsort() comparator: orders two short values in ascending order. */
+static int compare (const void* a, const void* b)
+{
+    return *(const short*)a - *(const short*)b;
+}
+
+int main()
+{
+    // generate data: SIZE values in the range [0, 511]
+    short data[SIZE];
+    for (int i = 0; i < SIZE; i++) {
+        data[i] = rand() % 512;
+    }
+
+    // sort once, so that the `>= 256` test below is false for a first run
+    // of elements and then true for all the remaining ones
+    qsort(data, SIZE, sizeof(data[0]), compare);
+
+
+    long long sum = 0;
+    for (int j = 0; j < 10000; j++) {
+        for (int i = 0; i < SIZE; i++) {
+            if (data[i] >= 256) {
+                sum += data[i];
+            }
+        }
+    }
+    printf ("sum=%lld\n", sum);
+}