feat(lab05): add ex 2 with report
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -62,3 +62,5 @@ src/04-multiprocessing/multiprocessing
|
||||
src/04-multiprocessing/cgroups
|
||||
src/05-optimization/ex01/basic
|
||||
src/05-optimization/ex01/optimized
|
||||
src/05-optimization/ex02/optimized
|
||||
src/05-optimization/ex02/basic
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
In this laboratory, we experiment with `perf` as a performance-analysis tool.
|
||||
|
||||
|
||||
== Exercise 1
|
||||
|
||||
```
|
||||
Performance counter stats for './ex1':
|
||||
@@ -182,3 +183,58 @@ sys 0m 0.34s
|
||||
|
||||
])
|
||||
|
||||
== Exercise 2
|
||||
|
||||
The program fills an array with random numbers between 0 and 511. Then it iterates 10'000 times over the whole array to compute the sum of all generated numbers greater than or equal to 256.
|
||||
|
||||
|
||||
#figure(
|
||||
table(
|
||||
columns: (1fr),
|
||||
[Without Optimization],
|
||||
[
|
||||
```
|
||||
|
||||
26170.47 msec task-clock # 1.000 CPUs utilized
|
||||
17 context-switches # 0.650 /sec
|
||||
0 cpu-migrations # 0.000 /sec
|
||||
74 page-faults # 2.828 /sec
|
||||
21354981945 cycles # 0.816 GHz
|
||||
14768657990 instructions # 0.69 insn per cycle
|
||||
988541451 branches # 37.773 M/sec
|
||||
327869867 branch-misses # 33.17% of all branches
|
||||
|
||||
26.178296596 seconds time elapsed
|
||||
|
||||
26.117025000 seconds user
|
||||
0.003961000 seconds sys
|
||||
```
|
||||
], [With "sort" optimization],[
|
||||
```
|
||||
23430.74 msec task-clock
|
||||
17 context-switches # 0.726 /sec
|
||||
0 cpu-migrations # 0.000 /sec
|
||||
109 page-faults # 4.652 /sec
|
||||
19119368029 cycles # 0.816 GHz
|
||||
14818405467 instructions # 0.78 insn per cycle
|
||||
997843744 branches # 42.587 M/sec
|
||||
805002 branch-misses # 0.08% of all branches
|
||||
|
||||
23.439504220 seconds time elapsed
|
||||
|
||||
23.382177000 seconds user
|
||||
0.003961000 seconds sys
|
||||
```
|
||||
]
|
||||
),
|
||||
caption:[Ex02 timing optimization]
|
||||
)<sort-optimization>
|
||||
|
||||
In @sort-optimization, there is a gain of about 3s, together with a large reduction of the branch misses: the miss rate has decreased from 33.17% to 0.08%.
|
||||
|
||||
The same test was done with the `-O1` compiler flag, and there is almost no difference between the two programs: the optimized version takes around 4.12s and the basic one around 4.6s. The difference of roughly 0.5s can be explained by the sort performed in the optimized program, because this is the only difference between the two.
|
||||
|
||||
|
||||
== Exercise 3
|
||||
|
||||
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
# Noms de tes deux exécutables
|
||||
EXE_BASIC=basic
|
||||
EXE_OPTI=optimized
|
||||
|
||||
# Fichiers sources pour chaque version (à adapter avec tes vrais noms de fichiers)
|
||||
SRCS_BASIC=basic.c
|
||||
SRCS_OPTI=optimized.c
|
||||
|
||||
@@ -12,16 +10,15 @@ endif
|
||||
|
||||
CFLAGS=-Wall -Wextra -g -c -O1 -MD -std=gnu11 -D_GNU_SOURCE
|
||||
|
||||
# Configuration Nano
|
||||
ifeq ($(target),nano)
|
||||
TOOLCHAIN_PATH=/buildroot/output/host/usr/bin/
|
||||
TOOLCHAIN=$(TOOLCHAIN_PATH)aarch64-linux-
|
||||
CFLAGS+=-mcpu=cortex-a53 -funwind-tables
|
||||
CFLAGS+=-fno-omit-frame-pointer
|
||||
##CFLAGS+=-O2
|
||||
EXEC_SUFFIX=
|
||||
endif
|
||||
|
||||
# Configuration Host
|
||||
ifeq ($(target),host)
|
||||
TOOLCHAIN=
|
||||
EXEC_SUFFIX=_h
|
||||
@@ -35,41 +32,30 @@ OBJDUMP=$(TOOLCHAIN)objdump
|
||||
|
||||
OBJDIR=.obj/$(target)
|
||||
|
||||
# Génération des listes de fichiers objets (.o) pour chaque programme
|
||||
OBJS_BASIC = $(addprefix $(OBJDIR)/, $(SRCS_BASIC:.c=.o))
|
||||
OBJS_OPTI = $(addprefix $(OBJDIR)/, $(SRCS_OPTI:.c=.o))
|
||||
|
||||
# Noms finaux des exécutables en fonction de la cible (nano ou host)
|
||||
EXEC_BASIC = $(EXE_BASIC)$(EXEC_SUFFIX)
|
||||
EXEC_OPTI = $(EXE_OPTI)$(EXEC_SUFFIX)
|
||||
|
||||
# --- RÈGLES DE COMPILATION ---
|
||||
|
||||
# Règle par défaut : build les deux programmes
|
||||
all: $(EXEC_BASIC) $(EXEC_OPTI)
|
||||
|
||||
# Règles pour build individuellement : "make basic" ou "make opti"
|
||||
basic: $(EXEC_BASIC)
|
||||
opti: $(EXEC_OPTI)
|
||||
|
||||
# Règle de compilation des .c en .o
|
||||
# Le "| $(OBJDIR)" signifie que le dossier doit exister, mais que sa date de modification ne force pas la recompilation
|
||||
$(OBJDIR)/%.o: %.c | $(OBJDIR)
|
||||
$(CC) $(CFLAGS) $< -o $@
|
||||
|
||||
# Édition de liens (Linker) pour le programme basique
|
||||
$(EXEC_BASIC): $(OBJS_BASIC)
|
||||
$(LD) $(OBJS_BASIC) $(LDFLAGS) -o $@
|
||||
|
||||
# Édition de liens (Linker) pour le programme optimisé
|
||||
$(EXEC_OPTI): $(OBJS_OPTI)
|
||||
$(LD) $(OBJS_OPTI) $(LDFLAGS) -o $@
|
||||
|
||||
# Création du dossier d'objets
|
||||
$(OBJDIR):
|
||||
mkdir -p $(OBJDIR)
|
||||
|
||||
# --- RÈGLES DE NETTOYAGE ET DUMP ---
|
||||
|
||||
clean:
|
||||
rm -Rf $(OBJDIR) $(EXEC_BASIC) $(EXEC_OPTI) *~ t_*.txt
|
||||
@@ -83,7 +69,6 @@ dump_basic: $(EXEC_BASIC)
|
||||
dump_opti: $(EXEC_OPTI)
|
||||
$(OBJDUMP) -dS $(EXEC_OPTI) > t_opti.txt
|
||||
|
||||
# Inclusion des dépendances générées par l'option -MD
|
||||
-include $(OBJS_BASIC:.o=.d) $(OBJS_OPTI:.o=.d)
|
||||
|
||||
.PHONY: all basic opti clean clean_all dump_basic dump_opti
|
||||
@@ -1,5 +1,8 @@
|
||||
EXE=ex2
|
||||
SRCS=$(wildcard *.c)
|
||||
EXE_BASIC=basic
|
||||
EXE_OPTI=optimized
|
||||
|
||||
SRCS_BASIC=basic.c
|
||||
SRCS_OPTI=optimized.c
|
||||
|
||||
ifeq ($(target),)
|
||||
target=nano
|
||||
@@ -10,14 +13,15 @@ CFLAGS=-Wall -Wextra -g -c -O0 -MD -std=gnu11 -D_GNU_SOURCE
|
||||
ifeq ($(target),nano)
|
||||
TOOLCHAIN_PATH=/buildroot/output/host/usr/bin/
|
||||
TOOLCHAIN=$(TOOLCHAIN_PATH)aarch64-linux-
|
||||
CFLAGS+=-mcpu=cortex-a53 -funwind-tables -fno-omit-frame-pointer
|
||||
##CFLAGS+=-O2
|
||||
OBJDIR=.obj/nano
|
||||
EXEC=$(EXE)
|
||||
CFLAGS+=-mcpu=cortex-a53 -funwind-tables
|
||||
CFLAGS+=-fno-omit-frame-pointer
|
||||
#CFLAGS+=-O2
|
||||
EXEC_SUFFIX=
|
||||
endif
|
||||
|
||||
ifeq ($(target),host)
|
||||
EXEC=$(EXE)_h
|
||||
TOOLCHAIN=
|
||||
EXEC_SUFFIX=_h
|
||||
endif
|
||||
|
||||
CC=$(TOOLCHAIN)gcc
|
||||
@@ -27,28 +31,44 @@ STRIP=$(TOOLCHAIN)strip
|
||||
OBJDUMP=$(TOOLCHAIN)objdump
|
||||
|
||||
OBJDIR=.obj/$(target)
|
||||
OBJS= $(addprefix $(OBJDIR)/, $(SRCS:.c=.o))
|
||||
|
||||
$(OBJDIR)/%o: %c
|
||||
OBJS_BASIC = $(addprefix $(OBJDIR)/, $(SRCS_BASIC:.c=.o))
|
||||
OBJS_OPTI = $(addprefix $(OBJDIR)/, $(SRCS_OPTI:.c=.o))
|
||||
|
||||
EXEC_BASIC = $(EXE_BASIC)$(EXEC_SUFFIX)
|
||||
EXEC_OPTI = $(EXE_OPTI)$(EXEC_SUFFIX)
|
||||
|
||||
|
||||
all: $(EXEC_BASIC) $(EXEC_OPTI)
|
||||
|
||||
basic: $(EXEC_BASIC)
|
||||
opti: $(EXEC_OPTI)
|
||||
|
||||
$(OBJDIR)/%.o: %.c | $(OBJDIR)
|
||||
$(CC) $(CFLAGS) $< -o $@
|
||||
|
||||
all: $(OBJDIR)/ $(EXEC)
|
||||
$(EXEC_BASIC): $(OBJS_BASIC)
|
||||
$(LD) $(OBJS_BASIC) $(LDFLAGS) -o $@
|
||||
|
||||
$(EXEC): $(OBJS) $(LINKER_SCRIPT)
|
||||
$(LD) $(OBJS) $(LDFLAGS) -o $@
|
||||
$(EXEC_OPTI): $(OBJS_OPTI)
|
||||
$(LD) $(OBJS_OPTI) $(LDFLAGS) -o $@
|
||||
|
||||
$(OBJDIR)/:
|
||||
$(OBJDIR):
|
||||
mkdir -p $(OBJDIR)
|
||||
|
||||
|
||||
clean:
|
||||
rm -Rf $(OBJDIR) $(EXEC) $(EXEC)_s *~ t.txt
|
||||
rm -Rf $(OBJDIR) $(EXEC_BASIC) $(EXEC_OPTI) *~ t_*.txt
|
||||
|
||||
clean_all: clean
|
||||
rm -Rf .obj $(EXE) $(EXE)_s $(EXE)_a $(EXE)_a_s $(EXE)_h $(EXE)_h_s
|
||||
rm -Rf .obj $(EXE_BASIC)* $(EXE_OPTI)*
|
||||
|
||||
dump: all
|
||||
$(OBJDUMP) -dS $(EXEC) > t.txt
|
||||
dump_basic: $(EXEC_BASIC)
|
||||
$(OBJDUMP) -dS $(EXEC_BASIC) > t_basic.txt
|
||||
|
||||
-include $(OBJS:.o=.d)
|
||||
dump_opti: $(EXEC_OPTI)
|
||||
$(OBJDUMP) -dS $(EXEC_OPTI) > t_opti.txt
|
||||
|
||||
.PHONY: all clean clean_all dump
|
||||
-include $(OBJS_BASIC:.o=.d) $(OBJS_OPTI:.o=.d)
|
||||
|
||||
.PHONY: all basic opti clean clean_all dump_basic dump_opti
|
||||
31
src/05-optimization/ex02/optimized.c
Normal file
31
src/05-optimization/ex02/optimized.c
Normal file
@@ -0,0 +1,31 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define SIZE 65536
|
||||
|
||||
/*
 * qsort() comparator ordering `short` values in ascending order.
 *
 * a, b: pointers to the two array elements being compared.
 * Returns a negative value if *a < *b, zero if they are equal and a
 * positive value if *a > *b.
 *
 * The pointers are read through `const short *` so the const qualifier of
 * the qsort() arguments is not cast away. Both operands are promoted to
 * int before the subtraction, so the difference cannot overflow as long as
 * int is wider than short.
 */
static int compare (const void* a, const void* b)
{
    const short* lhs = (const short*)a;
    const short* rhs = (const short*)b;

    return *lhs - *rhs;
}
|
||||
|
||||
int main()
|
||||
{
|
||||
// generate data
|
||||
short data[SIZE];
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
data[i] = rand() % 512;
|
||||
}
|
||||
|
||||
qsort(data, SIZE, sizeof(data[0]), compare);
|
||||
|
||||
|
||||
long long sum = 0;
|
||||
for (int j = 0; j < 10000; j++) {
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
if (data[i] >= 256) {
|
||||
sum += data[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
printf ("sum=%lld\n", sum);
|
||||
}
|
||||
Reference in New Issue
Block a user