From 78fb87e978d096139e565170c4737ce4163bc965 Mon Sep 17 00:00:00 2001 From: Klagarge Date: Mon, 17 Nov 2025 18:34:58 +0100 Subject: [PATCH] feat(reduction): add reduction generic PI int --- .../out/Reduce_Generic_IntI_justesse.html | 212 ++++++++++++++++++ .../out/Reduce_Generic_IntI_performance.html | 140 ++++++++++++ .../core/01_algorithme/generic/Reduction.cu.h | 20 ++ .../int/PI/device/reductionIntI_device.cu | 44 ++-- .../generic/int/PI/host/ReductionIntI.cu | 27 ++- .../generic/int/PI/host/ReductionIntI.h | 5 +- .../src/main/mainTest.cpp | 4 +- .../src/main/mainUse.cpp | 20 +- 8 files changed, 429 insertions(+), 43 deletions(-) create mode 100644 Student_Cuda_Tools_Reduction/out/Reduce_Generic_IntI_justesse.html create mode 100644 Student_Cuda_Tools_Reduction/out/Reduce_Generic_IntI_performance.html diff --git a/Student_Cuda_Tools_Reduction/out/Reduce_Generic_IntI_justesse.html b/Student_Cuda_Tools_Reduction/out/Reduce_Generic_IntI_justesse.html new file mode 100644 index 0000000..e366154 --- /dev/null +++ b/Student_Cuda_Tools_Reduction/out/Reduce_Generic_IntI_justesse.html @@ -0,0 +1,212 @@ + + + + + + + Reduce_Generic_IntI_justesse + + + + + + +

Reduce_Generic_IntI_justesse

+ +
+Designed by CppTest +
+
+ +

Summary

+ + + + + + + + + + + + + +
Tests | Errors | Success | Time (s)
0 | 0 | 100% | 1.000000
+
+ +

Test suites

+ + + + + + + + + + + + + + + +
Name | Tests | Errors | Success | Time (s)
TestReductionGenericI | 13 | 0 | 100% | 1.000000
+
+ +

Suite: TestReductionGenericI

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Name | Errors | Success | Time (s)
testDB2 | 0 | true | 0.000000
testDB4 | 0 | true | 0.000000
testDB8 | 0 | true | 0.000000
testDB16 | 0 | true | 0.000000
testDB32 | 0 | true | 0.000000
testDB64 | 0 | true | 0.000000
testDB128 | 0 | true | 0.000000
testDB256 | 0 | true | 0.000000
testDB512 | 0 | true | 0.000000
testDB1024 | 0 | true | 0.000000
testGrid | 0 | true | 1.000000
testMonoBlock | 0 | true | 0.000000
testSpecialeMax | 0 | true | 0.000000
+

Back to top +

+
+ + +

+ + Valid XHTML 1.0 Strict + +

+ + diff --git a/Student_Cuda_Tools_Reduction/out/Reduce_Generic_IntI_performance.html b/Student_Cuda_Tools_Reduction/out/Reduce_Generic_IntI_performance.html new file mode 100644 index 0000000..3b01ba1 --- /dev/null +++ b/Student_Cuda_Tools_Reduction/out/Reduce_Generic_IntI_performance.html @@ -0,0 +1,140 @@ + + + + + + + Reduce_Generic_IntI_performance + + + + + + +

Reduce_Generic_IntI_performance

+ +
+Designed by CppTest +
+
+ +

Summary

+ + + + + + + + + + + + + +
Tests | Errors | Success | Time (s)
1 | 0 | 100% | 11.000000
+
+ +

Test suites

+ + + + + + + + + + + + + + + +
Name | Tests | Errors | Success | Time (s)
TestPerformance_RunnableGPU_A | 1 | 0 | 100% | 11.000000
+
+ +

Suite: TestPerformance_RunnableGPU_A

+ + + + + + + + + + + + + +
Name | Errors | Success | Time (s)
performanceOnly | 0 | true | 11.000000
+

Back to top +

+
+ + +

+ + Valid XHTML 1.0 Strict + +

+ + diff --git a/Student_Cuda_Tools_Reduction/src/core/01_algorithme/generic/Reduction.cu.h b/Student_Cuda_Tools_Reduction/src/core/01_algorithme/generic/Reduction.cu.h index 23eed8a..fbd63b9 100755 --- a/Student_Cuda_Tools_Reduction/src/core/01_algorithme/generic/Reduction.cu.h +++ b/Student_Cuda_Tools_Reduction/src/core/01_algorithme/generic/Reduction.cu.h @@ -55,9 +55,11 @@ class Reduction { //static __device__ void reduce(T (*OP)(T, T) ,void (*ATOMIC_OP)(T*, T), T* tabSM, T* ptrResultGM) // idem ci-dessus mais sans define { // Meme principe que ReductionAdd + reductionIntraBlock(OP,tabSM); // TODO ReductionGeneric // Meme principe que ReductionAdd + reductionInterBlock(ATOMIC_OP,tabSM, ptrResultGM); } private: @@ -76,6 +78,11 @@ class Reduction { // TODO ReductionGeneric // Meme principe que ReductionAdd // OP est la variable representant l'operateur binaire + + const int tidLocal = Thread2D::tidLocal(); + if (tidLocal < middle) { + tabSM[tidLocal] = OP(tabSM[tidLocal], tabSM[tidLocal + middle]); + } } /** @@ -88,6 +95,15 @@ class Reduction { // TODO ReductionGeneric // Meme principe que ReductionAdd // OP est la variable representant l'operateur binaire + + const int NB_THEAD_LOCAL = Thread2D::nbThreadLocal(); + int middle = NB_THEAD_LOCAL >> 1; + while (middle > 0) { + ecrasement(OP,tabSM, middle); + __syncthreads(); + middle = middle >> 1; + } + } /*--------------------------------------*\ @@ -101,6 +117,10 @@ class Reduction { // TODO ReductionGeneric // Meme principe que ReductionAdd // ATOMIC_OP est la variable representant l'operateur binaire atomic + + if (Thread2D::tidLocal() == 0) { + ATOMIC_OP(ptrResultGM, tabSM[0]); + } } }; diff --git a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/device/reductionIntI_device.cu b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/device/reductionIntI_device.cu index 6f312a0..37793f9 100755 --- 
a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/device/reductionIntI_device.cu +++ b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/device/reductionIntI_device.cu @@ -1,4 +1,4 @@ -#include "Thread1D.cu.h" +#include "Thread2D.cu.h" #include "cudas.h" #include "Reduction.cu.h" @@ -23,10 +23,16 @@ static __device__ void addAtomicV2(int* ptrX , int y); /** * 1 partout en tabSM */ -__global__ void KIntProtocoleI(int* ptrSumGM) - { +__global__ +void KIntProtocoleI(int* ptrSumGM) { // TODO ReductionIntI - } + + extern __shared__ int tabSM[]; + reductionIntraThread(tabSM); + __syncthreads(); + + Reduction::reduce(add, addAtomicV1, tabSM, ptrSumGM); +} /*--------------------------------------*\ |* Private *| @@ -35,29 +41,33 @@ __global__ void KIntProtocoleI(int* ptrSumGM) /** * 1 partout en tabSM */ -__device__ void reductionIntraThread(int* tabSM) - { +__device__ +void reductionIntraThread(int* tabSM) { // TODO ReductionIntI - } + const int tidLocal = Thread2D::tidLocal(); + tabSM[tidLocal] = 1; +} /*----------------------------*\ |* Operateur reduction *| \*---------------------------*/ __inline__ -__device__ int add(int x , int y) - { +__device__ +int add(int x , int y) { // TODO ReductionIntI - } + return x + y; +} /** * Utiliser la methode system : atomicAdd(pointeurDestination, valeurSource); */ __inline__ -__device__ void addAtomicV1(int* ptrX , int y) - { +__device__ +void addAtomicV1(int* ptrX , int y) { // TODO ReductionIntI - } + atomicAdd(ptrX, y); +} /** * 10x plus lent,mais plus flexible! 
@@ -65,17 +75,17 @@ __device__ void addAtomicV1(int* ptrX , int y) * Necessaire aussi pour des objets par exemple */ __device__ int volatile mutex = 0; //variable global -__device__ void addAtomicV2(int* ptrX , int y) - { +__device__ +void addAtomicV2(int* ptrX , int y) { Lock locker(&mutex); locker.lock(); // TODO ReductionIntI + *ptrX = *ptrX + y; locker.unlock(); - } +} /*----------------------------------------------------------------------*\ |* End *| \*---------------------------------------------------------------------*/ - diff --git a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/host/ReductionIntI.cu b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/host/ReductionIntI.cu index b65099a..73fe4f4 100755 --- a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/host/ReductionIntI.cu +++ b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/host/ReductionIntI.cu @@ -14,7 +14,9 @@ using std::to_string; |* Imported *| \*---------------------------------------------------------------------*/ -extern __global__ void KIntProtocoleI(int* ptrSumGM); +extern +__global__ +void KIntProtocoleI(int* ptrSumGM); /*----------------------------------------------------------------------*\ |* Implementation *| @@ -27,25 +29,28 @@ extern __global__ void KIntProtocoleI(int* ptrSumGM); ReductionIntI::ReductionIntI(const Grid& grid , int* ptrSum,bool isVerbose) : //RunnableGPU(grid, "Redude_Generic_IntI_" + to_string(grid.threadCounts()),isVerbose), // classe parente RunnableGPU(grid, "Reduce_Generic_IntI",isVerbose), // classe parente - ptrSum(ptrSum) - { + ptrSum(ptrSum) { // TODO ReductionIntI - this->sizeSM = -1; - } + this->sizeSM = grid.threadByBlock() * sizeof(int); -ReductionIntI::~ReductionIntI() - { + GM::mallocInt0(&ptrSumGM); +} + +ReductionIntI::~ReductionIntI() { // TODO ReductionIntI - } + GM::free(ptrSumGM); +} /*--------------------------------------*\ |* Methode *| 
\*-------------------------------------*/ -void ReductionIntI::run() - { +void ReductionIntI::run() { // TODO ReductionIntI - } + + KIntProtocoleI<<sizeSM>>>(ptrSumGM); + GM::memcpyDToH_int(ptrSum, ptrSumGM); +} /*----------------------------------------------------------------------*\ |* End *| diff --git a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/host/ReductionIntI.h b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/host/ReductionIntI.h index 90fe7f2..23870da 100755 --- a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/host/ReductionIntI.h +++ b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/host/ReductionIntI.h @@ -8,8 +8,7 @@ |* Declaration *| \*---------------------------------------------------------------------*/ -class ReductionIntI: public RunnableGPU - { +class ReductionIntI: public RunnableGPU { /*--------------------------------------*\ |* Constructor *| \*-------------------------------------*/ @@ -44,7 +43,7 @@ class ReductionIntI: public RunnableGPU int* ptrSumGM; size_t sizeSM; - }; +}; /*----------------------------------------------------------------------*\ |* End *| diff --git a/Student_Cuda_Tools_Reduction/src/main/mainTest.cpp b/Student_Cuda_Tools_Reduction/src/main/mainTest.cpp index 6e4ef0b..b69c797 100755 --- a/Student_Cuda_Tools_Reduction/src/main/mainTest.cpp +++ b/Student_Cuda_Tools_Reduction/src/main/mainTest.cpp @@ -29,8 +29,8 @@ static void generic(); int mainTest() { // activer ci-dessous seulement le TP voulu (pas tous) - add(); - //generic(); + // add(); + generic(); return EXIT_SUCCESS; } diff --git a/Student_Cuda_Tools_Reduction/src/main/mainUse.cpp b/Student_Cuda_Tools_Reduction/src/main/mainUse.cpp index d0a6053..b1e0d72 100755 --- a/Student_Cuda_Tools_Reduction/src/main/mainUse.cpp +++ b/Student_Cuda_Tools_Reduction/src/main/mainUse.cpp @@ -36,8 +36,8 @@ int mainUse() { bool isOk = true; - reduction_add(isOk); // voir code 
ci-dessous pour activer la version voulue - // reduction_generic(isOk); // voir code ci-dessous pour activer la version voulue + // reduction_add(isOk); // voir code ci-dessous pour activer la version voulue + reduction_generic(isOk); // voir code ci-dessous pour activer la version voulue print(isOk); @@ -76,16 +76,16 @@ void reduction_generic(bool& isOk) { } // IntII - { - UseReductionIntII algo(IS_VERBOSE); - isOk &= algo.isOk(IS_VERBOSE); - } + // { + // UseReductionIntII algo(IS_VERBOSE); + // isOk &= algo.isOk(IS_VERBOSE); + // } // LongII - { - UseReductionLongII algo(IS_VERBOSE); - isOk &= algo.isOk(IS_VERBOSE); - } + // { + // UseReductionLongII algo(IS_VERBOSE); + // isOk &= algo.isOk(IS_VERBOSE); + // } } /*----------------------------------------------------------------------*\