diff --git a/Student_Cuda_Tools_Reduction/out/Reduce_Generic_IntI_justesse.html b/Student_Cuda_Tools_Reduction/out/Reduce_Generic_IntI_justesse.html
new file mode 100644
index 0000000..e366154
--- /dev/null
+++ b/Student_Cuda_Tools_Reduction/out/Reduce_Generic_IntI_justesse.html
@@ -0,0 +1,212 @@
+
+
+
+
+
+
+ Reduce_Generic_IntI_justesse
+
+
+
+
+
+
+Reduce_Generic_IntI_justesse
+
+
+
+
+Summary
+
+
+ | Tests |
+ Errors |
+ Success |
+ Time (s) |
+
+
+ | 0 |
+ 0 |
+ 100% |
+ 1.000000 |
+
+
+
+
+Test suites
+
+
+ | Name |
+ Tests |
+ Errors |
+ Success |
+ Time (s) |
+
+
+ | TestReductionGenericI |
+ 13 |
+ 0 |
+ 100% |
+ 1.000000 |
+
+
+
+
+Suite: TestReductionGenericI
+
+
+ | Name |
+ Errors |
+ Success |
+ Time (s) |
+
+
+ | testDB2 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB4 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB8 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB16 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB32 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB64 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB128 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB256 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB512 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB1024 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testGrid |
+ 0 |
+ true |
+ 1.000000 |
+
+
+ | testMonoBlock |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testSpecialeMax |
+ 0 |
+ true |
+ 0.000000 |
+
+
+Back to top
+
+
+
+
+
+
+ Valid XHTML 1.0 Strict
+
+
+
+
diff --git a/Student_Cuda_Tools_Reduction/out/Reduce_Generic_IntI_performance.html b/Student_Cuda_Tools_Reduction/out/Reduce_Generic_IntI_performance.html
new file mode 100644
index 0000000..3b01ba1
--- /dev/null
+++ b/Student_Cuda_Tools_Reduction/out/Reduce_Generic_IntI_performance.html
@@ -0,0 +1,140 @@
+
+
+
+
+
+
+ Reduce_Generic_IntI_performance
+
+
+
+
+
+
+Reduce_Generic_IntI_performance
+
+
+
+
+Summary
+
+
+ | Tests |
+ Errors |
+ Success |
+ Time (s) |
+
+
+ | 1 |
+ 0 |
+ 100% |
+ 11.000000 |
+
+
+
+
+Test suites
+
+
+
+Suite: TestPerformance_RunnableGPU_A
+
+
+ | Name |
+ Errors |
+ Success |
+ Time (s) |
+
+
+ | performanceOnly |
+ 0 |
+ true |
+ 11.000000 |
+
+
+Back to top
+
+
+
+
+
+
+ Valid XHTML 1.0 Strict
+
+
+
+
diff --git a/Student_Cuda_Tools_Reduction/src/core/01_algorithme/generic/Reduction.cu.h b/Student_Cuda_Tools_Reduction/src/core/01_algorithme/generic/Reduction.cu.h
index 23eed8a..fbd63b9 100755
--- a/Student_Cuda_Tools_Reduction/src/core/01_algorithme/generic/Reduction.cu.h
+++ b/Student_Cuda_Tools_Reduction/src/core/01_algorithme/generic/Reduction.cu.h
@@ -55,9 +55,11 @@ class Reduction {
//static __device__ void reduce(T (*OP)(T, T) ,void (*ATOMIC_OP)(T*, T), T* tabSM, T* ptrResultGM) // same as above but without the define
{
// Same principle as ReductionAdd
+ reductionIntraBlock(OP,tabSM);
// TODO ReductionGeneric
// Same principle as ReductionAdd
+ reductionInterBlock(ATOMIC_OP,tabSM, ptrResultGM);
}
private:
@@ -76,6 +78,11 @@ class Reduction {
// TODO ReductionGeneric
// Same principle as ReductionAdd
// OP is the variable holding the binary operator
+
+ const int tidLocal = Thread2D::tidLocal();
+ if (tidLocal < middle) {
+ tabSM[tidLocal] = OP(tabSM[tidLocal], tabSM[tidLocal + middle]);
+ }
}
/**
@@ -88,6 +95,15 @@ class Reduction {
// TODO ReductionGeneric
// Same principle as ReductionAdd
// OP is the variable holding the binary operator
+
+ const int NB_THEAD_LOCAL = Thread2D::nbThreadLocal();
+ int middle = NB_THEAD_LOCAL >> 1;
+ while (middle > 0) {
+ ecrasement(OP,tabSM, middle);
+ __syncthreads();
+ middle = middle >> 1;
+ }
+
}
/*--------------------------------------*\
@@ -101,6 +117,10 @@ class Reduction {
// TODO ReductionGeneric
// Same principle as ReductionAdd
// ATOMIC_OP is the variable holding the atomic binary operator
+
+ if (Thread2D::tidLocal() == 0) {
+ ATOMIC_OP(ptrResultGM, tabSM[0]);
+ }
}
};
diff --git a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/device/reductionIntI_device.cu b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/device/reductionIntI_device.cu
index 6f312a0..37793f9 100755
--- a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/device/reductionIntI_device.cu
+++ b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/device/reductionIntI_device.cu
@@ -1,4 +1,4 @@
-#include "Thread1D.cu.h"
+#include "Thread2D.cu.h"
#include "cudas.h"
#include "Reduction.cu.h"
@@ -23,10 +23,16 @@ static __device__ void addAtomicV2(int* ptrX , int y);
/**
* Kernel: each thread stores 1 in its tabSM slot, then Reduction::reduce combines the slots with add and merges the block result into *ptrSumGM with addAtomicV1.
*/
-__global__ void KIntProtocoleI(int* ptrSumGM)
- {
+__global__
+void KIntProtocoleI(int* ptrSumGM) {
// tabSM is declared extern __shared__ (size not fixed here); the __syncthreads() barrier sits between the per-thread write and the reduction.
- }
+
+ extern __shared__ int tabSM[];
+ reductionIntraThread(tabSM);
+ __syncthreads();
+
+ Reduction::reduce(add, addAtomicV1, tabSM, ptrSumGM);
+}
/*--------------------------------------*\
|* Private *|
@@ -35,29 +41,33 @@ __global__ void KIntProtocoleI(int* ptrSumGM)
/**
* Stores 1 in the tabSM slot indexed by the value of Thread2D::tidLocal() for the calling thread.
*/
-__device__ void reductionIntraThread(int* tabSM)
- {
+__device__
+void reductionIntraThread(int* tabSM) {
// Single write per call, no loop: only the slot at index tidLocal is touched.
- }
+ const int tidLocal = Thread2D::tidLocal();
+ tabSM[tidLocal] = 1;
+}
/*----------------------------*\
|* Operateur reduction *|
\*---------------------------*/
__inline__
-__device__ int add(int x , int y)
- {
+__device__
+int add(int x , int y) {
// Binary reduction operator: returns the integer sum x + y.
- }
+ return x + y;
+}
/**
* Atomic accumulation: adds y to *ptrX with the built-in atomicAdd(destinationPointer, sourceValue).
*/
__inline__
-__device__ void addAtomicV1(int* ptrX , int y)
- {
+__device__
+void addAtomicV1(int* ptrX , int y) {
// The value returned by atomicAdd is not used.
- }
+ atomicAdd(ptrX, y);
+}
/**
* 10x slower, but more flexible!
@@ -65,17 +75,17 @@ __device__ void addAtomicV1(int* ptrX , int y)
* Also needed for objects, for example
*/
__device__ int volatile mutex = 0; //global __device__ variable; its address is handed to Lock in addAtomicV2
-__device__ void addAtomicV2(int* ptrX , int y)
- {
+__device__
+void addAtomicV2(int* ptrX , int y) {
Lock locker(&mutex);
locker.lock();
// Plain (non-atomic) read-modify-write of *ptrX, placed between lock() and unlock().
+ *ptrX = *ptrX + y;
locker.unlock();
- }
+}
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/
-
diff --git a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/host/ReductionIntI.cu b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/host/ReductionIntI.cu
index b65099a..73fe4f4 100755
--- a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/host/ReductionIntI.cu
+++ b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/host/ReductionIntI.cu
@@ -14,7 +14,9 @@ using std::to_string;
|* Imported *|
\*---------------------------------------------------------------------*/
-extern __global__ void KIntProtocoleI(int* ptrSumGM);
+extern
+__global__
+void KIntProtocoleI(int* ptrSumGM);
/*----------------------------------------------------------------------*\
|* Implementation *|
@@ -27,25 +29,28 @@ extern __global__ void KIntProtocoleI(int* ptrSumGM);
ReductionIntI::ReductionIntI(const Grid& grid , int* ptrSum,bool isVerbose) :
//RunnableGPU(grid, "Redude_Generic_IntI_" + to_string(grid.threadCounts()),isVerbose), // parent class
RunnableGPU(grid, "Reduce_Generic_IntI",isVerbose), // parent class
- ptrSum(ptrSum)
- {
+ ptrSum(ptrSum) {
// sizeSM = grid.threadByBlock() * sizeof(int) bytes; ptrSumGM is obtained from GM::mallocInt0 (presumably zero-initialised - confirm).
- this->sizeSM = -1;
- }
+ this->sizeSM = grid.threadByBlock() * sizeof(int);
-ReductionIntI::~ReductionIntI()
- {
+ GM::mallocInt0(&ptrSumGM);
+}
+
+ReductionIntI::~ReductionIntI() {
// Releases ptrSumGM through GM::free.
- }
+ GM::free(ptrSumGM);
+}
/*--------------------------------------*\
|* Methode *|
\*-------------------------------------*/
-void ReductionIntI::run()
- {
+void ReductionIntI::run() {
// Launches KIntProtocoleI with sizeSM bytes of dynamic shared memory, then copies the device-side sum into *ptrSum.
- }
+
+ // NOTE(review): the launch configuration was garbled ("<<sizeSM>>>" is not valid CUDA); dg/db are assumed to be the grid/block dims held by RunnableGPU - confirm.
+ KIntProtocoleI<<<dg, db, sizeSM>>>(ptrSumGM);
+ GM::memcpyDToH_int(ptrSum, ptrSumGM);
+}
/*----------------------------------------------------------------------*\
|* End *|
diff --git a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/host/ReductionIntI.h b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/host/ReductionIntI.h
index 90fe7f2..23870da 100755
--- a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/host/ReductionIntI.h
+++ b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/host/ReductionIntI.h
@@ -8,8 +8,7 @@
|* Declaration *|
\*---------------------------------------------------------------------*/
-class ReductionIntI: public RunnableGPU
- {
+class ReductionIntI: public RunnableGPU {
/*--------------------------------------*\
|* Constructor *|
\*-------------------------------------*/
@@ -44,7 +43,7 @@ class ReductionIntI: public RunnableGPU
int* ptrSumGM;
size_t sizeSM;
- };
+};
/*----------------------------------------------------------------------*\
|* End *|
diff --git a/Student_Cuda_Tools_Reduction/src/main/mainTest.cpp b/Student_Cuda_Tools_Reduction/src/main/mainTest.cpp
index 6e4ef0b..b69c797 100755
--- a/Student_Cuda_Tools_Reduction/src/main/mainTest.cpp
+++ b/Student_Cuda_Tools_Reduction/src/main/mainTest.cpp
@@ -29,8 +29,8 @@ static void generic();
int mainTest() {
// enable only the desired exercise (TP) below, not all of them
- add();
- //generic();
+ // add();
+ generic();
return EXIT_SUCCESS;
}
diff --git a/Student_Cuda_Tools_Reduction/src/main/mainUse.cpp b/Student_Cuda_Tools_Reduction/src/main/mainUse.cpp
index d0a6053..b1e0d72 100755
--- a/Student_Cuda_Tools_Reduction/src/main/mainUse.cpp
+++ b/Student_Cuda_Tools_Reduction/src/main/mainUse.cpp
@@ -36,8 +36,8 @@ int mainUse() {
bool isOk = true;
- reduction_add(isOk); // voir code ci-dessous pour activer la version voulue
- // reduction_generic(isOk); // voir code ci-dessous pour activer la version voulue
+ // reduction_add(isOk); // voir code ci-dessous pour activer la version voulue
+ reduction_generic(isOk); // voir code ci-dessous pour activer la version voulue
print(isOk);
@@ -76,16 +76,16 @@ void reduction_generic(bool& isOk) {
}
// IntII
- {
- UseReductionIntII algo(IS_VERBOSE);
- isOk &= algo.isOk(IS_VERBOSE);
- }
+ // {
+ // UseReductionIntII algo(IS_VERBOSE);
+ // isOk &= algo.isOk(IS_VERBOSE);
+ // }
// LongII
- {
- UseReductionLongII algo(IS_VERBOSE);
- isOk &= algo.isOk(IS_VERBOSE);
- }
+ // {
+ // UseReductionLongII algo(IS_VERBOSE);
+ // isOk &= algo.isOk(IS_VERBOSE);
+ // }
}
/*----------------------------------------------------------------------*\