diff --git a/Student_Cuda_Tools_Reduction/out/Reduce_Generic_IntII_justesse.html b/Student_Cuda_Tools_Reduction/out/Reduce_Generic_IntII_justesse.html
new file mode 100644
index 0000000..adb311d
--- /dev/null
+++ b/Student_Cuda_Tools_Reduction/out/Reduce_Generic_IntII_justesse.html
@@ -0,0 +1,194 @@
+
+
+
+
+
+
+ Reduce_Generic_IntII_justesse
+
+
+
+
+
+
+Reduce_Generic_IntII_justesse
+
+
+
+
+Summary
+
+
+ | Tests |
+ Errors |
+ Success |
+ Time (s) |
+
+
+ | 0 |
+ 0 |
+ 100% |
+ 0.000000 |
+
+
+
+
+Test suites
+
+
+
+Suite: TestReductionGenericII
+
+
+ | Name |
+ Errors |
+ Success |
+ Time (s) |
+
+
+ | testDB2 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB4 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB8 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB16 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB32 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB64 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB128 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testGrid |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testMonoBlock |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testSpecialeMax |
+ 0 |
+ true |
+ 0.000000 |
+
+
+Back to top
+
+
+
+
+
+
+ Valid XHTML 1.0 Strict
+
+
+
+
diff --git a/Student_Cuda_Tools_Reduction/out/Reduce_Generic_IntII_performance.html b/Student_Cuda_Tools_Reduction/out/Reduce_Generic_IntII_performance.html
new file mode 100644
index 0000000..b72eeb6
--- /dev/null
+++ b/Student_Cuda_Tools_Reduction/out/Reduce_Generic_IntII_performance.html
@@ -0,0 +1,140 @@
+
+
+
+
+
+
+ Reduce_Generic_IntII_performance
+
+
+
+
+
+
+Reduce_Generic_IntII_performance
+
+
+
+
+Summary
+
+
+ | Tests |
+ Errors |
+ Success |
+ Time (s) |
+
+
+ | 1 |
+ 0 |
+ 100% |
+ 11.000000 |
+
+
+
+
+Test suites
+
+
+
+Suite: TestPerformance_RunnableGPU_A
+
+
+ | Name |
+ Errors |
+ Success |
+ Time (s) |
+
+
+ | performanceOnly |
+ 0 |
+ true |
+ 11.000000 |
+
+
+Back to top
+
+
+
+
+
+
+ Valid XHTML 1.0 Strict
+
+
+
+
diff --git a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PII/device/reductionIntII_device.cu b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PII/device/reductionIntII_device.cu
index 8fd10f2..f4c6c47 100755
--- a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PII/device/reductionIntII_device.cu
+++ b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PII/device/reductionIntII_device.cu
@@ -1,4 +1,4 @@
-#include "Thread1D.cu.h"
+#include "Thread2D.cu.h"
#include "cudas.h"
#include "Reduction.cu.h"
@@ -24,10 +24,16 @@ static __device__ void addAtomicV2(int* ptrX , int y);
/**
* TID partout en tabSM
*/
-__global__ void KIntProtocoleII(int* ptrSumGM)
- {
+__global__
+void KIntProtocoleII(int* ptrSumGM) {
// TODO ReductionIntII
- }
+ // Kernel: every thread stages one value in shared memory, then the staged values are reduced towards *ptrSumGM.
+ extern __shared__ int tabSM[]; // dynamic shared-memory array; its byte size is the third launch-configuration argument
+ reductionIntraThread(tabSM); // each thread writes tabSM[tidLocal]
+ __syncthreads(); // block-wide barrier: tabSM is fully populated before the reduction reads it
+ // NOTE(review): Reduction::reduce lives in Reduction.cu.h (not visible here); presumably it folds tabSM with add and merges each block's result into *ptrSumGM via addAtomicV1 - confirm.
+ Reduction::reduce(add, addAtomicV1, tabSM, ptrSumGM);
+}
/*--------------------------------------*\
|* Private *|
@@ -36,46 +42,52 @@ __global__ void KIntProtocoleII(int* ptrSumGM)
/**
* TID partout en tabSM
*/
-__device__ void reductionIntraThread(int* tabSM)
- {
+__device__
+void reductionIntraThread(int* tabSM) {
// TODO ReductionIntII
- }
+ const int TID = Thread2D::tid(); // presumably the thread's global index in the grid - confirm in Thread2D.cu.h
+ const int tidLocal = Thread2D::tidLocal(); // presumably the thread's index inside its block - confirm in Thread2D.cu.h
+ // Seed the reduction: the value contributed by this thread is its own TID.
+ tabSM[tidLocal] = TID;
+}
/*----------------------------*\
|* Operateur reduction *|
\*---------------------------*/
__inline__
-__device__ int add(int x , int y)
- {
+__device__
+int add(int x , int y) {
// TODO ReductionIntII
- }
+ return x + y; // binary operator handed to Reduction::reduce by KIntProtocoleII: plain integer sum
+}
/**
* Utiliser la methode system : atomicAdd(pointeurDestination, valeurSource);
*/
__inline__
-__device__ void addAtomicV1(int* ptrX , int y)
- {
+__device__
+void addAtomicV1(int* ptrX , int y) {
// TODO ReductionIntII
- }
+ atomicAdd(ptrX, y); // CUDA built-in: atomic read-modify-write that adds y to *ptrX; the returned old value is ignored
+}
/**
* Une alternative, moins performante, mais generalisable serait d'employer un lock
* Tip : le Lock est implementer avec deux methodes atomic
*/
__device__ int volatile mutex = 0; //variable global
-__device__ void addAtomicV2(int* ptrX , int y)
- {
+__device__
+void addAtomicV2(int* ptrX , int y) {
Lock locker(&mutex);
locker.lock();
// TODO ReductionIntII
+ *ptrX = *ptrX + y; // non-atomic read-modify-write, executed between locker.lock() and locker.unlock(); KIntProtocoleII passes addAtomicV1, not this variant
locker.unlock();
- }
+}
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/
-
diff --git a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PII/host/ReductionIntII.cu b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PII/host/ReductionIntII.cu
index 2b7f24b..aa5f542 100755
--- a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PII/host/ReductionIntII.cu
+++ b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PII/host/ReductionIntII.cu
@@ -27,25 +27,29 @@ extern __global__ void KIntProtocoleII(int* ptrSumGM);
ReductionIntII::ReductionIntII(const Grid& grid , int* ptrSum,bool isVerbose) :
//RunnableGPU(grid, "Reduce_Generic_IntII_" + to_string(grid.threadCounts()),isVerbose), // classe parente
RunnableGPU(grid, "Reduce_Generic_IntII" ,isVerbose), // classe parente
- ptrSum(ptrSum)
- {
+ ptrSum(ptrSum) {
// TODO ReductionIntII
- this->sizeSM = -1;
- }
+ this->sizeSM = grid.threadByBlock() * sizeof(int); // shared-memory size in bytes: sizeof(int) per thread (assumes threadByBlock() is the per-block thread count - confirm in Grid)
-ReductionIntII::~ReductionIntII()
- {
+ GM::mallocInt0(&ptrSumGM); // obtains the device buffer stored in ptrSumGM; presumably a single int initialised to 0 - confirm in GM
+}
+
+ReductionIntII::~ReductionIntII() {
// TODO ReductionIntII
- }
+ // Release the buffer that the constructor obtained with GM::mallocInt0.
+ GM::free(ptrSumGM);
+}
/*--------------------------------------*\
|* Methode *|
\*-------------------------------------*/
-void ReductionIntII::run()
- {
+void ReductionIntII::run() {
// TODO ReductionIntII
- }
+ // Launch configuration is <<<grid dims, block dims, dynamic shared-memory bytes>>>; dg/db are assumed to be the dims inherited from RunnableGPU - confirm.
+ KIntProtocoleII<<<dg, db, sizeSM>>>(ptrSumGM);
+ GM::memcpyDToH_int(ptrSum, ptrSumGM); // copies the device result into the caller's *ptrSum (presumably a blocking copy, so it also waits for the kernel - confirm in GM)
+}
/*----------------------------------------------------------------------*\
|* End *|
diff --git a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PII/host/ReductionIntII.h b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PII/host/ReductionIntII.h
index 6e60293..1cfb80d 100755
--- a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PII/host/ReductionIntII.h
+++ b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PII/host/ReductionIntII.h
@@ -8,8 +8,7 @@
|* Declaration *|
\*---------------------------------------------------------------------*/
-class ReductionIntII: public RunnableGPU
- {
+class ReductionIntII: public RunnableGPU {
/*--------------------------------------*\
|* Constructor *|
\*-------------------------------------*/
@@ -44,7 +43,7 @@ class ReductionIntII: public RunnableGPU
int* ptrSumGM;
size_t sizeSM;
- };
+};
/*----------------------------------------------------------------------*\
|* End *|
diff --git a/Student_Cuda_Tools_Reduction/src/main/mainTest.cpp b/Student_Cuda_Tools_Reduction/src/main/mainTest.cpp
index b69c797..4aec235 100755
--- a/Student_Cuda_Tools_Reduction/src/main/mainTest.cpp
+++ b/Student_Cuda_Tools_Reduction/src/main/mainTest.cpp
@@ -58,8 +58,8 @@ void generic() {
VTReductionGenericII test2;
VTReductionGenericLongII test3;
- test1.run();
-// test2.run();
+ // test1.run();
+ test2.run();
// test3.run();
}
diff --git a/Student_Cuda_Tools_Reduction/src/main/mainUse.cpp b/Student_Cuda_Tools_Reduction/src/main/mainUse.cpp
index b1e0d72..66b9c26 100755
--- a/Student_Cuda_Tools_Reduction/src/main/mainUse.cpp
+++ b/Student_Cuda_Tools_Reduction/src/main/mainUse.cpp
@@ -70,17 +70,17 @@ void reduction_add(bool& isOk) {
*/
void reduction_generic(bool& isOk) {
// InbI
- {
- UseReductionIntI algo(IS_VERBOSE);
- isOk &= algo.isOk(IS_VERBOSE);
- }
-
- // IntII
// {
- // UseReductionIntII algo(IS_VERBOSE);
+ // UseReductionIntI algo(IS_VERBOSE);
// isOk &= algo.isOk(IS_VERBOSE);
// }
+ // IntII
+ {
+ UseReductionIntII algo(IS_VERBOSE);
+ isOk &= algo.isOk(IS_VERBOSE);
+ }
+
// LongII
// {
// UseReductionLongII algo(IS_VERBOSE);