diff --git a/Student_Cuda_Tools_Reduction/out/Reduce_Generic_LongII_justesse.html b/Student_Cuda_Tools_Reduction/out/Reduce_Generic_LongII_justesse.html
new file mode 100644
index 0000000..a934ff6
--- /dev/null
+++ b/Student_Cuda_Tools_Reduction/out/Reduce_Generic_LongII_justesse.html
@@ -0,0 +1,224 @@
+
+
+
+
+
+
+ Reduce_Generic_LongII_justesse
+
+
+
+
+
+
+Reduce_Generic_LongII_justesse
+
+
+
+
+Summary
+
+
+ | Tests |
+ Errors |
+ Success |
+ Time (s) |
+
+
+ | 0 |
+ 0 |
+ 100% |
+ 80.000000 |
+
+
+
+
+Test suites
+
+
+
+Suite: TestReductionGenericLongII
+
+
+ | Name |
+ Errors |
+ Success |
+ Time (s) |
+
+
+ | testDB2 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB4 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB8 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB16 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB32 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB64 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB128 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB256 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB512 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testDB1024 |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testGrid |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testMonoBlock |
+ 0 |
+ true |
+ 0.000000 |
+
+
+ | testspecialGridDGXMAX |
+ 0 |
+ true |
+ 39.000000 |
+
+
+ | testSpecialGrid2 |
+ 0 |
+ true |
+ 40.000000 |
+
+
+ | testSpecialeMax |
+ 0 |
+ true |
+ 1.000000 |
+
+
+Back to top
+
+
+
+
+
+
+ Valid XHTML 1.0 Strict
+
+
+
+
diff --git a/Student_Cuda_Tools_Reduction/out/Reduce_Generic_LongII_performance.html b/Student_Cuda_Tools_Reduction/out/Reduce_Generic_LongII_performance.html
new file mode 100644
index 0000000..eaa5bff
--- /dev/null
+++ b/Student_Cuda_Tools_Reduction/out/Reduce_Generic_LongII_performance.html
@@ -0,0 +1,140 @@
+
+
+
+
+
+
+ Reduce_Generic_LongII_performance
+
+
+
+
+
+
+Reduce_Generic_LongII_performance
+
+
+
+
+Summary
+
+
+ | Tests |
+ Errors |
+ Success |
+ Time (s) |
+
+
+ | 1 |
+ 0 |
+ 100% |
+ 11.000000 |
+
+
+
+
+Test suites
+
+
+
+Suite: TestPerformance_RunnableGPU_A
+
+
+ | Name |
+ Errors |
+ Success |
+ Time (s) |
+
+
+ | performanceOnly |
+ 0 |
+ true |
+ 11.000000 |
+
+
+Back to top
+
+
+
+
+
+
+ Valid XHTML 1.0 Strict
+
+
+
+
diff --git a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/long/PII/device/reductionLongII_device.cu b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/long/PII/device/reductionLongII_device.cu
index 1d8d130..213c4d5 100755
--- a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/long/PII/device/reductionLongII_device.cu
+++ b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/long/PII/device/reductionLongII_device.cu
@@ -1,4 +1,4 @@
-#include "Thread1D_long.cu.h"
+#include "Thread2D_long.cu.h"
#include "cudas.h"
#include "Reduction.cu.h"
@@ -23,10 +23,16 @@ static __device__ void addAtomic(long* ptrX , long y);
/**
* TID partout en tabSM
*/
-__global__ void KLongProtocoleII(long* ptrSumGM)
- {
+__global__
+void KLongProtocoleII(long* ptrSumGM) {
// TODO ReductionLongII
- }
+ // Kernel entry point: each thread stores one value in shared memory, then the per-block array is reduced into *ptrSumGM.
+ extern __shared__ long tabSM[]; // dynamically sized shared array; its byte size comes from the launch configuration
+ reductionIntraThread(tabSM); // writes this thread's value into tabSM[TID_LOCAL]
+ __syncthreads(); // every slot of tabSM must be written before the reduction reads it
+
+ Reduction::reduce(add, addAtomic, tabSM, ptrSumGM); // presumably folds tabSM with add, then merges into *ptrSumGM with addAtomic - confirm against Reduction.cu.h
+}
/*--------------------------------------*\
|* Private *|
@@ -35,8 +41,9 @@ __global__ void KLongProtocoleII(long* ptrSumGM)
/**
* TID partout en tabSM
*/
-__device__ void reductionIntraThread(long* tabSM)
- { // Rappel : Dans le protocoleII on cherche a calculer
+__device__
+void reductionIntraThread(long* tabSM) {
+ // Rappel : Dans le protocoleII on cherche a calculer
//
// x=x+i avec i in [0,N]
//
@@ -70,7 +77,12 @@ __device__ void reductionIntraThread(long* tabSM)
// pour TID utiliser const long TID=Thread2D_long::tid(); // (nouvelle methode)
// pour TID_LOCAL utiliser const int TID_LOCAL=Thread2D::tidLocal(); // (methode habituelle)
- }
+ //
+ const long TID=Thread2D_long::tid();
+ const int TID_LOCAL=Thread2D::tidLocal();
+
+ tabSM[TID_LOCAL] = TID;
+}
/*----------------------------*\
@@ -78,10 +90,11 @@ __device__ void reductionIntraThread(long* tabSM)
\*---------------------------*/
__inline__
-__device__ long add(long x , long y)
- {
+__device__
+long add(long x , long y) {
// TODO ReductionLongII
- }
+ return x + y; // binary operator handed to Reduction::reduce by KLongProtocoleII: plain sum of the two operands
+}
/**
* Utiliser la methode system, si elle existe
@@ -90,20 +103,20 @@ __device__ long add(long x , long y)
*
* ou la technique du lock vu precedement!
*
- * Question : atomicAdd pour les long existe?
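+ * Question: does atomicAdd exist for long? Not for signed long; atomicAdd(unsigned long long int*, unsigned long long int) exists, and where long is 64 bits it can be used through a cast.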
*/
__device__ int volatile mutex = 0; //variable global
-__device__ void addAtomic(long* ptrX , long y)
- {
+__device__
+void addAtomic(long* ptrX , long y) {
Lock locker(&mutex);
locker.lock();
// TODO ReductionLongII
+ *ptrX = *ptrX + y; // read-modify-write of *ptrX, performed between locker.lock() and locker.unlock() on the global mutex
locker.unlock();
- }
+}
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/
-
diff --git a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/long/PII/host/ReductionLongII.cu b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/long/PII/host/ReductionLongII.cu
index b52d182..6e0af10 100755
--- a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/long/PII/host/ReductionLongII.cu
+++ b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/long/PII/host/ReductionLongII.cu
@@ -27,25 +27,27 @@ extern __global__ void KLongProtocoleII(long* ptrSumGM);
ReductionLongII::ReductionLongII(const Grid& grid , long* ptrSum,bool isVerbose) :
//RunnableGPU(grid, "Reduce_Generic_LongII_" + to_string(grid.threadCounts()),isVerbose), // classe parente
RunnableGPU(grid, "Reduce_Generic_LongII",isVerbose), // classe parente
- ptrSum(ptrSum)
- {
+ ptrSum(ptrSum) {
// TODO ReductionLongII
- this->sizeSM = -1;
- }
+ this->sizeSM = grid.threadByBlock() * sizeof(long); // shared-memory size in bytes: presumably one long per thread of a block - confirm threadByBlock() semantics
-ReductionLongII::~ReductionLongII()
- {
+ GM::mallocLong0(&ptrSumGM); // allocates the accumulator ptrSumGM (presumably zero-initialised, per the helper's name - confirm); freed in the destructor
+}
+
+ReductionLongII::~ReductionLongII() {
// TODO ReductionLongII
- }
+ GM::free(ptrSumGM); // releases the buffer obtained with GM::mallocLong0 in the constructor
+}
/*--------------------------------------*\
|* Methode *|
\*-------------------------------------*/
-void ReductionLongII::run()
- {
+void ReductionLongII::run() {
// TODO ReductionLongII
- }
+ KLongProtocoleII<<<dg, db, sizeSM>>>(ptrSumGM); // launch syntax is <<<grid, block, dynamic shared-memory bytes>>>; dg/db are presumably the dims inherited from RunnableGPU - confirm
+ GM::memcpyDToH_long(ptrSum, ptrSumGM); // presumably copies the result from ptrSumGM back into the caller's *ptrSum - confirm
+}
/*----------------------------------------------------------------------*\
|* End *|
diff --git a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/long/PII/host/ReductionLongII.h b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/long/PII/host/ReductionLongII.h
index 82d8193..774d7b2 100755
--- a/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/long/PII/host/ReductionLongII.h
+++ b/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/long/PII/host/ReductionLongII.h
@@ -8,8 +8,7 @@
|* Declaration *|
\*---------------------------------------------------------------------*/
-class ReductionLongII: public RunnableGPU
- {
+class ReductionLongII: public RunnableGPU {
/*--------------------------------------*\
|* Constructor *|
\*-------------------------------------*/
@@ -44,7 +43,7 @@ class ReductionLongII: public RunnableGPU
long* ptrSumGM;
size_t sizeSM;
- };
+};
/*----------------------------------------------------------------------*\
|* End *|
diff --git a/Student_Cuda_Tools_Reduction/src/main/mainTest.cpp b/Student_Cuda_Tools_Reduction/src/main/mainTest.cpp
index 4aec235..b1cbc6c 100755
--- a/Student_Cuda_Tools_Reduction/src/main/mainTest.cpp
+++ b/Student_Cuda_Tools_Reduction/src/main/mainTest.cpp
@@ -59,8 +59,8 @@ void generic() {
VTReductionGenericLongII test3;
// test1.run();
- test2.run();
-// test3.run();
+ // test2.run();
+ test3.run();
}
/*----------------------------------------------------------------------*\
diff --git a/Student_Cuda_Tools_Reduction/src/main/mainUse.cpp b/Student_Cuda_Tools_Reduction/src/main/mainUse.cpp
index 66b9c26..522bafc 100755
--- a/Student_Cuda_Tools_Reduction/src/main/mainUse.cpp
+++ b/Student_Cuda_Tools_Reduction/src/main/mainUse.cpp
@@ -76,16 +76,16 @@ void reduction_generic(bool& isOk) {
// }
// IntII
- {
- UseReductionIntII algo(IS_VERBOSE);
- isOk &= algo.isOk(IS_VERBOSE);
- }
-
- // LongII
// {
- // UseReductionLongII algo(IS_VERBOSE);
+ // UseReductionIntII algo(IS_VERBOSE);
// isOk &= algo.isOk(IS_VERBOSE);
// }
+
+ // LongII
+ {
+ UseReductionLongII algo(IS_VERBOSE);
+ isOk &= algo.isOk(IS_VERBOSE);
+ }
}
/*----------------------------------------------------------------------*\