feat(reduction): add reduction PI add
This commit is contained in:
@@ -1 +1 @@
|
|||||||
/home/bilat/CBI/Dropbox/02_CBI_LINUX/CoursCuda/toStudent/code/WCudaStudent/Student_Cuda_Tools_Reduction/src/core/01_algorithme/generic/Reduction.cu.h
|
/home/mse15/CUDA/toStudent/code/WCudaStudent/Student_Cuda_Tools_Reduction/src/core/01_algorithme/generic/Reduction.cu.h
|
||||||
@@ -1 +1 @@
|
|||||||
/home/bilat/CBI/Dropbox/02_CBI_LINUX/CoursCuda/toStudent/code/WCudaStudent/Student_Cuda_Tools_Reduction/src/core/01_algorithme/add/ReductionAdd.cu.h
|
/home/mse15/CUDA/toStudent/code/WCudaStudent/Student_Cuda_Tools_Reduction/src/core/01_algorithme/add/ReductionAdd.cu.h
|
||||||
@@ -1 +1 @@
|
|||||||
/home/bilat/CBI/Dropbox/02_CBI_LINUX/CoursCuda/toStudent/code/WCudaStudent/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/add/int/PI/host/ReductionAddIntI.h
|
/home/mse15/CUDA/toStudent/code/WCudaStudent/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/add/int/PI/host/ReductionAddIntI.h
|
||||||
@@ -1 +1 @@
|
|||||||
/home/bilat/CBI/Dropbox/02_CBI_LINUX/CoursCuda/toStudent/code/WCudaStudent/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/add/int/PII/host/ReductionAddIntII.h
|
/home/mse15/CUDA/toStudent/code/WCudaStudent/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/add/int/PII/host/ReductionAddIntII.h
|
||||||
@@ -1 +1 @@
|
|||||||
/home/bilat/CBI/Dropbox/02_CBI_LINUX/CoursCuda/toStudent/code/WCudaStudent/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/host/ReductionIntI.h
|
/home/mse15/CUDA/toStudent/code/WCudaStudent/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PI/host/ReductionIntI.h
|
||||||
@@ -1 +1 @@
|
|||||||
/home/bilat/CBI/Dropbox/02_CBI_LINUX/CoursCuda/toStudent/code/WCudaStudent/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PII/host/ReductionIntII.h
|
/home/mse15/CUDA/toStudent/code/WCudaStudent/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/int/PII/host/ReductionIntII.h
|
||||||
@@ -1 +1 @@
|
|||||||
/home/bilat/CBI/Dropbox/02_CBI_LINUX/CoursCuda/toStudent/code/WCudaStudent/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/long/PII/host/ReductionLongII.h
|
/home/mse15/CUDA/toStudent/code/WCudaStudent/Student_Cuda_Tools_Reduction/src/core/02_use_protocole/generic/long/PII/host/ReductionLongII.h
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -1,13 +1,12 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "Thread1D.cu.h"
|
#include "Thread2D.cu.h"
|
||||||
|
|
||||||
/*----------------------------------------------------------------------*\
|
/*----------------------------------------------------------------------*\
|
||||||
|* Implementation *|
|
|* Implementation *|
|
||||||
\*---------------------------------------------------------------------*/
|
\*---------------------------------------------------------------------*/
|
||||||
|
|
||||||
class ReductionAdd
|
class ReductionAdd {
|
||||||
{
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -46,8 +45,9 @@ class ReductionAdd
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static __device__ void reduce(T* tabSM, T* ptrResultGM)
|
static
|
||||||
{
|
__device__
|
||||||
|
void reduce(T* tabSM, T* ptrResultGM) {
|
||||||
// Rappel :
|
// Rappel :
|
||||||
// |ThreadByBlock|=|tabSM| .
|
// |ThreadByBlock|=|tabSM| .
|
||||||
// Il y autant de case en SM que de thread par block.
|
// Il y autant de case en SM que de thread par block.
|
||||||
@@ -55,11 +55,11 @@ class ReductionAdd
|
|||||||
// 1 thread <---> 1 armoire
|
// 1 thread <---> 1 armoire
|
||||||
|
|
||||||
// TODO ReductionAdd
|
// TODO ReductionAdd
|
||||||
// reductionIntraBlock
|
reductionIntraBlock(tabSM);
|
||||||
// reductionInterblock
|
reductionInterBlock(tabSM, ptrResultGM);
|
||||||
|
|
||||||
// __syncthreads();// pour touts les threads d'un meme block, necessaires? ou? pas a le fin en tous les cas
|
// __syncthreads();// pour touts les threads d'un meme block, necessaires? ou? pas a le fin en tous les cas
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
@@ -71,8 +71,9 @@ class ReductionAdd
|
|||||||
* used by reductionIntraBlock
|
* used by reductionIntraBlock
|
||||||
*/
|
*/
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static __device__ void ecrasement(T* tabSM, int middle)
|
static
|
||||||
{
|
__device__
|
||||||
|
void ecrasement(T* tabSM, int middle) {
|
||||||
// Indications :
|
// Indications :
|
||||||
// (I1) je suis un thread, je dois faire quoi ?
|
// (I1) je suis un thread, je dois faire quoi ?
|
||||||
// (I2) Tous les threads doivent-ils faire quelquechose?
|
// (I2) Tous les threads doivent-ils faire quelquechose?
|
||||||
@@ -80,39 +81,59 @@ class ReductionAdd
|
|||||||
|
|
||||||
// TODO ReductionAdd
|
// TODO ReductionAdd
|
||||||
|
|
||||||
|
const int localTID = Thread2D::tidLocal();
|
||||||
|
|
||||||
|
if(localTID < middle) {
|
||||||
|
tabSM[localTID] = tabSM[localTID] + tabSM[localTID + middle];
|
||||||
|
}
|
||||||
|
|
||||||
// __syncthreads();// pour touts les threads d'un meme block, necessaires? ou?
|
// __syncthreads();// pour touts les threads d'un meme block, necessaires? ou?
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sur place, le resultat est dans tabSM[0]
|
* Sur place, le resultat est dans tabSM[0]
|
||||||
*/
|
*/
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static __device__ void reductionIntraBlock(T* tabSM)
|
static
|
||||||
{
|
__device__
|
||||||
|
void reductionIntraBlock(T* tabSM) { // Reduce tab SM (all in [0])
|
||||||
// Ecrasement sucessifs dans une boucle (utiliser la methode ecrasement ci-dessus)
|
// Ecrasement sucessifs dans une boucle (utiliser la methode ecrasement ci-dessus)
|
||||||
|
|
||||||
// TODO ReductionAdd
|
// TODO ReductionAdd
|
||||||
|
|
||||||
|
const int NB_THREAD_LOCAL = Thread2D::nbThreadLocal();
|
||||||
|
int middle = NB_THREAD_LOCAL>>1;
|
||||||
|
|
||||||
|
while (middle > 0) {
|
||||||
|
ecrasement(tabSM, middle);
|
||||||
|
__syncthreads();
|
||||||
|
middle = middle >> 1;
|
||||||
|
}
|
||||||
|
|
||||||
// __syncthreads();// pour touts les threads d'un meme block, necessaires? ou?
|
// __syncthreads();// pour touts les threads d'un meme block, necessaires? ou?
|
||||||
}
|
}
|
||||||
|
|
||||||
/*--------------------------------------*\
|
/*--------------------------------------*\
|
||||||
|* reductionInterblock *|
|
|* reductionInterblock *|
|
||||||
\*-------------------------------------*/
|
\*-------------------------------------*/
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static __device__ void reductionInterBlock(T* tabSM, T* ptrResultGM)
|
static
|
||||||
{
|
__device__
|
||||||
|
void reductionInterBlock(T* tabSM, T* ptrResultGM) { // SM -> GM
|
||||||
// Indication:
|
// Indication:
|
||||||
// (I1) Utiliser atomicAdd(pointeurDestination, valeurSource);
|
// (I1) Utiliser atomicAdd(pointeurDestination, valeurSource);
|
||||||
// (i2) Travailler sous l hypothese d'une grid2d,avec Thread2D
|
// (i2) Travailler sous l hypothese d'une grid2d,avec Thread2D
|
||||||
|
|
||||||
// TODO ReductionAdd
|
// TODO ReductionAdd
|
||||||
|
if(Thread2D::tidLocal() == 0) {
|
||||||
|
atomicAdd(ptrResultGM, tabSM[0]);
|
||||||
|
}
|
||||||
|
|
||||||
// __syncthreads();// pour touts les threads d'un meme block, necessaires? ou?
|
// __syncthreads();// pour touts les threads d'un meme block, necessaires? ou?
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/*----------------------------------------------------------------------*\
|
/*----------------------------------------------------------------------*\
|
||||||
|* End *|
|
|* End *|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "Lock.cu.h"
|
#include "Lock.cu.h"
|
||||||
#include "Thread1D.cu.h"
|
#include "Thread2D.cu.h"
|
||||||
|
|
||||||
/*----------------------------------------------------------------------*\
|
/*----------------------------------------------------------------------*\
|
||||||
|* prt fonction / reduction *|
|
|* prt fonction / reduction *|
|
||||||
@@ -14,8 +14,7 @@
|
|||||||
|* Implementation *|
|
|* Implementation *|
|
||||||
\*---------------------------------------------------------------------*/
|
\*---------------------------------------------------------------------*/
|
||||||
|
|
||||||
class Reduction
|
class Reduction {
|
||||||
{
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -50,14 +49,16 @@ class Reduction
|
|||||||
* ReductionGeneric::reduce(add,addAtomic,tabSm,ptrResultGM);
|
* ReductionGeneric::reduce(add,addAtomic,tabSm,ptrResultGM);
|
||||||
*/
|
*/
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static __device__ void reduce(BinaryOperator(OP) ,AtomicOp(ATOMIC_OP), T* tabSM, T* ptrResultGM)
|
static
|
||||||
|
__device__
|
||||||
|
void reduce(BinaryOperator(OP) ,AtomicOp(ATOMIC_OP), T* tabSM, T* ptrResultGM)
|
||||||
//static __device__ void reduce(T (*OP)(T, T) ,void (*ATOMIC_OP)(T*, T), T* tabSM, T* ptrResultGM) // idem ci-dessus mais sans define
|
//static __device__ void reduce(T (*OP)(T, T) ,void (*ATOMIC_OP)(T*, T), T* tabSM, T* ptrResultGM) // idem ci-dessus mais sans define
|
||||||
{
|
{
|
||||||
// Meme principe que ReductionAdd
|
// Meme principe que ReductionAdd
|
||||||
|
|
||||||
// TODO ReductionGeneric
|
// TODO ReductionGeneric
|
||||||
// Meme principe que ReductionAdd
|
// Meme principe que ReductionAdd
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
@@ -69,37 +70,40 @@ class Reduction
|
|||||||
* used by reductionIntraBlock
|
* used by reductionIntraBlock
|
||||||
*/
|
*/
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static __device__ void ecrasement(BinaryOperator(OP),T* tabSM, int middle)
|
static
|
||||||
{
|
__device__
|
||||||
|
void ecrasement(BinaryOperator(OP),T* tabSM, int middle) {
|
||||||
// TODO ReductionGeneric
|
// TODO ReductionGeneric
|
||||||
// Meme principe que ReductionAdd
|
// Meme principe que ReductionAdd
|
||||||
// OP est la variable representant l'operateur binaire
|
// OP est la variable representant l'operateur binaire
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sur place, le resultat est dans tabSM[0]
|
* Sur place, le resultat est dans tabSM[0]
|
||||||
*/
|
*/
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static __device__ void reductionIntraBlock(BinaryOperator(OP),T* tabSM)
|
static
|
||||||
{
|
__device__
|
||||||
|
void reductionIntraBlock(BinaryOperator(OP),T* tabSM) {
|
||||||
// TODO ReductionGeneric
|
// TODO ReductionGeneric
|
||||||
// Meme principe que ReductionAdd
|
// Meme principe que ReductionAdd
|
||||||
// OP est la variable representant l'operateur binaire
|
// OP est la variable representant l'operateur binaire
|
||||||
}
|
}
|
||||||
|
|
||||||
/*--------------------------------------*\
|
/*--------------------------------------*\
|
||||||
|* reductionInterblock *|
|
|* reductionInterblock *|
|
||||||
\*-------------------------------------*/
|
\*-------------------------------------*/
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static __device__ void reductionInterBlock(AtomicOp(ATOMIC_OP), T* tabSM, T* ptrResultGM)
|
static
|
||||||
{
|
__device__
|
||||||
|
void reductionInterBlock(AtomicOp(ATOMIC_OP), T* tabSM, T* ptrResultGM) {
|
||||||
// TODO ReductionGeneric
|
// TODO ReductionGeneric
|
||||||
// Meme principe que ReductionAdd
|
// Meme principe que ReductionAdd
|
||||||
// ATOMIC_OP est la variable representant l'operateur binaire atomic
|
// ATOMIC_OP est la variable representant l'operateur binaire atomic
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/*----------------------------------------------------------------------*\
|
/*----------------------------------------------------------------------*\
|
||||||
|* End *|
|
|* End *|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
#include "Thread1D.cu.h"
|
#include "Thread2D.cu.h"
|
||||||
#include "cudas.h"
|
#include "cudas.h"
|
||||||
|
|
||||||
#include "ReductionAdd.cu.h"
|
#include "ReductionAdd.cu.h"
|
||||||
@@ -9,7 +9,9 @@
|
|||||||
|* Private *|
|
|* Private *|
|
||||||
\*---------------------------------------------------------------------*/
|
\*---------------------------------------------------------------------*/
|
||||||
|
|
||||||
static __device__ void reductionIntraThread(int* tabSM);
|
static
|
||||||
|
__device__
|
||||||
|
void reductionIntraThread(int* tabSM);
|
||||||
|
|
||||||
/*----------------------------------------------------------------------*\
|
/*----------------------------------------------------------------------*\
|
||||||
|* Implementation *|
|
|* Implementation *|
|
||||||
@@ -18,13 +20,19 @@ static __device__ void reductionIntraThread(int* tabSM);
|
|||||||
/**
|
/**
|
||||||
* 1 partout en tabSM
|
* 1 partout en tabSM
|
||||||
*/
|
*/
|
||||||
__global__ void KAddIntProtocoleI(int* ptrSumGM)
|
__global__
|
||||||
{
|
void KAddIntProtocoleI(int* ptrSumGM) {
|
||||||
// TODO ReductionAddIntI
|
// TODO ReductionAddIntI
|
||||||
|
|
||||||
// Reception tabSM
|
// Reception tabSM
|
||||||
|
extern __shared__ int tabSM[];
|
||||||
|
|
||||||
// ReductionIntraThread
|
// ReductionIntraThread
|
||||||
|
reductionIntraThread(tabSM);
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
// ReductionAdd
|
// ReductionAdd
|
||||||
|
ReductionAdd::reduce(tabSM, ptrSumGM);
|
||||||
|
|
||||||
// __syncthreads(); // des threads de meme block!// Question : utile? ou?
|
// __syncthreads(); // des threads de meme block!// Question : utile? ou?
|
||||||
}
|
}
|
||||||
@@ -36,12 +44,15 @@ __global__ void KAddIntProtocoleI(int* ptrSumGM)
|
|||||||
/**
|
/**
|
||||||
* 1 partout en tabSM
|
* 1 partout en tabSM
|
||||||
*/
|
*/
|
||||||
__device__ void reductionIntraThread(int* tabSM)
|
__device__
|
||||||
{
|
void reductionIntraThread(int* tabSM) {
|
||||||
// TODO ReductionAddIntI
|
// TODO ReductionAddIntI
|
||||||
}
|
const int localTID = Thread2D::tidLocal();
|
||||||
|
|
||||||
|
tabSM[localTID] = 1;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------*\
|
/*----------------------------------------------------------------------*\
|
||||||
|* End *|
|
|* End *|
|
||||||
\*---------------------------------------------------------------------*/
|
\*---------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
|||||||
@@ -27,32 +27,33 @@ extern __global__ void KAddIntProtocoleI(int* ptrSumGM);
|
|||||||
ReductionAddIntI::ReductionAddIntI(const Grid& grid , int* ptrSum , bool isVerbose) :
|
ReductionAddIntI::ReductionAddIntI(const Grid& grid , int* ptrSum , bool isVerbose) :
|
||||||
//RunnableGPU(grid, "Reduce_Add_IntI_" + to_string(grid.threadCounts()),isVerbose), // classe parente
|
//RunnableGPU(grid, "Reduce_Add_IntI_" + to_string(grid.threadCounts()),isVerbose), // classe parente
|
||||||
RunnableGPU(grid, "Reduce_Add_IntI", isVerbose), // classe parente
|
RunnableGPU(grid, "Reduce_Add_IntI", isVerbose), // classe parente
|
||||||
ptrSum(ptrSum)
|
ptrSum(ptrSum) {
|
||||||
{
|
|
||||||
// TODO ReductionAddIntI
|
// TODO ReductionAddIntI
|
||||||
// MM pour ptrSumGM (oubliez pas initialisation)
|
// MM pour ptrSumGM (oubliez pas initialisation)
|
||||||
this->sizeSM = -1;
|
this->sizeSM = grid.threadByBlock() * sizeof(int);
|
||||||
|
|
||||||
// Tip: Il y a une methode dedier pour malloquer un int cote device et l'initialiser a zero
|
// Tip: Il y a une methode dedier pour malloquer un int cote device et l'initialiser a zero
|
||||||
//
|
//
|
||||||
// GM::mallocInt0(&ptrSumGM);
|
GM::mallocInt0(&ptrSumGM);
|
||||||
}
|
}
|
||||||
|
|
||||||
ReductionAddIntI::~ReductionAddIntI()
|
ReductionAddIntI::~ReductionAddIntI() {
|
||||||
{
|
|
||||||
// TODO ReductionAddIntI
|
// TODO ReductionAddIntI
|
||||||
}
|
GM::free(ptrSumGM);
|
||||||
|
}
|
||||||
|
|
||||||
/*--------------------------------------*\
|
/*--------------------------------------*\
|
||||||
|* Methode *|
|
|* Methode *|
|
||||||
\*-------------------------------------*/
|
\*-------------------------------------*/
|
||||||
|
|
||||||
void ReductionAddIntI::run()
|
void ReductionAddIntI::run() {
|
||||||
{
|
|
||||||
// TODO ReductionAddIntI
|
// TODO ReductionAddIntI
|
||||||
// appeler le kernel
|
// appeler le kernel
|
||||||
// recuperer le resulat coter host
|
// recuperer le resulat coter host
|
||||||
|
|
||||||
|
KAddIntProtocoleI<<<dg,db,this->sizeSM>>>(ptrSumGM);
|
||||||
|
GM::memcpyDToH_int(ptrSum, ptrSumGM);
|
||||||
|
|
||||||
// Tip: Il y a une methode dedier ramener coter host un int
|
// Tip: Il y a une methode dedier ramener coter host un int
|
||||||
//
|
//
|
||||||
// GM::memcpyDtoH_int(ptrDestination, ptrSourceGM););
|
// GM::memcpyDtoH_int(ptrDestination, ptrSourceGM););
|
||||||
|
|||||||
@@ -8,8 +8,7 @@
|
|||||||
|* Declaration *|
|
|* Declaration *|
|
||||||
\*---------------------------------------------------------------------*/
|
\*---------------------------------------------------------------------*/
|
||||||
|
|
||||||
class ReductionAddIntI: public RunnableGPU
|
class ReductionAddIntI: public RunnableGPU {
|
||||||
{
|
|
||||||
/*--------------------------------------*\
|
/*--------------------------------------*\
|
||||||
|* Constructor *|
|
|* Constructor *|
|
||||||
\*-------------------------------------*/
|
\*-------------------------------------*/
|
||||||
|
|||||||
@@ -26,8 +26,8 @@ int main(int argc , char** argv)
|
|||||||
|
|
||||||
// public
|
// public
|
||||||
{
|
{
|
||||||
cudaContext.deviceId = 0; // in [0,2] width Server Cuda3
|
cudaContext.deviceId = 1; // in [0,2] width Server Cuda3
|
||||||
cudaContext.launchMode = LaunchModeMOO::USE; // USE TEST (only)
|
cudaContext.launchMode = LaunchModeMOO::TEST; // USE TEST (only)
|
||||||
|
|
||||||
cudaContext.deviceDriver = DeviceDriver::LOAD_ALL; // LOAD_CURRENT LOAD_ALL
|
cudaContext.deviceDriver = DeviceDriver::LOAD_ALL; // LOAD_CURRENT LOAD_ALL
|
||||||
cudaContext.deviceInfo = DeviceInfo::ALL_SIMPLE; // NONE ALL ALL_SIMPLE CURRENT
|
cudaContext.deviceInfo = DeviceInfo::ALL_SIMPLE; // NONE ALL ALL_SIMPLE CURRENT
|
||||||
@@ -45,4 +45,3 @@ int main(int argc , char** argv)
|
|||||||
/*----------------------------------------------------------------------*\
|
/*----------------------------------------------------------------------*\
|
||||||
|* End *|
|
|* End *|
|
||||||
\*---------------------------------------------------------------------*/
|
\*---------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user