chore: init

This commit is contained in:
2025-09-19 15:36:37 +02:00
commit 9d707a253a
2190 changed files with 17598 additions and 0 deletions

View File

@@ -0,0 +1,119 @@
#pragma once
#include "Thread1D.cu.h"
/*----------------------------------------------------------------------*\
|* Implementation *|
\*---------------------------------------------------------------------*/
class ReductionAdd
{
public:

    /**
     * Sums the cells of tabSM (one cell per thread of the block) and adds the
     * resulting per-block total into *ptrResultGM.
     *
     * Must be called by ALL threads of the block: it contains block-wide barriers.
     *
     * Hypothesis:
     *
     *   (H1) T is a simple type for which atomicAdd(T*, T) exists.
     *
     * Example:
     *
     *   ReductionAdd::reduce(tabSM, ptrResultGM);
     *
     * Constraints:
     *
     *   (C1) |tabSM| is a power of 2, e.g. 2,4,8,16,32,64,128,256,512,1024
     *   (C2) |ThreadByBlock| = |tabSM|
     *   (C3) The intra-thread reduction (i.e. filling tabSM) is left to the caller.
     *
     * Warnings:
     *
     *   (W1) ptrResultGM is not an array but a single scalar receiving the final result.
     *   (W2) The caller is in charge of the memory management of ptrResultGM.
     *   (W3) The caller is in charge of the initialisation of ptrResultGM.
     *        Examples given by the original notes:
     *
     *          addition       : initialise to zero with GM::memclear(...)
     *          multiplication : initialise to 1 with GM::memcpyHtoD(...)
     *
     *        or in one go at allocation time:
     *
     *          GM::mallocfloat0(&ptrResultGM);
     *          GM::mallocInt0(&ptrResultGM);
     *
     * @param tabSM       per-block array, one cell per thread; overwritten in place
     * @param ptrResultGM scalar accumulator shared by all blocks
     */
    template <typename T>
    static __device__ void reduce(T* tabSM, T* ptrResultGM)
    {
        // Reminder: |ThreadByBlock| = |tabSM|, each thread owns exactly one cell.

        // Every cell must be written before any thread reads a neighbour's cell.
        // Harmless if the caller already synchronised.
        __syncthreads();

        reductionIntraBlock(tabSM);
        reductionInterBlock(tabSM, ptrResultGM);

        // No trailing barrier: the only thread that reads tabSM[0] afterwards is the
        // one that wrote it last.
    }

private:

    /*--------------------------------------*\
    |* indices                              *|
    \*-------------------------------------*/

    /**
     * Linear index of the calling thread inside its block (valid for 1D, 2D and 3D blocks).
     */
    static __device__ int tidLocal()
    {
        return threadIdx.x + blockDim.x * (threadIdx.y + blockDim.y * threadIdx.z);
    }

    /**
     * Number of threads in the block of the calling thread.
     */
    static __device__ int nbThreadBlock()
    {
        return blockDim.x * blockDim.y * blockDim.z;
    }

    /*--------------------------------------*\
    |* reductionIntraBlock                  *|
    \*-------------------------------------*/

    /**
     * One folding step, used by reductionIntraBlock: the upper half [middle, 2*middle)
     * is added onto the lower half [0, middle).
     *
     * Only the threads whose local index is below middle work. There is deliberately no
     * barrier here, because it would sit inside a divergent branch.
     */
    template <typename T>
    static __device__ void ecrasement(T* tabSM, int middle)
    {
        const int TID_LOCAL = tidLocal();

        if (TID_LOCAL < middle)
        {
            tabSM[TID_LOCAL] += tabSM[TID_LOCAL + middle];
        }
    }

    /**
     * In-place reduction: afterwards the block total is in tabSM[0].
     */
    template <typename T>
    static __device__ void reductionIntraBlock(T* tabSM)
    {
        // The trip count depends only on the block size, so all threads run the same
        // number of iterations and all reach every barrier.
        for (int middle = nbThreadBlock() >> 1; middle > 0; middle >>= 1)
        {
            ecrasement(tabSM, middle);

            // The next step reads cells written by other threads during this step.
            __syncthreads();
        }
    }

    /*--------------------------------------*\
    |* reductionInterblock                  *|
    \*-------------------------------------*/

    /**
     * Adds the block total (tabSM[0]) to *ptrResultGM: one atomic operation per block.
     */
    template <typename T>
    static __device__ void reductionInterBlock(T* tabSM, T* ptrResultGM)
    {
        if (tidLocal() == 0)
        {
            atomicAdd(ptrResultGM, tabSM[0]);
        }
    }
};
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/

View File

@@ -0,0 +1,106 @@
#pragma once
#include "Lock.cu.h"
#include "Thread1D.cu.h"
/*----------------------------------------------------------------------*\
|* ptr fonction / reduction *|
\*---------------------------------------------------------------------*/
// BinaryOperator(name): expands to the declaration of `name` as a pointer to a
// function taking (T, T) and returning T.
// The expansion refers to the identifier T, so the macro is only usable where a
// type named T is in scope (here: inside the `template <typename T>` methods below).
#define BinaryOperator(name) T (*name)(T, T)
// AtomicOp(name): expands to the declaration of `name` as a pointer to a function
// taking (T*, T) and returning void. Same dependency on a type named T.
#define AtomicOp(name) void (*name)(T*, T)
/*----------------------------------------------------------------------*\
|* Implementation *|
\*---------------------------------------------------------------------*/
class Reduction
{
public:

    /**
     * Generic reduction: combines the cells of tabSM (one cell per thread of the block)
     * with OP, then merges the per-block result into *ptrResultGM with ATOMIC_OP.
     *
     * Must be called by ALL threads of the block: it contains block-wide barriers.
     * The same constraints as for an additive reduction apply: |tabSM| is a power of 2,
     * |ThreadByBlock| = |tabSM|, tabSM is filled by the caller, and *ptrResultGM is
     * allocated and initialised (with the neutral element of OP) by the caller.
     *
     * Hypothesis:
     *
     *   (H1) OP is a binary operator on elements of type T
     *   (H2) ATOMIC_OP performs the matching update atomically
     *
     * Usage example:
     *
     *   Version 1:
     *
     *     __device__ int add(int x, int y) {return x+y;}
     *     __device__ void addAtomic(int* ptrX, int y) {atomicAdd(ptrX,y);}
     *
     *     Reduction::reduce(add,addAtomic,tabSM,ptrResultGM);
     *
     *   Version 2:
     *
     *     __device__ int add(int x, int y){return x+y;}
     *
     *     #include "Lock.cu.h"
     *     __device__ int volatile mutex = 0; // global variable
     *     __device__ void addAtomic(int* ptrX, int y) // noted as ~10x slower than version 1, but more flexible
     *         {
     *         Lock locker(&mutex);
     *         locker.lock();
     *         (*ptrX)+=y;
     *         locker.unlock();
     *         }
     *
     *     Reduction::reduce(add,addAtomic,tabSM,ptrResultGM);
     *
     * Equivalent signature without the macros:
     *
     *   reduce(T (*OP)(T, T), void (*ATOMIC_OP)(T*, T), T* tabSM, T* ptrResultGM)
     *
     * @param OP          binary operator used inside the block
     * @param ATOMIC_OP   atomic update used once per block on ptrResultGM
     * @param tabSM       per-block array, one cell per thread; overwritten in place
     * @param ptrResultGM scalar accumulator shared by all blocks
     */
    template <typename T>
    static __device__ void reduce(BinaryOperator(OP) ,AtomicOp(ATOMIC_OP), T* tabSM, T* ptrResultGM)
    {
        // Every cell must be written before any thread reads a neighbour's cell.
        // Harmless if the caller already synchronised.
        __syncthreads();

        reductionIntraBlock(OP, tabSM);
        reductionInterBlock(ATOMIC_OP, tabSM, ptrResultGM);

        // No trailing barrier: the only thread that reads tabSM[0] afterwards is the
        // one that wrote it last.
    }

private:

    /*--------------------------------------*\
    |* indices                              *|
    \*-------------------------------------*/

    /**
     * Linear index of the calling thread inside its block (valid for 1D, 2D and 3D blocks).
     */
    static __device__ int tidLocal()
    {
        return threadIdx.x + blockDim.x * (threadIdx.y + blockDim.y * threadIdx.z);
    }

    /**
     * Number of threads in the block of the calling thread.
     */
    static __device__ int nbThreadBlock()
    {
        return blockDim.x * blockDim.y * blockDim.z;
    }

    /*--------------------------------------*\
    |* reductionIntraBlock                  *|
    \*-------------------------------------*/

    /**
     * One folding step, used by reductionIntraBlock: the upper half [middle, 2*middle)
     * is combined with OP onto the lower half [0, middle).
     *
     * No barrier here: it would sit inside a divergent branch.
     */
    template <typename T>
    static __device__ void ecrasement(BinaryOperator(OP),T* tabSM, int middle)
    {
        const int TID_LOCAL = tidLocal();

        if (TID_LOCAL < middle)
        {
            tabSM[TID_LOCAL] = OP(tabSM[TID_LOCAL], tabSM[TID_LOCAL + middle]);
        }
    }

    /**
     * In-place reduction: afterwards the block result is in tabSM[0].
     */
    template <typename T>
    static __device__ void reductionIntraBlock(BinaryOperator(OP),T* tabSM)
    {
        // The trip count depends only on the block size, so all threads reach every barrier.
        for (int middle = nbThreadBlock() >> 1; middle > 0; middle >>= 1)
        {
            ecrasement(OP, tabSM, middle);

            // The next step reads cells written by other threads during this step.
            __syncthreads();
        }
    }

    /*--------------------------------------*\
    |* reductionInterblock                  *|
    \*-------------------------------------*/

    /**
     * Merges the block result (tabSM[0]) into *ptrResultGM: one ATOMIC_OP call per block,
     * issued by a single thread.
     */
    template <typename T>
    static __device__ void reductionInterBlock(AtomicOp(ATOMIC_OP), T* tabSM, T* ptrResultGM)
    {
        if (tidLocal() == 0)
        {
            ATOMIC_OP(ptrResultGM, tabSM[0]);
        }
    }
};
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/

View File

@@ -0,0 +1,67 @@
// Rappel : basique type et variable
//
// int f(int x) // prototype de f: on donne le type (int) et on donne le nom de variable (x)
// {
// return x*x;
// }
//
// void use()
// {
// int x=1;
//
// // x est la variable
// // int est le type
//
// int y=f(x); // utilisation de f: on donne la variable only (x)
// }
// Rappel 1 : pointeur fonction
//
// int add(int x, int y) // est de type int (*)(int,int)
// {
// return x+y;
// }
//
// int mult(int x, int y) // est de type int (*)(int,int)
// {
// return x*y;
// }
//
// int reduceV1( int (*OP)(int,int), int x, int y) // on donne le type et le nom de variable, y compris pour pointeur fonction
// {
// // int (*OP)(int,int) type et nom de variable
// // OP variable
// // int (*)(int,int) type
//
// return OP(x,y); // on utilise ici les variables! OP est une variable, x et y aussi!
// }
//
// void use1()
// {
// int x=1;
// int y=2;
//
// int zAdd=reduceV1(add,x,y); // utilisation de reduce, on donne variable only, ie add, x et y , mais surtout pas type!
// // Le nom d'une fonction est un pointeur vers le code la fonction,
// // comme le nom d'un tableau est un pointeur vers la première case du tableau
//
// int zMult=reduceV1(mult,x,y); // utilisation de reduce, on donne, comme a toute utilisation les variables seulement.
// }
// Rappel 2: pointeur fonction
//
// #define binaryOperator(name) int (*name)(int,int)
//
// //int reduceV1(int (*OP)(int,int) , int x, int y)
// int reduceV2(binaryOperator(OP), int x, int y)
// {
// return OP(x,y); // Warning, OP est le nom de la variable
// }
//
// void use2() // idem use ci-dessus use1, aucun changement!
// {
// int x=1;
// int y=2;
// int zAdd=reduceV2(add,x,y); // utilisation de reduce, on donne variable only!
// int zMult=reduceV2(mult,x,y); // utilisation de reduce, on donne variable only!
// }

View File

@@ -0,0 +1,47 @@
#include "Thread1D.cu.h"
#include "cudas.h"
#include "ReductionAdd.cu.h"
#include <stdio.h>
/*----------------------------------------------------------------------*\
|* Private *|
\*---------------------------------------------------------------------*/
static __device__ void reductionIntraThread(int* tabSM);
/*----------------------------------------------------------------------*\
|* Implementation *|
\*---------------------------------------------------------------------*/
/**
 * Protocol I: every thread writes 1 into its own shared-memory cell, then the cells are
 * summed. With a zero-initialised *ptrSumGM the expected result is the total number of
 * threads of the grid.
 *
 * Launch requirements:
 *   - dynamic shared memory: (threads per block) * sizeof(int) bytes
 *   - threads per block: a power of 2 (constraint of ReductionAdd)
 *   - *ptrSumGM initialised to 0 before the launch
 */
__global__ void KAddIntProtocoleI(int* ptrSumGM)
{
    // Sized at launch time by the third launch parameter.
    extern __shared__ int tabSM[];

    reductionIntraThread(tabSM);

    // All cells must be written before the reduction reads them across threads.
    // Placed here, outside any branch, so every thread of the block reaches it.
    __syncthreads();

    ReductionAdd::reduce(tabSM, ptrSumGM);
}

/*--------------------------------------*\
|* Private                              *|
\*-------------------------------------*/

/**
 * Writes 1 into the cell owned by the calling thread (one cell per thread).
 */
__device__ void reductionIntraThread(int* tabSM)
{
    const int TID_LOCAL = threadIdx.x + blockDim.x * (threadIdx.y + blockDim.y * threadIdx.z);

    tabSM[TID_LOCAL] = 1;
}
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/

View File

@@ -0,0 +1,63 @@
#include "ReductionAddIntI.h"
#include "GM.h"
#include "Grid.h"
#include <iostream>
#include <assert.h>
using std::cout;
using std::endl;
using std::to_string;
/*----------------------------------------------------------------------*\
|* Imported *|
\*---------------------------------------------------------------------*/
extern __global__ void KAddIntProtocoleI(int* ptrSumGM);
/*----------------------------------------------------------------------*\
|* Implementation *|
\*---------------------------------------------------------------------*/
/*--------------------------------------*\
|* Constructeur *|
\*-------------------------------------*/
/**
 * Forwards grid, a fixed title and isVerbose to the RunnableGPU base and stores ptrSum.
 * Skeleton: the device-side allocation is still to be written (see the TODO below).
 */
ReductionAddIntI::ReductionAddIntI(const Grid& grid , int* ptrSum , bool isVerbose) :
//RunnableGPU(grid, "Reduce_Add_IntI_" + to_string(grid.threadCounts()),isVerbose), // parent class (variant: thread count appended to the title)
RunnableGPU(grid, "Reduce_Add_IntI", isVerbose), // parent class
ptrSum(ptrSum)
{
// TODO ReductionAddIntI
// Memory management for ptrSumGM (do not forget its initialisation)
// NOTE(review): placeholder. sizeSM is a size_t, so -1 wraps to the largest size_t value.
// Presumably this must become the shared-memory size in bytes needed by the kernel - confirm.
this->sizeSM = -1;
// Tip: there is a dedicated method to allocate an int on the device and initialise it to zero
//
// GM::mallocInt0(&ptrSumGM);
}
/**
 * Skeleton: releases nothing yet. NOTE(review): whatever the constructor ends up
 * allocating for ptrSumGM presumably has to be freed here.
 */
ReductionAddIntI::~ReductionAddIntI()
{
// TODO ReductionAddIntI
}
/*--------------------------------------*\
|* Method *|
\*-------------------------------------*/
/**
 * Skeleton: currently does nothing; the intended steps are listed below.
 */
void ReductionAddIntI::run()
{
// TODO ReductionAddIntI
// call the kernel
// bring the result back to the host
// Tip: there is a dedicated method to copy an int back to the host
//
// GM::memcpyDtoH_int(ptrDestination, ptrSourceGM);
}
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/

View File

@@ -0,0 +1,51 @@
#pragma once
#include "cudas.h"
#include "Grid.h"
#include "RunnableGPU.h"
/*----------------------------------------------------------------------*\
|* Declaration *|
\*---------------------------------------------------------------------*/
/**
 * RunnableGPU for the "add, int, protocol I" reduction exercise.
 */
class ReductionAddIntI: public RunnableGPU
{
/*--------------------------------------*\
|* Constructor *|
\*-------------------------------------*/
public:
// grid and isVerbose are forwarded to RunnableGPU; ptrSum is stored in the attribute of the same name.
ReductionAddIntI(const Grid& grid , int* ptrSum, bool isVerbose);
virtual ~ReductionAddIntI();
/*--------------------------------------*\
|* Methods *|
\*-------------------------------------*/
public:
/**
 * override
 */
virtual void run();
/*--------------------------------------*\
|* Attributes *|
\*-------------------------------------*/
private:
// Inputs/Outputs
int* ptrSum; // presumably the host-side destination of the result - TODO confirm
// Tools
int* ptrSumGM; // presumably the device-side accumulator handed to the kernel - TODO confirm
size_t sizeSM; // presumably the shared-memory size in bytes for the launch - TODO confirm
};
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/

View File

@@ -0,0 +1,41 @@
#include "Thread1D.cu.h"
#include "cudas.h"
#include "ReductionAdd.cu.h"
#include <stdio.h>
/*----------------------------------------------------------------------*\
|* Private *|
\*---------------------------------------------------------------------*/
static __device__ void reductionIntraThread(int* tabSM);
/*----------------------------------------------------------------------*\
|* Implementation *|
\*---------------------------------------------------------------------*/
/**
 * Protocol II: every thread writes its global thread id (TID) into its own shared-memory
 * cell, then the cells are summed. For a grid of N threads and a zero-initialised
 * *ptrSumGM the expected result is 0 + 1 + ... + (N-1).
 *
 * Launch requirements:
 *   - dynamic shared memory: (threads per block) * sizeof(int) bytes
 *   - threads per block: a power of 2 (constraint of ReductionAdd)
 *   - *ptrSumGM initialised to 0 before the launch
 *
 * Warning: everything is computed in int, so the sum overflows for large grids.
 */
__global__ void KAddIntProtocoleII(int* ptrSumGM)
{
    // Sized at launch time by the third launch parameter.
    extern __shared__ int tabSM[];

    reductionIntraThread(tabSM);

    // All cells must be written before the reduction reads them across threads.
    __syncthreads();

    ReductionAdd::reduce(tabSM, ptrSumGM);
}

/*--------------------------------------*\
|* Private                              *|
\*-------------------------------------*/

/**
 * Writes the global thread id into the cell owned by the calling thread.
 *
 * The numbering is block after block; it maps the N threads of the grid one-to-one
 * onto [0, N), which is all the sum depends on.
 */
__device__ void reductionIntraThread(int* tabSM)
{
    const int TID_LOCAL = threadIdx.x + blockDim.x * (threadIdx.y + blockDim.y * threadIdx.z);
    const int NB_THREAD_BLOCK = blockDim.x * blockDim.y * blockDim.z;
    const int BLOCK_ID = blockIdx.x + gridDim.x * (blockIdx.y + gridDim.y * blockIdx.z);

    const int TID = BLOCK_ID * NB_THREAD_BLOCK + TID_LOCAL;

    tabSM[TID_LOCAL] = TID;
}
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/

View File

@@ -0,0 +1,52 @@
#include "ReductionAddIntII.h"
#include "GM.h"
#include "Grid.h"
#include <iostream>
#include <assert.h>
using std::cout;
using std::endl;
using std::to_string;
/*----------------------------------------------------------------------*\
|* Imported *|
\*---------------------------------------------------------------------*/
extern __global__ void KAddIntProtocoleII(int* ptrSumGM);
/*----------------------------------------------------------------------*\
|* Implementation *|
\*---------------------------------------------------------------------*/
/*--------------------------------------*\
|* Constructeur *|
\*-------------------------------------*/
/**
 * Forwards grid, a fixed title and isVerbose to the RunnableGPU base and stores ptrSum.
 * Skeleton: the remaining initialisation is still to be written.
 */
ReductionAddIntII::ReductionAddIntII(const Grid& grid , int* ptrSum,bool isVerbose) :
//RunnableGPU(grid, "Reduce_AddInt_II_" + to_string(grid.threadCounts()),isVerbose), // parent class (variant: thread count appended to the title)
RunnableGPU(grid, "Reduce_AddInt_II",isVerbose), // parent class
ptrSum(ptrSum)
{
// TODO ReductionAddIntII
// NOTE(review): placeholder. sizeSM is a size_t, so -1 wraps to the largest size_t value.
// Presumably this must become the shared-memory size in bytes needed by the kernel - confirm.
// NOTE(review): ptrSumGM is left uninitialised by this constructor.
this->sizeSM = -1;
}
/**
 * Skeleton: releases nothing yet.
 */
ReductionAddIntII::~ReductionAddIntII()
{
// TODO ReductionAddIntII
}
/*--------------------------------------*\
|* Method *|
\*-------------------------------------*/
/**
 * Skeleton: currently does nothing (no kernel launch, *ptrSum is not written).
 */
void ReductionAddIntII::run()
{
// TODO ReductionAddIntII
}
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/

View File

@@ -0,0 +1,51 @@
#pragma once
#include "cudas.h"
#include "Grid.h"
#include "RunnableGPU.h"
/*----------------------------------------------------------------------*\
|* Declaration *|
\*---------------------------------------------------------------------*/
/**
 * RunnableGPU for the "add, int, protocol II" reduction exercise.
 */
class ReductionAddIntII: public RunnableGPU
{
/*--------------------------------------*\
|* Constructor *|
\*-------------------------------------*/
public:
// grid and isVerbose are forwarded to RunnableGPU; ptrSum is stored in the attribute of the same name.
ReductionAddIntII(const Grid& grid , int* ptrSum,bool isVerbose);
virtual ~ReductionAddIntII();
/*--------------------------------------*\
|* Methods *|
\*-------------------------------------*/
public:
/**
 * override
 */
virtual void run();
/*--------------------------------------*\
|* Attributes *|
\*-------------------------------------*/
private:
// Inputs/Outputs
int* ptrSum; // presumably the host-side destination of the result - TODO confirm
// Tools
int* ptrSumGM; // presumably the device-side accumulator handed to the kernel - TODO confirm
size_t sizeSM; // presumably the shared-memory size in bytes for the launch - TODO confirm
};
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/

View File

@@ -0,0 +1,81 @@
#include "Thread1D.cu.h"
#include "cudas.h"
#include "Reduction.cu.h"
#include <stdio.h>
/*----------------------------------------------------------------------*\
|* Private *|
\*---------------------------------------------------------------------*/
static __device__ void reductionIntraThread(int* tabSM);
// Operateur reduction
static __device__ int add(int x , int y);
static __device__ void addAtomicV1(int* ptrX , int y);
static __device__ void addAtomicV2(int* ptrX , int y);
/*----------------------------------------------------------------------*\
|* Implementation *|
\*---------------------------------------------------------------------*/
/**
 * Protocol I with the generic reduction: every thread writes 1 into its own
 * shared-memory cell, then the cells are summed with add / addAtomicV1. With a
 * zero-initialised *ptrSumGM the expected result is the total number of threads.
 *
 * Launch requirements:
 *   - dynamic shared memory: (threads per block) * sizeof(int) bytes
 *   - threads per block: a power of 2 (constraint of Reduction)
 *   - *ptrSumGM initialised to 0 before the launch
 */
__global__ void KIntProtocoleI(int* ptrSumGM)
{
    // Sized at launch time by the third launch parameter.
    extern __shared__ int tabSM[];

    reductionIntraThread(tabSM);

    // All cells must be written before the reduction reads them across threads.
    __syncthreads();

    // addAtomicV2 (lock based) can be passed instead of addAtomicV1.
    Reduction::reduce(add, addAtomicV1, tabSM, ptrSumGM);
}

/*--------------------------------------*\
|* Private                              *|
\*-------------------------------------*/

/**
 * Writes 1 into the cell owned by the calling thread (one cell per thread).
 */
__device__ void reductionIntraThread(int* tabSM)
{
    const int TID_LOCAL = threadIdx.x + blockDim.x * (threadIdx.y + blockDim.y * threadIdx.z);

    tabSM[TID_LOCAL] = 1;
}

/*----------------------------*\
|* Reduction operators        *|
\*---------------------------*/

/**
 * Binary operator of the reduction: x + y.
 */
__inline__
__device__ int add(int x , int y)
{
    return x + y;
}

/**
 * Atomic counterpart of add, using the built-in atomicAdd(destination, value).
 */
__inline__
__device__ void addAtomicV1(int* ptrX , int y)
{
    atomicAdd(ptrX, y);
}

/**
 * Lock-based counterpart of add.
 * Noted as ~10x slower than addAtomicV1, but more flexible:
 * needed when no true atomic operator exists, or for objects for example.
 *
 * NOTE(review): Lock's implementation is not visible here. The update goes through a
 * volatile pointer and is followed by a fence so that the new value is visible to other
 * blocks before the lock is released, whatever Lock does internally.
 */
__device__ int volatile mutex = 0; // global variable
__device__ void addAtomicV2(int* ptrX , int y)
{
    Lock locker(&mutex);
    locker.lock();

    volatile int* ptrShared = ptrX;
    *ptrShared = *ptrShared + y;
    __threadfence();

    locker.unlock();
}
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/

View File

@@ -0,0 +1,52 @@
#include "ReductionIntI.h"
#include "GM.h"
#include "Grid.h"
#include <iostream>
#include <assert.h>
using std::cout;
using std::endl;
using std::to_string;
/*----------------------------------------------------------------------*\
|* Imported *|
\*---------------------------------------------------------------------*/
extern __global__ void KIntProtocoleI(int* ptrSumGM);
/*----------------------------------------------------------------------*\
|* Implementation *|
\*---------------------------------------------------------------------*/
/*--------------------------------------*\
|* Constructeur *|
\*-------------------------------------*/
/**
 * Forwards grid, a fixed title and isVerbose to the RunnableGPU base and stores ptrSum.
 * Skeleton: the remaining initialisation is still to be written.
 */
ReductionIntI::ReductionIntI(const Grid& grid , int* ptrSum,bool isVerbose) :
//RunnableGPU(grid, "Reduce_Generic_IntI_" + to_string(grid.threadCounts()),isVerbose), // parent class (variant: thread count appended to the title)
RunnableGPU(grid, "Reduce_Generic_IntI",isVerbose), // parent class
ptrSum(ptrSum)
{
// TODO ReductionIntI
// NOTE(review): placeholder. sizeSM is a size_t, so -1 wraps to the largest size_t value.
// Presumably this must become the shared-memory size in bytes needed by the kernel - confirm.
// NOTE(review): ptrSumGM is left uninitialised by this constructor.
this->sizeSM = -1;
}
/**
 * Skeleton: releases nothing yet.
 */
ReductionIntI::~ReductionIntI()
{
// TODO ReductionIntI
}
/*--------------------------------------*\
|* Method *|
\*-------------------------------------*/
/**
 * Skeleton: currently does nothing (no kernel launch, *ptrSum is not written).
 */
void ReductionIntI::run()
{
// TODO ReductionIntI
}
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/

View File

@@ -0,0 +1,51 @@
#pragma once
#include "cudas.h"
#include "Grid.h"
#include "RunnableGPU.h"
/*----------------------------------------------------------------------*\
|* Declaration *|
\*---------------------------------------------------------------------*/
/**
 * RunnableGPU for the "generic reduction, int, protocol I" exercise.
 */
class ReductionIntI: public RunnableGPU
{
/*--------------------------------------*\
|* Constructor *|
\*-------------------------------------*/
public:
// grid and isVerbose are forwarded to RunnableGPU; ptrSum is stored in the attribute of the same name.
ReductionIntI(const Grid& grid , int* ptrSum,bool isVerbose);
virtual ~ReductionIntI();
/*--------------------------------------*\
|* Methods *|
\*-------------------------------------*/
public:
/**
 * override
 */
virtual void run();
/*--------------------------------------*\
|* Attributes *|
\*-------------------------------------*/
private:
// Inputs/Outputs
int* ptrSum; // presumably the host-side destination of the result - TODO confirm
// Tools
int* ptrSumGM; // presumably the device-side accumulator handed to the kernel - TODO confirm
size_t sizeSM; // presumably the shared-memory size in bytes for the launch - TODO confirm
};
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/

View File

@@ -0,0 +1,81 @@
#include "Thread1D.cu.h"
#include "cudas.h"
#include "Reduction.cu.h"
#include "Lock.cu.h"
#include <stdio.h>
/*----------------------------------------------------------------------*\
|* Private *|
\*---------------------------------------------------------------------*/
static __device__ void reductionIntraThread(int* tabSM);
// Operateur reduction
static __device__ int add(int x , int y);
static __device__ void addAtomicV1(int* ptrX , int y);
static __device__ void addAtomicV2(int* ptrX , int y);
/*----------------------------------------------------------------------*\
|* Implementation *|
\*---------------------------------------------------------------------*/
/**
 * Protocol II with the generic reduction: every thread writes its global thread id
 * (TID) into its own shared-memory cell, then the cells are summed with
 * add / addAtomicV1. For a grid of N threads and a zero-initialised *ptrSumGM the
 * expected result is 0 + 1 + ... + (N-1).
 *
 * Launch requirements:
 *   - dynamic shared memory: (threads per block) * sizeof(int) bytes
 *   - threads per block: a power of 2 (constraint of Reduction)
 *   - *ptrSumGM initialised to 0 before the launch
 *
 * Warning: everything is computed in int, so the sum overflows for large grids.
 */
__global__ void KIntProtocoleII(int* ptrSumGM)
{
    // Sized at launch time by the third launch parameter.
    extern __shared__ int tabSM[];

    reductionIntraThread(tabSM);

    // All cells must be written before the reduction reads them across threads.
    __syncthreads();

    // addAtomicV2 (lock based) can be passed instead of addAtomicV1.
    Reduction::reduce(add, addAtomicV1, tabSM, ptrSumGM);
}

/*--------------------------------------*\
|* Private                              *|
\*-------------------------------------*/

/**
 * Writes the global thread id into the cell owned by the calling thread.
 *
 * The numbering is block after block; it maps the N threads of the grid one-to-one
 * onto [0, N), which is all the sum depends on.
 */
__device__ void reductionIntraThread(int* tabSM)
{
    const int TID_LOCAL = threadIdx.x + blockDim.x * (threadIdx.y + blockDim.y * threadIdx.z);
    const int NB_THREAD_BLOCK = blockDim.x * blockDim.y * blockDim.z;
    const int BLOCK_ID = blockIdx.x + gridDim.x * (blockIdx.y + gridDim.y * blockIdx.z);

    const int TID = BLOCK_ID * NB_THREAD_BLOCK + TID_LOCAL;

    tabSM[TID_LOCAL] = TID;
}

/*----------------------------*\
|* Reduction operators        *|
\*---------------------------*/

/**
 * Binary operator of the reduction: x + y.
 */
__inline__
__device__ int add(int x , int y)
{
    return x + y;
}

/**
 * Atomic counterpart of add, using the built-in atomicAdd(destination, value).
 */
__inline__
__device__ void addAtomicV1(int* ptrX , int y)
{
    atomicAdd(ptrX, y);
}

/**
 * Alternative to addAtomicV1 based on a lock: less efficient, but it generalises to
 * operations that have no built-in atomic.
 * Tip from the original notes: the Lock is implemented with two atomic methods.
 *
 * NOTE(review): Lock's implementation is not visible here. The update goes through a
 * volatile pointer and is followed by a fence so that the new value is visible to other
 * blocks before the lock is released, whatever Lock does internally.
 */
__device__ int volatile mutex = 0; // global variable
__device__ void addAtomicV2(int* ptrX , int y)
{
    Lock locker(&mutex);
    locker.lock();

    volatile int* ptrShared = ptrX;
    *ptrShared = *ptrShared + y;
    __threadfence();

    locker.unlock();
}
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/

View File

@@ -0,0 +1,52 @@
#include "ReductionIntII.h"
#include "GM.h"
#include "Grid.h"
#include <iostream>
#include <assert.h>
using std::cout;
using std::endl;
using std::to_string;
/*----------------------------------------------------------------------*\
|* Imported *|
\*---------------------------------------------------------------------*/
extern __global__ void KIntProtocoleII(int* ptrSumGM);
/*----------------------------------------------------------------------*\
|* Implementation *|
\*---------------------------------------------------------------------*/
/*--------------------------------------*\
|* Constructeur *|
\*-------------------------------------*/
/**
 * Forwards grid, a fixed title and isVerbose to the RunnableGPU base and stores ptrSum.
 * Skeleton: the remaining initialisation is still to be written.
 */
ReductionIntII::ReductionIntII(const Grid& grid , int* ptrSum,bool isVerbose) :
//RunnableGPU(grid, "Reduce_Generic_IntII_" + to_string(grid.threadCounts()),isVerbose), // parent class (variant: thread count appended to the title)
RunnableGPU(grid, "Reduce_Generic_IntII" ,isVerbose), // parent class
ptrSum(ptrSum)
{
// TODO ReductionIntII
// NOTE(review): placeholder. sizeSM is a size_t, so -1 wraps to the largest size_t value.
// Presumably this must become the shared-memory size in bytes needed by the kernel - confirm.
// NOTE(review): ptrSumGM is left uninitialised by this constructor.
this->sizeSM = -1;
}
/**
 * Skeleton: releases nothing yet.
 */
ReductionIntII::~ReductionIntII()
{
// TODO ReductionIntII
}
/*--------------------------------------*\
|* Method *|
\*-------------------------------------*/
/**
 * Skeleton: currently does nothing (no kernel launch, *ptrSum is not written).
 */
void ReductionIntII::run()
{
// TODO ReductionIntII
}
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/

View File

@@ -0,0 +1,51 @@
#pragma once
#include "cudas.h"
#include "Grid.h"
#include "RunnableGPU.h"
/*----------------------------------------------------------------------*\
|* Declaration *|
\*---------------------------------------------------------------------*/
/**
 * RunnableGPU for the "generic reduction, int, protocol II" exercise.
 */
class ReductionIntII: public RunnableGPU
{
/*--------------------------------------*\
|* Constructor *|
\*-------------------------------------*/
public:
// grid and isVerbose are forwarded to RunnableGPU; ptrSum is stored in the attribute of the same name.
ReductionIntII(const Grid& grid , int* ptrSum,bool isVerbose);
virtual ~ReductionIntII();
/*--------------------------------------*\
|* Methods *|
\*-------------------------------------*/
public:
/**
 * override
 */
virtual void run();
/*--------------------------------------*\
|* Attributes *|
\*-------------------------------------*/
private:
// Inputs/Outputs
int* ptrSum; // presumably the host-side destination of the result - TODO confirm
// Tools
int* ptrSumGM; // presumably the device-side accumulator handed to the kernel - TODO confirm
size_t sizeSM; // presumably the shared-memory size in bytes for the launch - TODO confirm
};
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/

View File

@@ -0,0 +1,109 @@
#include "Thread1D_long.cu.h"
#include "cudas.h"
#include "Reduction.cu.h"
#include "Lock.cu.h"
#include <stdio.h>
/*----------------------------------------------------------------------*\
|* Private *|
\*---------------------------------------------------------------------*/
static __device__ void reductionIntraThread(long* tabSM);
// Operateur reduction
static __device__ long add(long x , long y);
static __device__ void addAtomic(long* ptrX , long y);
/*----------------------------------------------------------------------*\
|* Implementation *|
\*---------------------------------------------------------------------*/
/**
 * Protocol II in long: every thread writes its global thread id (TID) into its own
 * shared-memory cell, then the cells are summed with add / addAtomic. For a grid of N
 * threads and a zero-initialised *ptrSumGM the expected result is 0 + 1 + ... + (N-1).
 *
 * Launch requirements:
 *   - dynamic shared memory: (threads per block) * sizeof(long) bytes
 *   - threads per block: a power of 2 (constraint of Reduction)
 *   - *ptrSumGM initialised to 0 before the launch
 */
__global__ void KLongProtocoleII(long* ptrSumGM)
{
    // Sized at launch time by the third launch parameter.
    extern __shared__ long tabSM[];

    reductionIntraThread(tabSM);

    // All cells must be written before the reduction reads them across threads.
    __syncthreads();

    Reduction::reduce(add, addAtomic, tabSM, ptrSumGM);
}

/*--------------------------------------*\
|* Private                              *|
\*-------------------------------------*/

/**
 * Writes the global thread id into the cell owned by the calling thread
 * (each thread owns exactly one cell of tabSM).
 *
 * Why long: summing more integers means more threads, hence a bigger grid, and the
 * TID (and even more so the sum) can then exceed what an int can hold. The TID is
 * therefore computed in long from the start:
 *
 *   Wrong : an int TID, which breaks for large grids
 *   Right : widen every factor to long before multiplying, as done below
 *
 * This only matters here, in the context of protocol II.
 *
 * The numbering is block after block; it maps the N threads of the grid one-to-one
 * onto [0, N), which is all the sum depends on.
 *
 * NOTE(review): long is 64 bits wide only on LP64 platforms; on a platform where long
 * is 32 bits the overflow concern above still applies.
 */
__device__ void reductionIntraThread(long* tabSM)
{
    // Index inside the block: bounded by the block size, an int is enough.
    const int TID_LOCAL = threadIdx.x + blockDim.x * (threadIdx.y + blockDim.y * threadIdx.z);

    const long NB_THREAD_BLOCK = (long)blockDim.x * (long)blockDim.y * (long)blockDim.z;
    const long BLOCK_ID = (long)blockIdx.x + (long)gridDim.x * ((long)blockIdx.y + (long)gridDim.y * (long)blockIdx.z);

    const long TID = BLOCK_ID * NB_THREAD_BLOCK + (long)TID_LOCAL;

    tabSM[TID_LOCAL] = TID;
}

/*----------------------------*\
|* Reduction operators        *|
\*---------------------------*/

/**
 * Binary operator of the reduction: x + y.
 */
__inline__
__device__ long add(long x , long y)
{
    return x + y;
}

/**
 * Atomic counterpart of add.
 *
 * The built-in atomicAdd has no overload taking long*, so the lock technique is used.
 *
 * NOTE(review): Lock's implementation is not visible here. The update goes through a
 * volatile pointer and is followed by a fence so that the new value is visible to other
 * blocks before the lock is released, whatever Lock does internally.
 */
__device__ int volatile mutex = 0; // global variable
__device__ void addAtomic(long* ptrX , long y)
{
    Lock locker(&mutex);
    locker.lock();

    volatile long* ptrShared = ptrX;
    *ptrShared = *ptrShared + y;
    __threadfence();

    locker.unlock();
}
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/

View File

@@ -0,0 +1,52 @@
#include "ReductionLongII.h"
#include "GM.h"
#include "Grid.h"
#include <iostream>
#include <assert.h>
using std::cout;
using std::endl;
using std::to_string;
/*----------------------------------------------------------------------*\
|* Imported *|
\*---------------------------------------------------------------------*/
extern __global__ void KLongProtocoleII(long* ptrSumGM);
/*----------------------------------------------------------------------*\
|* Implementation *|
\*---------------------------------------------------------------------*/
/*--------------------------------------*\
|* Constructeur *|
\*-------------------------------------*/
/**
 * Forwards grid, a fixed title and isVerbose to the RunnableGPU base and stores ptrSum.
 * Skeleton: the remaining initialisation is still to be written.
 */
ReductionLongII::ReductionLongII(const Grid& grid , long* ptrSum,bool isVerbose) :
//RunnableGPU(grid, "Reduce_Generic_LongII_" + to_string(grid.threadCounts()),isVerbose), // parent class (variant: thread count appended to the title)
RunnableGPU(grid, "Reduce_Generic_LongII",isVerbose), // parent class
ptrSum(ptrSum)
{
// TODO ReductionLongII
// NOTE(review): placeholder. sizeSM is a size_t, so -1 wraps to the largest size_t value.
// Presumably this must become the shared-memory size in bytes needed by the kernel - confirm.
// NOTE(review): ptrSumGM is left uninitialised by this constructor.
this->sizeSM = -1;
}
/**
 * Skeleton: releases nothing yet.
 */
ReductionLongII::~ReductionLongII()
{
// TODO ReductionLongII
}
/*--------------------------------------*\
|* Method *|
\*-------------------------------------*/
/**
 * Skeleton: currently does nothing (no kernel launch, *ptrSum is not written).
 */
void ReductionLongII::run()
{
// TODO ReductionLongII
}
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/

View File

@@ -0,0 +1,51 @@
#pragma once
#include "cudas.h"
#include "Grid.h"
#include "RunnableGPU.h"
/*----------------------------------------------------------------------*\
|* Declaration *|
\*---------------------------------------------------------------------*/
/**
 * RunnableGPU for the "generic reduction, long, protocol II" exercise.
 */
class ReductionLongII: public RunnableGPU
{
/*--------------------------------------*\
|* Constructor *|
\*-------------------------------------*/
public:
// grid and isVerbose are forwarded to RunnableGPU; ptrSum is stored in the attribute of the same name.
ReductionLongII(const Grid& grid , long* ptrSum,bool isVerbose);
virtual ~ReductionLongII();
/*--------------------------------------*\
|* Methods *|
\*-------------------------------------*/
public:
/**
 * override
 */
virtual void run();
/*--------------------------------------*\
|* Attributes *|
\*-------------------------------------*/
private:
// Inputs/Outputs
long* ptrSum; // presumably the host-side destination of the result - TODO confirm
// Tools
long* ptrSumGM; // presumably the device-side accumulator handed to the kernel - TODO confirm
size_t sizeSM; // presumably the shared-memory size in bytes for the launch - TODO confirm
};
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/

View File

@@ -0,0 +1,48 @@
#include <iostream>
using std::cout;
using std::cerr;
using std::endl;
// ReductionTools add
#include "ReductionAddIntI.h"
#include "ReductionAddIntII.h"
// ReductionTools generic
#include "ReductionIntI.h"
#include "ReductionIntII.h"
#include "ReductionLongII.h"
/*----------------------------------------------------------------------*\
|* Implementation *|
\*---------------------------------------------------------------------*/
/**
 * Factory: heap-allocates a ReductionAddIntI built from the three arguments and
 * returns it through its RunnableGPU base pointer. Nothing here releases the object.
 */
RunnableGPU* createReductionAddIntI(const Grid& grid , int* ptrSum , bool isVerbose)
{
    RunnableGPU* ptrRunnable = new ReductionAddIntI(grid, ptrSum, isVerbose);

    return ptrRunnable;
}
/**
 * Factory: heap-allocates a ReductionAddIntII built from the three arguments and
 * returns it through its RunnableGPU base pointer. Nothing here releases the object.
 */
RunnableGPU* createReductionAddIntII(const Grid& grid , int* ptrSum , bool isVerbose)
{
    RunnableGPU* ptrRunnable = new ReductionAddIntII(grid, ptrSum, isVerbose);

    return ptrRunnable;
}
/**
 * Factory: heap-allocates a ReductionIntI built from the three arguments and
 * returns it through its RunnableGPU base pointer. Nothing here releases the object.
 */
RunnableGPU* createReductionIntI(const Grid& grid , int* ptrSum , bool isVerbose)
{
    RunnableGPU* ptrRunnable = new ReductionIntI(grid, ptrSum, isVerbose);

    return ptrRunnable;
}
/**
 * Factory: heap-allocates a ReductionIntII built from the three arguments and
 * returns it through its RunnableGPU base pointer. Nothing here releases the object.
 */
RunnableGPU* createReductionIntII(const Grid& grid , int* ptrSum , bool isVerbose)
{
    RunnableGPU* ptrRunnable = new ReductionIntII(grid, ptrSum, isVerbose);

    return ptrRunnable;
}
/**
 * Factory: heap-allocates a ReductionLongII built from the three arguments and
 * returns it through its RunnableGPU base pointer. Nothing here releases the object.
 */
RunnableGPU* createReductionLongII(const Grid& grid , long* ptrSum , bool isVerbose)
{
    RunnableGPU* ptrRunnable = new ReductionLongII(grid, ptrSum, isVerbose);

    return ptrRunnable;
}
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/

View File

@@ -0,0 +1,48 @@
#include <iostream>
#include "CudaContext.h"
#include "Limits.h"
using std::cout;
using std::cerr;
using std::endl;
/*----------------------------------------------------------------------*\
|* Imported *|
\*---------------------------------------------------------------------*/
extern int mainUse();
extern int mainTest();
/*----------------------------------------------------------------------*\
|* Implementation *|
\*---------------------------------------------------------------------*/
/**
 * Program entry point: fills a CudaContext with the settings and the two entry
 * points (mainUse, mainTest), then returns whatever cudaContext.process() returns.
 * argc and argv are not used.
 */
int main(int argc , char** argv)
{
    // Limits::show();

    CudaContext cudaContext;

    // Public settings
    cudaContext.deviceId = 0; // original note: in [0,2] with Server Cuda3
    cudaContext.launchMode = LaunchModeMOO::USE; // USE TEST (only)
    cudaContext.deviceDriver = DeviceDriver::LOAD_ALL; // LOAD_CURRENT LOAD_ALL
    cudaContext.deviceInfo = DeviceInfo::ALL_SIMPLE; // NONE ALL ALL_SIMPLE CURRENT

    // Private wiring: entry points handed to the context
    cudaContext.mainUse = mainUse;
    cudaContext.mainTest = mainTest;

    const int exitCode = cudaContext.process();

    return exitCode;
}
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/

View File

@@ -0,0 +1,72 @@
#include <stdlib.h>
#include <iostream>
#include <string>
// add
#include "VTReductionAddIntI.h"
#include "VTReductionAddIntII.h"
// generic
#include "VTReductionGenericI.h"
#include "VTReductionGenericII.h"
#include "VTReductionGenericLongII.h"
using std::string;
using std::cout;
using std::endl;
/*----------------------------------------------------------------------*\
|* Declaration *|
\*---------------------------------------------------------------------*/
static void add();
static void generic();
/*----------------------------------------------------------------------*\
|* Implementation *|
\*---------------------------------------------------------------------*/
/**
 * Test entry point: runs the selected test group and always returns EXIT_SUCCESS.
 */
int mainTest()
{
// enable below only the wanted exercise (not all of them)
add();
//generic();
return EXIT_SUCCESS;
}
/*--------------------------------------*\
|* private *|
\*-------------------------------------*/
/**
 * Tests of the additive reduction: enable below the wanted version.
 * Both test objects are constructed; only test1 is run.
 */
void add()
{
VTReductionAddIntI test1;
VTReductionAddIntII test2;
test1.run();
//test2.run();
}
/**
 * Tests of the generic reduction: enable below the wanted version.
 * The three test objects are constructed; only test1 is run.
 */
void generic()
{
VTReductionGenericI test1;
VTReductionGenericII test2;
VTReductionGenericLongII test3;
test1.run();
// test2.run();
// test3.run();
}
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/

View File

@@ -0,0 +1,110 @@
#include <iostream>
#include <stdlib.h>
using std::cerr;
using std::cout;
using std::endl;
#include "Couts.h"
// ReductionTools add
#include "UseReductionAddIntI.h"
#include "UseReductionAddIntII.h"
// ReductionTools generic
#include "UseReductionIntI.h"
#include "UseReductionIntII.h"
#include "UseReductionLongII.h"
/*----------------------------------------------------------------------*\
|* declaration *|
\*---------------------------------------------------------------------*/
static void reduction_add(bool& isOk);
static void reduction_generic(bool& isOk);
static void print(bool isSuccess);
/*----------------------------------------------------------------------*\
|* Implementation *|
\*---------------------------------------------------------------------*/
// Verbosity flag handed to every Use* object of this file. It holds a boolean, so it
// is typed as one (it used to be an int initialised with true).
static const bool IS_VERBOSE = true;

/**
 * "Use" entry point: runs the selected exercise group, prints the overall status and
 * maps it to the process exit code.
 *
 * @return EXIT_SUCCESS when every enabled exercise reported ok, EXIT_FAILURE otherwise
 */
int mainUse()
{
    // Enable below only the wanted exercise (not all of them).
    bool isOk = true;

    reduction_add(isOk); // see the function below to enable the wanted version
    //reduction_generic(isOk); // see the function below to enable the wanted version

    print(isOk);

    return isOk ? EXIT_SUCCESS : EXIT_FAILURE;
}
/*----------------------------------------------------------------------*\
|* TP *|
\*---------------------------------------------------------------------*/
/**
 * Additive reduction: enable below the wanted version.
 * The result of each enabled exercise is and-ed into isOk, so isOk can only go from
 * true to false here.
 */
void reduction_add(bool& isOk)
{
// IntI
{
UseReductionAddIntI algo(IS_VERBOSE);
isOk &= algo.isOk(IS_VERBOSE);
}
// IntII (disabled)
// {
// UseReductionAddIntII algo(IS_VERBOSE);
// isOk &= algo.isOk(IS_VERBOSE);
// }
}
/**
 * Generic reduction: enable below the wanted version.
 * The result of each exercise is and-ed into isOk, so isOk can only go from true to
 * false here. Each exercise lives in its own scope, so its algo object is destroyed
 * before the next one is built.
 */
void reduction_generic(bool& isOk)
{
// IntI
{
UseReductionIntI algo(IS_VERBOSE);
isOk &= algo.isOk(IS_VERBOSE);
}
// IntII
{
UseReductionIntII algo(IS_VERBOSE);
isOk &= algo.isOk(IS_VERBOSE);
}
// LongII
{
UseReductionLongII algo(IS_VERBOSE);
isOk &= algo.isOk(IS_VERBOSE);
}
}
/*----------------------------------------------------------------------*\
|* Tools *|
\*---------------------------------------------------------------------*/
/**
 * Prints the final status between a Couts::REVERSE and a Couts::RESET marker:
 * the success message when isSuccess is true, the failure message otherwise
 * (the choice itself is made by Couts::status).
 */
void print(bool isSuccess)
{
    // Opening: blank line, then the REVERSE marker
    cout << endl;
    cout << Couts::REVERSE;

    Couts::status(isSuccess, "Success, Congratulations !", "Failed, sorry!");

    // Closing: blank line, then the RESET marker
    cout << endl;
    cout << Couts::RESET;
}
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/