From bf9a0a29414e589fe8ded1ff12b1a45fcac3dbd7 Mon Sep 17 00:00:00 2001 From: Klagarge Date: Mon, 24 Nov 2025 23:21:59 +0100 Subject: [PATCH] feat(sliceSM): add sliceSM --- .zed/tasks.json | 6 +-- Student_Cuda/.clangd | 3 ++ .../03_Slice_SM/device/sliceSM_device.cu | 39 ++++++++++++++----- .../03_Slice/03_Slice_SM/host/SliceSM.cu | 30 +++++++------- .../03_Slice/03_Slice_SM/host/SliceSM.h | 5 +-- .../03_Slice_SM/host/SliceSM_BestGrid.h | 25 ++++++------ Student_Cuda/src/main/main.cpp | 2 +- Student_Cuda/src/main/mainBenchmark.cpp | 5 ++- Student_Cuda/src/main/mainBruteforce.cpp | 4 +- Student_Cuda/src/main/mainTest.cpp | 13 +++---- Student_Cuda/src/main/mainUse.cpp | 8 ++-- 11 files changed, 79 insertions(+), 61 deletions(-) diff --git a/.zed/tasks.json b/.zed/tasks.json index a6c8342..e3d5ef1 100644 --- a/.zed/tasks.json +++ b/.zed/tasks.json @@ -1,10 +1,8 @@ -// Project tasks configuration. See https://zed.dev/docs/tasks for documentation. -// -// Example: [ { - "label": "Run", + "label": "Run CUDA - Student_Cuda", "command": "cbicc cuda clean jall run", + "cwd": "Student_Cuda", "use_new_terminal": false, "allow_concurrent_runs": false, "reveal": "always", diff --git a/Student_Cuda/.clangd b/Student_Cuda/.clangd index 7c92112..ba398d5 100644 --- a/Student_Cuda/.clangd +++ b/Student_Cuda/.clangd @@ -2,6 +2,9 @@ CompileFlags: Add: - "-I/home/mse15/CUDA/toStudent/code/WCudaStudent/Student_Cuda/INC_SYMLINK/EXT" - "-I/home/mse15/CUDA/toStudent/code/WCudaStudent/Student_Cuda/INC_SYMLINK/PROJECT" + - "-std=c++17" + - "-x" + - "cuda" --- Diagnostics: Suppress: "*" diff --git a/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/device/sliceSM_device.cu b/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/device/sliceSM_device.cu index b6ee84c..fa4abe8 100755 --- a/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/device/sliceSM_device.cu +++ b/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/device/sliceSM_device.cu @@ -17,16 +17,21 @@ static __device__ 
float f(float x); |* Implementation *| \*---------------------------------------------------------------------*/ -__global__ void sliceSM(int nbSlice , float* ptrPiHatGM) - { +__global__ +void sliceSM(int nbSlice , float* ptrPiHatGM) { // TODO SliceSM // Reception tabSM + extern __shared__ float tabSM[]; // ReductionIntraThread + reductionIntraThread(tabSM, nbSlice); + __syncthreads(); // Reduction de tabSM (use tools ReductionAdd) + ReductionAdd::reduce(tabSM, ptrPiHatGM); + // __syncthreads(); necessaire? ou? pas a la fin en tout cas - } +} /*--------------------------------------*\ |* Private *| @@ -35,19 +40,33 @@ __global__ void sliceSM(int nbSlice , float* ptrPiHatGM) /** * remplit la sm */ -void reductionIntraThread(float* tabSM , int nbSlice) - { +static +__device__ +void reductionIntraThread(float* tabSM , int nbSlice) { // TODO SliceSM // Warning: Il faut employer TID et TID_LOCAL - } + const int TID = Thread2D::tid(); + const int localTID = Thread2D::tidLocal(); + const int NB_THREAD = Thread2D::nbThread(); -__device__ float f(float x) - { - return 4.f / (1.f + x * x); + const float delta_x = 1.f / (float)nbSlice; + + int s = TID; + tabSM[localTID] = 0.f; + + while (s < nbSlice) { + float xi = s * delta_x; + tabSM[localTID] += f(xi); + s += NB_THREAD; } +} + +__device__ +float f(float x) { + return 4.f / (1.f + x * x); +} /*----------------------------------------------------------------------*\ |* End *| \*---------------------------------------------------------------------*/ - diff --git a/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/host/SliceSM.cu b/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/host/SliceSM.cu index 63836d6..493ab71 100755 --- a/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/host/SliceSM.cu +++ b/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/host/SliceSM.cu @@ -25,34 +25,31 @@ extern __global__ void sliceSM(int nbSlice,float* ptrPiHatGM); \*-------------------------------------*/ 
SliceSM::SliceSM(const Grid& grid , int nbSlice , double* ptrPiHat , bool isVerbose) : - RunnableGPU(grid, "SliceSM_" + to_string(nbSlice), isVerbose), // classe parente - // - ptrPiHat(ptrPiHat), // - nbSlice(nbSlice) // - { - this->sizeSM = -1; //TODO SliceSM + RunnableGPU(grid, "SliceSM_" + to_string(nbSlice), isVerbose), + ptrPiHat(ptrPiHat), + nbSlice(nbSlice) { + this->sizeSM = grid.threadByBlock() * sizeof(float); //TODO SliceSM // MM { // TODO SliceSM (pas oublier de mettre a zero, avec mallocfloat0 par exemple) - + GM::mallocFloat0(&ptrPiHatGM); // Tip: Il y a une methode dedier pour malloquer un float cote device et l'initialiser a zero // // GM::mallocfloat0(&ptrPiHatGM); } - } +} -SliceSM::~SliceSM(void) - { +SliceSM::~SliceSM(void) { //TODO SliceSM - } + GM::free(ptrPiHatGM); +} /*--------------------------------------*\ |* Methode *| \*-------------------------------------*/ -void SliceSM::run() - { +void SliceSM::run() { // Etape 1 : lancer le kernel // Etape 2 : recuperer le resultat coter host (par exemple avec memcpyDToH_float) // Etape 3 : finaliser le calcul de PI @@ -60,7 +57,12 @@ void SliceSM::run() // Solution : double result; // et ramener dans result, transferer et finaliser ensuite dans ptrPiHat // TODO SliceSM - } + sliceSM<<<dg, db, this->sizeSM>>>(this->nbSlice, this->ptrPiHatGM); + float result; + GM::memcpyDToH_float(&result, this->ptrPiHatGM); + const double delta_x = 1.0 / (double) this->nbSlice; + *this->ptrPiHat = (double) result * delta_x; +} ///////////////////////// // Rappel: diff --git a/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/host/SliceSM.h b/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/host/SliceSM.h index 7264654..3645e7b 100755 --- a/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/host/SliceSM.h +++ b/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/host/SliceSM.h @@ -8,8 +8,7 @@ |* Declaration *| \*---------------------------------------------------------------------*/ -class SliceSM: public 
RunnableGPU - { +class SliceSM: public RunnableGPU { /*--------------------------------------*\ |* Constructor *| \*-------------------------------------*/ @@ -51,7 +50,7 @@ class SliceSM: public RunnableGPU size_t sizeSM; // [octet] float* ptrPiHatGM; - }; +}; /*----------------------------------------------------------------------*\ |* End *| diff --git a/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/host/SliceSM_BestGrid.h b/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/host/SliceSM_BestGrid.h index b59cebd..6f4e9ce 100755 --- a/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/host/SliceSM_BestGrid.h +++ b/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/host/SliceSM_BestGrid.h @@ -11,29 +11,26 @@ |* Impelmentation *| \*---------------------------------------------------------------------*/ -namespace sliceSM - { +namespace sliceSM { - class BestGrid - { + class BestGrid { public: - static Grid get() - { - const int MP = Hardware::getMPCount(); + static Grid get() { + const int MP = Hardware::getMPCount(); + const int CORE_MP = Hardware::getCoreCountMP(); - // TODO SliceGMHOST grid + // TODO SliceGMHOST grid + dim3 dg(MP, 6, 1); + dim3 db(CORE_MP, 2, 1); + Grid grid(dg, db); - // to remove once coded - { - Couts::redln("aie aie aie, your best grid won t build itself"); - assert(false); - } + return grid; } }; - } +} /*----------------------------------------------------------------------*\ |* End *| diff --git a/Student_Cuda/src/main/main.cpp b/Student_Cuda/src/main/main.cpp index 5e198e6..c757d67 100755 --- a/Student_Cuda/src/main/main.cpp +++ b/Student_Cuda/src/main/main.cpp @@ -29,7 +29,7 @@ int main(int argc , char** argv) // public { cudaContext.deviceId = 1; // in [0,2] width Server Cuda3 - cudaContext.launchMode = LaunchModeMOO::USE; // USE TEST BENCHMARK FORCEBRUT + cudaContext.launchMode = LaunchModeMOO::TEST; // USE TEST BENCHMARK FORCEBRUT cudaContext.deviceDriver = DeviceDriver::LOAD_ALL; // LOAD_CURRENT LOAD_ALL 
cudaContext.deviceInfo = DeviceInfo::ALL_SIMPLE; // NONE ALL ALL_SIMPLE CURRENT diff --git a/Student_Cuda/src/main/mainBenchmark.cpp b/Student_Cuda/src/main/mainBenchmark.cpp index 148447c..4f562cf 100755 --- a/Student_Cuda/src/main/mainBenchmark.cpp +++ b/Student_Cuda/src/main/mainBenchmark.cpp @@ -34,6 +34,7 @@ using std::endl; static void sliceGMHOST(); static void sliceGM(); +static void sliceSM(); static void montecarloMono(); @@ -59,8 +60,8 @@ int mainBenchmark() // Slice { // sliceGMHOST(); - sliceGM(); - //sliceSM(); + // sliceGM(); + sliceSM(); //sliceMulti(); } diff --git a/Student_Cuda/src/main/mainBruteforce.cpp b/Student_Cuda/src/main/mainBruteforce.cpp index 02ca114..387e73c 100755 --- a/Student_Cuda/src/main/mainBruteforce.cpp +++ b/Student_Cuda/src/main/mainBruteforce.cpp @@ -75,8 +75,8 @@ int mainBrutforce() // Slice { // sliceGMHOST(&matlab); - sliceGM(&matlab); -// sliceSM(&matlab); + // sliceGM(&matlab); + sliceSM(&matlab); } // Montecarlo diff --git a/Student_Cuda/src/main/mainTest.cpp b/Student_Cuda/src/main/mainTest.cpp index 4046c5c..b8858d8 100755 --- a/Student_Cuda/src/main/mainTest.cpp +++ b/Student_Cuda/src/main/mainTest.cpp @@ -52,17 +52,16 @@ int mainTest() /** * activer ci-dessous la version souhaiter */ -void slice() - { +void slice() { // VTSliceGMHOST test1; - VTSliceGM test2; - // VTSliceSM test3; + // VTSliceGM test2; + VTSliceSM test3; // test1.run(); - test2.run(); - // test3.run(); - } + // test2.run(); + test3.run(); +} /** * activer ci-dessous la version souhaiter diff --git a/Student_Cuda/src/main/mainUse.cpp b/Student_Cuda/src/main/mainUse.cpp index e827da5..fb9ac41 100755 --- a/Student_Cuda/src/main/mainUse.cpp +++ b/Student_Cuda/src/main/mainUse.cpp @@ -67,12 +67,12 @@ int mainUse() void slice(bool& isOk) { // SliceGmHostUse sliceGmHostUse(IS_VERBOSE); - SliceGmUse sliceGmUse(IS_VERBOSE); - // SliceSmUse sliceSmUse(IS_VERBOSE); + // SliceGmUse sliceGmUse(IS_VERBOSE); + SliceSmUse sliceSmUse(IS_VERBOSE); // isOk &= 
sliceGmHostUse.isOk(IS_VERBOSE); - isOk &= sliceGmUse.isOk(IS_VERBOSE); -// isOk &= sliceSmUse.isOk(IS_VERBOSE); + // isOk &= sliceGmUse.isOk(IS_VERBOSE); + isOk &= sliceSmUse.isOk(IS_VERBOSE); } /**