feat(lab02): add Slice GM

2025-11-07 12:15:18 +01:00
parent dcd3df8f89
commit 1732758656
9 changed files with 146 additions and 135 deletions
--- a/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/device/sliceGM_device.cu
+++ b/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/device/sliceGM_device.cu
@@ -1,5 +1,5 @@
 #include "Thread2D.cu.h"
 #include "Thread1D.cu.h"
 #include "Thread2D.cu.h"
 #include "cudas.h"
 #include <stdio.h>
@@ -16,42 +16,57 @@ static __device__ float f(float x);
 /**
  * <pre>
  * Intra-thread reduction: each thread walks the slice indices with an
  * interleaved pattern (it starts at its own id and advances by the total
  * number of threads), sums f(x) over those slices, then stores its partial
  * sum in ITS OWN cell of tabGM.
  *
  * tabGM has as many cells as there are threads.
  * </pre>
  *
  * @param tabGM   output array, one float per thread
  * @param nbSlice number of slices; slice s is sampled at x = s / nbSlice
  */
 __global__ void reductionIntraThreadGM(float *tabGM, int nbSlice) {
   const int NB_THREAD = Thread2D::nbThread();
   const int TID = Thread2D::tid();
   const float DELTA_X = 1.f / (float)nbSlice;

   // Accumulate in a local variable so that tabGM is written once per thread
   // instead of being read and rewritten at every iteration.
   float sumThread = 0.f;
   for (int s = TID; s < nbSlice; s += NB_THREAD) {
     sumThread += f(s * DELTA_X);
   }

   tabGM[TID] = sumThread;
 }
 /**
  * <pre>
  * One folding step of the device-side reduction of tabGM: every thread whose
  * id is below moitier adds cell [id + moitier] into cell [id].
  *
  * This kernel is called from the host in a loop, with the appropriate number
  * of threads.
  *
  * Hypothesis: |tabGM| is a power of 2
  *
  * Output: the result of the reduction ends up in tabGM[0]
  * </pre>
  *
  * @param tabGM   array being reduced in place
  * @param moitier half of the number of cells still to be folded
  */
 __global__ void ecrasementGM(float *tabGM, int moitier) {
   const int idThread = Thread2D::tid();

   // Threads beyond the lower half have nothing to fold.
   if (idThread >= moitier) {
     return;
   }

   tabGM[idThread] += tabGM[idThread + moitier];
 }
 /*--------------------------------------*\
 |*		Private			*|
 \*-------------------------------------*/
-__device__ float f(float x)
+__device__ float f(float x) {
    {
    return 4.f / (1.f + x * x);
 }
 /*----------------------------------------------------------------------*\
 |*			End	 					*|
 \*---------------------------------------------------------------------*/
--- a/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/host/SliceGM.cu
+++ b/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/host/SliceGM.cu
@@ -1,12 +1,12 @@
 #include "SliceGM.h"
 #include <iostream>
 #include <assert.h>
 #include <iostream>
 #include "GM.h"
 #include "Maths.h"
 #include "Hardware.h"
 #include "Kernel.h"
 #include "Maths.h"
 using std::cout;
 using std::endl;
@@ -27,20 +27,22 @@ extern __global__ void ecrasementGM(float* tabGM, int moitier);
 |*		Constructeur		*|
 \*-------------------------------------*/
-SliceGM::SliceGM(Grid grid , int nbSlice , double* ptrPiHat , bool isVerbose) :
+SliceGM::SliceGM(Grid grid, int nbSlice, double *ptrPiHat, bool isVerbose)
-	RunnableGPU(grid, "SliceGM_" + to_string(nbSlice), isVerbose), // classe parente
+    : RunnableGPU(grid, "SliceGM_" + to_string(nbSlice),
                  isVerbose), // classe parente
                              //
      nbSlice(nbSlice),       //
      ptrPiHat(ptrPiHat)      //
 {
-    this->nTabGM = -1; // TODO SliceGM
+  this->nTabGM = grid.threadCounts();    // TODO SliceGM
-    this->sizeTabGM = -1; //  TODO SliceGM // [octet]
+  this->sizeTabGM = nTabGM * sizeof(float); //  TODO SliceGM // [octet]
  GM::malloc(&tabGM, sizeTabGM);
 }
-SliceGM::~SliceGM(void)
+SliceGM::~SliceGM(void) {
    {
  // TODO SliceGM
    GM::free(tabGM);
 }
 /*--------------------------------------*\
@@ -54,14 +56,13 @@ SliceGM::~SliceGM(void)
 *	Step 0 : Promotion of an array to GM (MemoryManagement MM)
 * 	Step 1 : Intra-thread reduction into the array promoted to GM
 * 	Step 2 : Reduction of the GM array by hierarchical pairwise folding
 * 		  The folding kernels are launched from the host (the conductor)
 * 	Step 4 : Copy of the result to the host
 * 	Step 5 : Destruction of the GM array
 * </pre>
 */
-void SliceGM::run()
+void SliceGM::run() {
    {
  // TODO SliceGM // call the kernel (asynchrone)
  reductionIntraThreadGM<<<dg, db>>>(tabGM, nbSlice);
  reductionGM();
 }
@@ -75,15 +76,27 @@ void SliceGM::run()
 * Etape 2 : recuperer le resultat coter host
 * Etape 3 : finaliser le calcule de PI
 */
-void SliceGM::reductionGM()
+void SliceGM::reductionGM() {
    {
  int midle = nTabGM >> 1; // nTabGM/2;
  dim3 dgx(midle, 1, 1);
  dim3 dbx(1, 1, 1);
  // TODO SliceGM
  while (midle >= 1) {
    ecrasementGM<<<dgx, dbx>>>(tabGM, midle);
    midle /= 2;
    dgx.x = midle;
  }
-    // Warning:		Utiliser une autre grille que celle heriter de la classe parente dg, db
+  float result;
-    // 			Votre grid ici doit avoir une taille speciale!
+  GM::memcpyDToH_float(&result, tabGM);
-    // 			N'utiliser donc pas les variables dg et db de la classe parentes
+  const double delta_x = 1.f / (float)nbSlice;
  *ptrPiHat = result * delta_x;
 // Warning:		Utiliser une autre grille que celle heriter de la classe
 // parente dg, db 			Votre grid ici doit avoir une taille
 // speciale! 			N'utiliser donc pas les variables dg et db de la
 // classe parentes
 // Tip:		Il y a une methode dedier pour ramener un float cote host
 //
--- a/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/host/SliceGM.h
+++ b/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/host/SliceGM.h
@@ -1,21 +1,19 @@
 #pragma once
 #include "cudas.h"
 #include "Grid.h"
 #include "RunnableGPU.h"
 #include "cudas.h"
 /*----------------------------------------------------------------------*\
 |*			Declaration 					*|
 \*---------------------------------------------------------------------*/
-class SliceGM: public RunnableGPU
+class SliceGM : public RunnableGPU {
    {
  /*--------------------------------------*\
  |*		Constructor		*|
   \*-------------------------------------*/
 public:
  /**
   * update piHat
   * Hyp : nbThread est une puissance de 2
@@ -29,14 +27,12 @@ class SliceGM: public RunnableGPU
   \*-------------------------------------*/
 public:
  /**
   * override
   */
  virtual void run();
 private:
  void reductionGM();
  /*--------------------------------------*\
@@ -44,7 +40,6 @@ class SliceGM: public RunnableGPU
   \*-------------------------------------*/
 private:
  // Inputs
  int nbSlice;
@@ -55,7 +50,6 @@ class SliceGM: public RunnableGPU
  float *tabGM;
  size_t sizeTabGM; // [octet]
  int nTabGM;
 };
 /*----------------------------------------------------------------------*\
--- a/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/host/SliceGM_BestGrid.h
+++ b/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/host/SliceGM_BestGrid.h
@@ -1,45 +1,35 @@
 #pragma once
 #include <iostream>
 #include <assert.h>
 #include <iostream>
 #include "Couts.h"
 #include "Grid.h"
 #include "Hardware.h"
 #include "Couts.h"
 /*----------------------------------------------------------------------*\
 |*			Impelmentation 					*|
 \*---------------------------------------------------------------------*/
-namespace sliceGM
+namespace sliceGM {
    {
-    class BestGrid
+class BestGrid {
 	{
 public:
  static Grid get() {
    const bool IS_CHECK_HEURISTIC = true;
-	    static Grid get()
+    const int MP = 64; // Hardware::getMPCount();
-		{
+    const int CORE_MP = Hardware::getCoreCountMP();
 		const bool IS_CHECK_HEURISTIC = false;
-		const int MP = Hardware::getMPCount();
+    dim3 dg(MP, 1, 1);                     // power 2 // TODO SliceGM grid
-
+    dim3 db(CORE_MP, 4, 1);                // power 2 // TODO SliceGM grid
 		dim3 dg(1, 1, 1); // power 2 // TODO SliceGM grid
 		dim3 db(1, 1, 1); // power 2 // TODO SliceGM grid
    Grid grid(dg, db, IS_CHECK_HEURISTIC); // all power 2
 		// to remove once coded
 		    {
 		    Couts::redln("aie aie aie, your best grid won t build itself");
 		    assert(false);
 		    }
    return grid;
  }
 };
-    }
+} // namespace sliceGM
 /*----------------------------------------------------------------------*\
 |*			End	 					*|
--- a/Student_Cuda/src/main/main.cpp
+++ b/Student_Cuda/src/main/main.cpp
@@ -28,8 +28,8 @@ int main(int argc , char** argv)
    // public
 	{
-	cudaContext.deviceId = 0; //  in [0,2] width Server Cuda3
+	cudaContext.deviceId = 1; //  in [0,2] width Server Cuda3
-	cudaContext.launchMode = LaunchModeMOO::TEST; // USE TEST  BENCHMARK  FORCEBRUT
+	cudaContext.launchMode = LaunchModeMOO::USE; // USE TEST  BENCHMARK  FORCEBRUT
 	cudaContext.deviceDriver = DeviceDriver::LOAD_ALL; // LOAD_CURRENT   LOAD_ALL
 	cudaContext.deviceInfo = DeviceInfo::ALL_SIMPLE;   // NONE  ALL  ALL_SIMPLE  CURRENT
--- a/Student_Cuda/src/main/mainBenchmark.cpp
+++ b/Student_Cuda/src/main/mainBenchmark.cpp
@@ -58,8 +58,8 @@ int mainBenchmark()
    // Slice
 	{
-	sliceGMHOST();
+	// sliceGMHOST();
-	//sliceGM();
+	sliceGM();
 	//sliceSM();
 	//sliceMulti();
 	}
--- a/Student_Cuda/src/main/mainBruteforce.cpp
+++ b/Student_Cuda/src/main/mainBruteforce.cpp
@@ -74,8 +74,8 @@ int mainBrutforce()
    // Slice
 	{
-	sliceGMHOST(&matlab);
+	// sliceGMHOST(&matlab);
-//	sliceGM(&matlab);
+	sliceGM(&matlab);
 //	sliceSM(&matlab);
 	}
@@ -374,4 +374,3 @@ void bruteforce(ProviderUse_I* ptrProviderUse , Matlab* ptrMatlab , const PlotTy
 /*----------------------------------------------------------------------*\
 |*			End	 					*|
 \*---------------------------------------------------------------------*/
--- a/Student_Cuda/src/main/mainTest.cpp
+++ b/Student_Cuda/src/main/mainTest.cpp
@@ -54,13 +54,13 @@ int mainTest()
 */
 void slice()
    {
-    VTSliceGMHOST test1;
+    // VTSliceGMHOST test1;
-    // VTSliceGM test2;
+    VTSliceGM test2;
    // VTSliceSM test3;
-    test1.run();
+    // test1.run();
-   // test2.run();
+   test2.run();
   // test3.run();
    }
--- a/Student_Cuda/src/main/mainUse.cpp
+++ b/Student_Cuda/src/main/mainUse.cpp
@@ -66,12 +66,12 @@ int mainUse()
 */
 void slice(bool& isOk)
    {
-    SliceGmHostUse sliceGmHostUse(IS_VERBOSE);
+    // SliceGmHostUse sliceGmHostUse(IS_VERBOSE);
-    // SliceGmUse sliceGmUse(IS_VERBOSE);
+    SliceGmUse sliceGmUse(IS_VERBOSE);
    // SliceSmUse sliceSmUse(IS_VERBOSE);
-    isOk &= sliceGmHostUse.isOk(IS_VERBOSE);
+    // isOk &= sliceGmHostUse.isOk(IS_VERBOSE);
-//    isOk &= sliceGmUse.isOk(IS_VERBOSE);
+   isOk &= sliceGmUse.isOk(IS_VERBOSE);
 //    isOk &= sliceSmUse.isOk(IS_VERBOSE);
    }