From 17327586562a25947ca4140521f4daaaf3d72daf Mon Sep 17 00:00:00 2001
From: Klagarge <remi@heredero.ch>
Date: Fri, 7 Nov 2025 12:15:18 +0100
Subject: [PATCH] feat(lab02): add Slice GM

---
 .../02_Slice_GM/device/sliceGM_device.cu      | 47 ++++++----
 .../03_Slice/02_Slice_GM/host/SliceGM.cu      | 87 +++++++++++--------
 .../03_Slice/02_Slice_GM/host/SliceGM.h       | 76 ++++++++--------
 .../02_Slice_GM/host/SliceGM_BestGrid.h       | 42 ++++-----
 Student_Cuda/src/main/main.cpp                |  4 +-
 Student_Cuda/src/main/mainBenchmark.cpp       |  4 +-
 Student_Cuda/src/main/mainBruteforce.cpp      |  5 +-
 Student_Cuda/src/main/mainTest.cpp            |  8 +-
 Student_Cuda/src/main/mainUse.cpp             |  8 +-
 9 files changed, 146 insertions(+), 135 deletions(-)
diff --git a/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/device/sliceGM_device.cu b/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/device/sliceGM_device.cu
index 76c718d..b56eb85 100755
--- a/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/device/sliceGM_device.cu
+++ b/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/device/sliceGM_device.cu
@@ -1,5 +1,5 @@
-#include "Thread2D.cu.h"
 #include "Thread1D.cu.h"
+#include "Thread2D.cu.h"
 #include "cudas.h"
 
 #include <stdio.h>
@@ -16,42 +16,57 @@ static __device__ float f(float x);
 
 /**
  * <pre>
- * Chaque thread effecteur une reduction intrathread avec le patern d'entrelacement,
- * puis stocke son résultat dans SA case dans tabGM
+ * Chaque thread effecteur une reduction intrathread avec le patern
+ * d'entrelacement, puis stocke son résultat dans SA case dans tabGM
  *
  * tabGM est un tableau promu, qui a autant de case que de thread
  * </pre>
  */
-__global__ void reductionIntraThreadGM(float* tabGM , int nbSlice)
-    {
-    // TODO SliceGM (idem SliceGMHOST) pour cette partie
-    }
+__global__ void reductionIntraThreadGM(float *tabGM, int nbSlice) {
+  // tabGM must hold one float per launched thread (cell = Thread2D::tid()).
+  const int NB_THREAD = Thread2D::nbThread();
+  const int TID = Thread2D::tid();
+
+  const float DX = 1.f / (float)nbSlice;
+
+  // Accumulate in a local variable instead of read-modify-writing tabGM[TID]
+  // on every iteration; thread TID handles slices TID, TID + NB_THREAD, ...
+  float sumThread = 0.f;
+  for (int s = TID; s < nbSlice; s += NB_THREAD) {
+    sumThread += f(s * DX);
+  }
+
+  // Single store: a thread that received no slice (TID >= nbSlice) writes 0.
+  tabGM[TID] = sumThread;
+}
 
 /**
  * <pre>
  * Effectue la reduction de tabGM cote device, par ecrasement 2 à 2 successif.
- * Ce kernel d ecrasement est appeler depuis le host dans une boucle, avec le bon nombre de thread
+ * Ce kernel d ecrasement est appeler depuis le host dans une boucle, avec le
+ * bon nombre de thread
  *
  * Hypothese : |tabGM| est une puissance de 2
  *
  * Output: le resultat de la reduction est tans tabGM[0]
  * </pre>
  */
-__global__ void ecrasementGM(float* tabGM , int moitier)
-    {
-    // TODO SliceGM
-    }
+__global__ void ecrasementGM(float *tabGM, int moitier) {
+  // One folding pass: thread i adds tabGM[i + moitier] onto tabGM[i].
+  // Threads with i >= moitier have nothing to fold and leave at once.
+  const int i = Thread2D::tid();
+  if (i >= moitier) return;
+  tabGM[i] = tabGM[i] + tabGM[i + moitier];
+}
 
 /*--------------------------------------*\
  |*		Private			*|
  \*-------------------------------------*/
 
-__device__ float f(float x)
-    {
+__device__ float f(float x) {
     return 4.f / (1.f + x * x);
-    }
+}
 
 /*----------------------------------------------------------------------*\
  |*			End	 					*|
  \*---------------------------------------------------------------------*/
-
diff --git a/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/host/SliceGM.cu b/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/host/SliceGM.cu
index e82d17d..9ad3e96 100755
--- a/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/host/SliceGM.cu
+++ b/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/host/SliceGM.cu
@@ -1,12 +1,12 @@
 #include "SliceGM.h"
 
-#include <iostream>
 #include <assert.h>
+#include <iostream>
 
 #include "GM.h"
-#include "Maths.h"
 #include "Hardware.h"
 #include "Kernel.h"
+#include "Maths.h"
 
 using std::cout;
 using std::endl;
@@ -16,8 +16,8 @@ using std::to_string;
  |*			Imported 					*|
  \*---------------------------------------------------------------------*/
 
-extern __global__ void reductionIntraThreadGM(float* tabGM,int nbSlice);
-extern __global__ void ecrasementGM(float* tabGM, int moitier);
+extern __global__ void reductionIntraThreadGM(float *tabGM, int nbSlice);
+extern __global__ void ecrasementGM(float *tabGM, int moitier);
 
 /*----------------------------------------------------------------------*\
  |*			Implementation 					*|
@@ -27,21 +27,23 @@ extern __global__ void ecrasementGM(float* tabGM, int moitier);
  |*		Constructeur		*|
  \*-------------------------------------*/
 
-SliceGM::SliceGM(Grid grid , int nbSlice , double* ptrPiHat , bool isVerbose) :
-	RunnableGPU(grid, "SliceGM_" + to_string(nbSlice), isVerbose), // classe parente
-//
-	nbSlice(nbSlice), //
-	ptrPiHat(ptrPiHat) //
-    {
-    this->nTabGM = -1; // TODO SliceGM
-    this->sizeTabGM = -1; //  TODO SliceGM // [octet]
+SliceGM::SliceGM(Grid grid, int nbSlice, double *ptrPiHat, bool isVerbose)
+    : RunnableGPU(grid, "SliceGM_" + to_string(nbSlice), isVerbose), // parent
+      nbSlice(nbSlice),
+      ptrPiHat(ptrPiHat) {
+  // One float cell per thread of the grid: each thread of the first kernel
+  // stores its partial sum in its own cell of tabGM.
+  const int nbThread = grid.threadCounts();
+  this->nTabGM = nbThread;
+  this->sizeTabGM = nbThread * sizeof(float); // [bytes]
+
+  // Step 0: reserve tabGM through the GM helper; released in the destructor.
+  GM::malloc(&this->tabGM, this->sizeTabGM);
+}
 
-SliceGM::~SliceGM(void)
-    {
-    // TODO SliceGM
-    }
+SliceGM::~SliceGM(void) {
+  // Release tabGM, the buffer obtained with GM::malloc in the constructor.
+    GM::free(tabGM);
+}
 
 /*--------------------------------------*\
  |*		Methode			*|
@@ -54,17 +56,16 @@ SliceGM::~SliceGM(void)
  *	Etape 0 : Promotion d'un tableau en GM (MemoryManagement MM)
  * 	Etape 1 : Reduction intra-thread dans un tableau promu en GM
  * 	Etape 2 : Reduction du tableau en GM par ecrasement hierarchique 2 à 2
- * 		  On lance les kernels d'ecrasement depuis le host (chef d'orchestre)
- * 	Etape 4 : Copy du resultat coter host
- * 	Etape 5 : Destruction GM
+ * 		  Kernels d'ecrasement lances depuis le host (chef d'orchestre)
+ * 	Etape 4 : Copy du resultat cote host ; Etape 5 : Destruction GM
  * </pre>
  */
-void SliceGM::run()
-    {
-    //TODO SliceGM // call the kernel (asynchrone)
+void SliceGM::run() {
+  // Step 1: launch the intra-thread reduction kernel (asynchronous call).
+  reductionIntraThreadGM<<<dg, db>>>(tabGM, nbSlice);
 
-    reductionGM();
-    }
+  reductionGM();
+}
 
 /*--------------------------------------*\
  |*		Private			*|
@@ -75,21 +76,33 @@ void SliceGM::run()
  * Etape 2 : recuperer le resultat coter host
  * Etape 3 : finaliser le calcule de PI
  */
-void SliceGM::reductionGM()
-    {
-    int midle = nTabGM >> 1; // nTabGM/2;
+void SliceGM::reductionGM() {
+  // Hypothesis (see SliceGM.h): nTabGM is a power of 2; otherwise an odd
+  // leftover cell would be dropped by one of the folding passes.
+  const int NB_THREAD_BLOCK_MAX = 256; // power of 2, cap for folding blocks
 
-    // TODO SliceGM
+  // Step 1: fold tabGM in place; each pass adds the upper half of the live
+  // range onto its lower half, so the total ends up in tabGM[0].
+  // The inherited dg/db are not used here: every pass needs `half` threads.
+  // Launches are asynchronous but ordered: all go to the default stream.
+  for (int half = nTabGM >> 1; half >= 1; half >>= 1) {
+    const int nbThreadBlock =
+        half < NB_THREAD_BLOCK_MAX ? half : NB_THREAD_BLOCK_MAX;
+    // Ceil-div on the grid: ecrasementGM ignores threads with tid >= half.
+    const dim3 dgFold((half + nbThreadBlock - 1) / nbThreadBlock, 1, 1);
+    const dim3 dbFold(nbThreadBlock, 1, 1);
+    ecrasementGM<<<dgFold, dbFold>>>(tabGM, half);
+  }
 
-    // Warning:		Utiliser une autre grille que celle heriter de la classe parente dg, db
-    // 			Votre grid ici doit avoir une taille speciale!
-    // 			N'utiliser donc pas les variables dg et db de la classe parentes
+  // Step 2: bring the reduced value, tabGM[0], back to the host.
+  float sumGM = 0.f;
+  GM::memcpyDToH_float(&sumGM, tabGM);
 
-    // Tip:		Il y a une methode dedier pour ramener un float cote host
-    //
-    //				float resultat;
-    //				GM::memcpyDToH_float(&resultat,ptrResultGM);
-    }
+  // Step 3: scale by the slice width. The quotient is evaluated in double: a
+  // float quotient widened afterwards would carry float rounding into piHat.
+  const double DX = 1.0 / (double)nbSlice;
+  *ptrPiHat = sumGM * DX;
+}
 
 /*----------------------------------------------------------------------*\
  |*			End	 					*|
diff --git a/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/host/SliceGM.h b/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/host/SliceGM.h
index 2044d49..1300d71 100755
--- a/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/host/SliceGM.h
+++ b/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/host/SliceGM.h
@@ -1,62 +1,56 @@
 #pragma once
 
-#include "cudas.h"
 #include "Grid.h"
 #include "RunnableGPU.h"
+#include "cudas.h"
 
 /*----------------------------------------------------------------------*\
  |*			Declaration 					*|
  \*---------------------------------------------------------------------*/
 
-class SliceGM: public RunnableGPU
-    {
-	/*--------------------------------------*\
-	|*		Constructor		*|
-	 \*-------------------------------------*/
+class SliceGM : public RunnableGPU {
+  /*--------------------------------------*\
+  |*		Constructor		*|
+   \*-------------------------------------*/
 
-    public:
+public:
+  /**
+   * Computes an approximation of pi; run() writes it to *ptrPiHat.
+   * Hypothesis: the thread count of grid is a power of 2.
+   */
+  SliceGM(Grid grid, int nbSlice, double *ptrPiHat, bool isVerbose);
 
-	/**
-	 * update piHat
-	 * Hyp : nbThread est une puissance de 2
-	 */
-	SliceGM(Grid grid , int nbSlice , double* ptrPiHat , bool isVerbose);
+  virtual ~SliceGM();
 
-	virtual ~SliceGM();
+  /*--------------------------------------*\
+  |*		Methodes		*|
+   \*-------------------------------------*/
 
-	/*--------------------------------------*\
-	|*		Methodes		*|
-	 \*-------------------------------------*/
+public:
+  /**
+   * Override of the parent class method.
+   */
+  virtual void run();
 
-    public:
+private:
+  void reductionGM();
 
-	/**
-	 * override
-	 */
-	virtual void run();
+  /*--------------------------------------*\
+  |*		Attributs		*|
+   \*-------------------------------------*/
 
-    private:
+private:
+  // Inputs: number of slices used for the sum
+  int nbSlice;
 
-	void reductionGM();
+  // Inputs/Outputs: destination of the computed value, owned by the caller
+  double *ptrPiHat;
 
-	/*--------------------------------------*\
-	|*		Attributs		*|
-	 \*-------------------------------------*/
-
-    private:
-
-	// Inputs
-	int nbSlice;
-
-	// Inputs/Outputs
-	double* ptrPiHat;
-
-	// Tools
-	float* tabGM;
-	size_t sizeTabGM; // [octet]
-	int nTabGM;
-
-    };
+  // Tools: work buffer handled through GM::malloc / GM::free
+  float *tabGM;
+  size_t sizeTabGM; // [bytes]
+  int nTabGM; // number of float cells in tabGM
+};
 
 /*----------------------------------------------------------------------*\
  |*			End	 					*|
diff --git a/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/host/SliceGM_BestGrid.h b/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/host/SliceGM_BestGrid.h
index 3dfc4cc..d8dd57e 100755
--- a/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/host/SliceGM_BestGrid.h
+++ b/Student_Cuda/src/core/01_student/03_Slice/02_Slice_GM/host/SliceGM_BestGrid.h
@@ -1,45 +1,35 @@
 #pragma once
 
-#include <iostream>
 #include <assert.h>
+#include <iostream>
 
+#include "Couts.h"
 #include "Grid.h"
 #include "Hardware.h"
-#include "Couts.h"
 
 /*----------------------------------------------------------------------*\
  |*			Impelmentation 					*|
  \*---------------------------------------------------------------------*/
 
-namespace sliceGM
-    {
+namespace sliceGM {
 
-    class BestGrid
-	{
+class BestGrid {
 
-	public:
+public:
+  static Grid get() {
+    const bool IS_CHECK_HEURISTIC = true;
 
-	    static Grid get()
-		{
-		const bool IS_CHECK_HEURISTIC = false;
+    const int MP = 64; // NOTE(review): hard-coded, not Hardware::getMPCount() - confirm
+    const int CORE_MP = Hardware::getCoreCountMP();
 
-		const int MP = Hardware::getMPCount();
+    dim3 dg(MP, 1, 1);                     // 64 blocks (power of 2)
+    dim3 db(CORE_MP, 4, 1);                // NOTE(review): power of 2 only if CORE_MP is - confirm
+    Grid grid(dg, db, IS_CHECK_HEURISTIC); // SliceGM assumes a power-of-2 thread count
 
-		dim3 dg(1, 1, 1); // power 2 // TODO SliceGM grid
-		dim3 db(1, 1, 1); // power 2 // TODO SliceGM grid
-		Grid grid(dg, db, IS_CHECK_HEURISTIC); // all power 2
-
-		// to remove once coded
-		    {
-		    Couts::redln("aie aie aie, your best grid won t build itself");
-		    assert(false);
-		    }
-
-		return grid;
-		}
-
-	};
-    }
+    return grid;
+  }
+};
+} // namespace sliceGM
 
 /*----------------------------------------------------------------------*\
  |*			End	 					*|
diff --git a/Student_Cuda/src/main/main.cpp b/Student_Cuda/src/main/main.cpp
index c47ed23..5e198e6 100755
--- a/Student_Cuda/src/main/main.cpp
+++ b/Student_Cuda/src/main/main.cpp
@@ -28,8 +28,8 @@ int main(int argc , char** argv)
 
     // public
 	{
-	cudaContext.deviceId = 0; //  in [0,2] width Server Cuda3
-	cudaContext.launchMode = LaunchModeMOO::TEST; // USE TEST  BENCHMARK  FORCEBRUT
+	cudaContext.deviceId = 1; //  in [0,2] width Server Cuda3
+	cudaContext.launchMode = LaunchModeMOO::USE; // USE TEST  BENCHMARK  FORCEBRUT
 
 	cudaContext.deviceDriver = DeviceDriver::LOAD_ALL; // LOAD_CURRENT   LOAD_ALL
 	cudaContext.deviceInfo = DeviceInfo::ALL_SIMPLE;   // NONE  ALL  ALL_SIMPLE  CURRENT
diff --git a/Student_Cuda/src/main/mainBenchmark.cpp b/Student_Cuda/src/main/mainBenchmark.cpp
index 6c1c85a..148447c 100755
--- a/Student_Cuda/src/main/mainBenchmark.cpp
+++ b/Student_Cuda/src/main/mainBenchmark.cpp
@@ -58,8 +58,8 @@ int mainBenchmark()
 
     // Slice
 	{
-	sliceGMHOST();
-	//sliceGM();
+	// sliceGMHOST();
+	sliceGM();
 	//sliceSM();
 	//sliceMulti();
 	}
diff --git a/Student_Cuda/src/main/mainBruteforce.cpp b/Student_Cuda/src/main/mainBruteforce.cpp
index 50b8926..02ca114 100755
--- a/Student_Cuda/src/main/mainBruteforce.cpp
+++ b/Student_Cuda/src/main/mainBruteforce.cpp
@@ -74,8 +74,8 @@ int mainBrutforce()
 
     // Slice
 	{
-	sliceGMHOST(&matlab);
-//	sliceGM(&matlab);
+	// sliceGMHOST(&matlab);
+	sliceGM(&matlab);
 //	sliceSM(&matlab);
 	}
 
@@ -374,4 +374,3 @@ void bruteforce(ProviderUse_I* ptrProviderUse , Matlab* ptrMatlab , const PlotTy
 /*----------------------------------------------------------------------*\
  |*			End	 					*|
  \*---------------------------------------------------------------------*/
-
diff --git a/Student_Cuda/src/main/mainTest.cpp b/Student_Cuda/src/main/mainTest.cpp
index 88e25d5..4046c5c 100755
--- a/Student_Cuda/src/main/mainTest.cpp
+++ b/Student_Cuda/src/main/mainTest.cpp
@@ -54,13 +54,13 @@ int mainTest()
  */
 void slice()
     {
-    VTSliceGMHOST test1;
-    // VTSliceGM test2;
+    // VTSliceGMHOST test1;
+    VTSliceGM test2;
     // VTSliceSM test3;
 
 
-    test1.run();
-   // test2.run();
+    // test1.run();
+   test2.run();
    // test3.run();
     }
 
diff --git a/Student_Cuda/src/main/mainUse.cpp b/Student_Cuda/src/main/mainUse.cpp
index 0d84384..e827da5 100755
--- a/Student_Cuda/src/main/mainUse.cpp
+++ b/Student_Cuda/src/main/mainUse.cpp
@@ -66,12 +66,12 @@ int mainUse()
  */
 void slice(bool& isOk)
     {
-    SliceGmHostUse sliceGmHostUse(IS_VERBOSE);
-    // SliceGmUse sliceGmUse(IS_VERBOSE);
+    // SliceGmHostUse sliceGmHostUse(IS_VERBOSE);
+    SliceGmUse sliceGmUse(IS_VERBOSE);
     // SliceSmUse sliceSmUse(IS_VERBOSE);
 
-    isOk &= sliceGmHostUse.isOk(IS_VERBOSE);
-//    isOk &= sliceGmUse.isOk(IS_VERBOSE);
+    // isOk &= sliceGmHostUse.isOk(IS_VERBOSE);
+   isOk &= sliceGmUse.isOk(IS_VERBOSE);
 //    isOk &= sliceSmUse.isOk(IS_VERBOSE);
     }