Skip to content

Commit 99b187a

Browse files
authored
kernel bias fix and cupti release method (#42)

* changed predictor.py
* cupti close connection
* setup.py original config
* changed cupti and runtime versions

---------

Co-authored-by: John Calderon <john.calderon@centml.ai>
1 parent f6b08ab commit 99b187a

File tree

5 files changed

+16
-7
lines changed

5 files changed

+16
-7
lines changed

analyzer/habitat/analysis/predictor.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ def _conv2d_scale(self, operation, dest_device):
180180
merged['padding'][0]
181181
if isinstance(merged['padding'], tuple) else merged['padding']
182182
),
183-
bias=(1 if merged['bias'] is not None else 0),
183+
bias=(1 if merged.get('bias',None) != None else 0),
184184
)
185185

186186
# 3. Call model to make prediction
@@ -214,7 +214,7 @@ def _conv_transpose2d_scale(self, operation, dest_device):
214214
merged['padding'][0]
215215
if isinstance(merged['padding'], tuple) else merged['padding']
216216
),
217-
bias=(1 if merged['bias'] is not None else 0),
217+
bias=(1 if merged.get('bias',None) != None else 0),
218218
)
219219

220220
# 3. Call model to make prediction
@@ -251,7 +251,7 @@ def _linear_scale(self, operation, dest_device):
251251
batch=effective_batch,
252252
in_features=merged['weight'][1],
253253
out_features=merged['weight'][0],
254-
bias=(1 if merged['bias'] is not None else 0)
254+
bias=(1 if merged.get('bias',None) != None else 0)
255255
)
256256

257257
arguments = [arguments[x] for x in self.linear_pred.model.features]
@@ -293,7 +293,7 @@ def _lstm_scale(self, operation, dest_device):
293293
operation.arguments.kwargs,
294294
)
295295
arguments = dict(
296-
bias=(1 if merged['bias'] is not None else 0),
296+
bias=(1 if merged.get('bias',None) != None else 0),
297297
bidirectional=(1 if merged['bidirectional'] else 0),
298298
batch=merged['input'][1], # We require the batch to be in position 1
299299
seq_len=merged['input'][0],
@@ -310,7 +310,7 @@ def _lstm_scale(self, operation, dest_device):
310310
)
311311
max_batch_size = max(operation.arguments.special['batch_sizes'])
312312
arguments = dict(
313-
bias=(1 if merged['bias'] is not None else 0),
313+
bias=(1 if merged.get('bias',None) != None else 0),
314314
bidirectional=(1 if merged['bidirectional'] else 0),
315315
batch=max_batch_size,
316316
seq_len=merged['input'][0] // max_batch_size,

analyzer/setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@
4747
"torch>=1.4.0",
4848
"pandas>=1.1.2",
4949
"tqdm>=4.49.0",
50-
"nvidia-cuda-cupti-cu11==11.7.101",
51-
"nvidia-cuda-runtime-cu11==11.7.99",
50+
"nvidia-cuda-cupti-cu12",
51+
"nvidia-cuda-runtime-cu12",
5252
"incremental"
5353
]
5454

cpp/src/frontend/profiler.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,5 +39,9 @@ std::vector<KernelInstance> profile(
3939
return kernels;
4040
}
4141

42+
void release_cupti_hook(){
43+
CuptiManager::instance().unloadCupti();
44+
}
45+
4246
}
4347
}

cpp/src/frontend/profiler.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,6 @@ std::vector<cuda::KernelInstance> profile(std::function<void()> runnable);
1616
std::vector<cuda::KernelInstance> profile(
1717
std::function<void()> runnable, const std::string& metric);
1818

19+
void release_cupti_hook();
1920
}
2021
}

cpp/src/habitat_cuda.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,8 @@ PYBIND11_MODULE(habitat_cuda, m) {
3131
.def("run_flop_test", [](size_t num_blocks, size_t threads_per_block) {
3232
habitat::cuda::diagnostics::run_flop_test(num_blocks, threads_per_block);
3333
}, py::arg("num_blocks") = 8192, py::arg("threads_per_block") = 256);
34+
35+
m.def("release_cupti_hook",[](){
36+
habitat::frontend::release_cupti_hook();
37+
});
3438
}

0 commit comments

Comments (0)