Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions common/CudaWorker/CudaWorkerThread.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@

#include <chrono>


// Converts a cudaError_enum value to int via static_cast.
// NOTE(review): presumably the {fmt} `format_as` extension point, so the value
// can be passed to "{}" placeholders and print as a number - confirm.
inline int format_as(cudaError_enum type)
{
    const int numericCode = static_cast<int>(type);
    return numericCode;
}



#include "FieldWorkers.hpp"
#include <DcgmTaskRunner.h>

Expand Down
7 changes: 6 additions & 1 deletion common/CudaWorker/FieldWorkers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,21 @@
*/
#pragma once

#include <cublas_proxy.hpp>
//#include <cublas_proxy.hpp>
#include "../cublas_proxy/cublas_proxy.hpp"
#include <cuda.h>

#if (CUDA_VERSION_USED >= 11)
#include "DcgmDgemm.hpp"
#endif

#include <DcgmLogging.h>

#include <fmt/format.h>
#include <timelib.h>
// Converts a cublasStatus_t value to int via static_cast.
// NOTE(review): presumably the {fmt} `format_as` extension point, so the status
// can be passed to "{}" placeholders and print as a number - confirm.
inline int format_as(cublasStatus_t type)
{
    const int statusCode = static_cast<int>(type);
    return statusCode;
}

using namespace Dcgm;

Expand Down
2 changes: 1 addition & 1 deletion common/DcgmLogging.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
#include <mutex>
#include <string>
#include <syslog.h>

#include "format.hpp" // now define formatting for classes in this module
#define DCGM_LOGGING_SEVERITY_OPTIONS "NONE, FATAL, ERROR, WARN, INFO, DEBUG, VERB"

#define DCGM_LOGGING_SEVERITY_STRING_VERBOSE "VERB"
Expand Down
2 changes: 2 additions & 0 deletions common/DcgmMutex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
#include <chrono>
#include <ratio>

// Converts a dcgmMutexSt value to int via static_cast.
// NOTE(review): presumably the {fmt} `format_as` extension point used when this
// value is logged - confirm.
inline int format_as(dcgmMutexSt type)
{
    const int asInt = static_cast<int>(type);
    return asInt;
}

/*****************************************************************************/
DcgmMutex::DcgmMutex(int timeoutMs)
// Cast to long long to avoid overflowing before widening to a long long
Expand Down
2 changes: 1 addition & 1 deletion common/DcgmWatchTable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#include "DcgmLogging.h"
#include "DcgmUtilities.h"
#include "DcgmWatchTable.h"

// Converts a DcgmWatcherType_t value to int via static_cast.
// NOTE(review): presumably the {fmt} `format_as` extension point used when this
// value is logged - confirm.
inline int format_as(DcgmWatcherType_t type)
{
    const int watcherTypeValue = static_cast<int>(type);
    return watcherTypeValue;
}
/*****************************************************************************/
DcgmWatchTable::DcgmWatchTable()
: m_entityWatchHashTable()
Expand Down
4 changes: 2 additions & 2 deletions common/tests/TaskRunnerTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ TEST_CASE("TaskRunner: Limited Queue")

tr.Stop();
fmt::print("Wait iterations elapsed: {}\n", cWaitIterations - waitIterations);
fmt::print("Iterations: {}\nExecutions: {}\nFailed to add: {}\n", iterations, executed, failedToAdd);
fmt::print("Iterations: {}\nExecutions: {}\nFailed to add: {}\n", (int)iterations, (int)executed, (int)failedToAdd);
REQUIRE(executed >= cTaskRunnerCapacity);
REQUIRE((failedToAdd + executed) == iterations);
}
Expand All @@ -310,4 +310,4 @@ TEST_CASE("TaskRunner: Task with attempts")
REQUIRE(fut.has_value());
REQUIRE_THROWS_AS((*fut).get(), std::future_error);
tr.Stop();
}
}
30 changes: 30 additions & 0 deletions dcgmlib/format.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#ifndef DCGM_FORMAT_H
#define DCGM_FORMAT_H
// Collection of format_as() overloads; each one converts a value of the named
// type to int via static_cast.
// NOTE(review): this header includes nothing itself, so every type named below
// must already be declared by the including file before this point - confirm
// the include order at each inclusion site.
// NOTE(review): presumably these are {fmt} `format_as` extension points so the
// values can be used with "{}" placeholders - confirm.

// NOTE(review): #ifdef tests whether a preprocessor MACRO with this name is
// defined; it does not detect a type. Unless DcgmWatcherType_t is also
// #defined somewhere, this overload is never compiled.
#ifdef DcgmWatcherType_t
inline int format_as(DcgmWatcherType_t type) { return static_cast<int>(type);}
#endif
inline int format_as(dcgmChipArchitecture_enum type) { return static_cast<int>(type);}
inline int format_as(dcgmConfigType_enum type) { return static_cast<int>(type);}
inline int format_as(dcgmDiagnosticLevel_t type) { return static_cast<int>(type);}

inline int format_as(dcgmGroupType_enum type) { return static_cast<int>(type);}
inline int format_as(dcgmHealthSystems_enum type) { return static_cast<int>(type);}
inline int format_as(dcgmModuleId_t type) { return static_cast<int>(type);}
inline int format_as(dcgmModuleStatus_t type) { return static_cast<int>(type);}
// Overload for dcgmMutexSt is intentionally disabled here (left commented out).
//inline int format_as(dcgmMutexSt type) { return static_cast<int>(type);}

// NOTE(review): same macro-vs-type caveat as above; also verify the spelling of
// the type name (no "_t" suffix here) against its declaration.
#ifdef dcgmNvLinkLinkState
inline int format_as(dcgmNvLinkLinkState type) { return static_cast<int>(type);}
#endif
inline int format_as(dcgmOrder_enum type) { return static_cast<int>(type);}
inline int format_as(dcgmPolicyCondition_enum type) { return static_cast<int>(type);}
inline int format_as(dcgmPolicyValidation_enum type) { return static_cast<int>(type);}
inline int format_as(dcgmPolicyAction_enum type) { return static_cast<int>(type);}

inline int format_as(dcgmReturn_enum type) { return static_cast<int>(type);}
inline int format_as(dcgm_field_entity_group_t type) { return static_cast<int>(type);}

// NOTE(review): same macro-vs-type caveat as above; this overload is compiled
// only if a macro named nvmlReturn_enum is defined.
#ifdef nvmlReturn_enum
inline int format_as(nvmlReturn_enum type) { return static_cast<int>(type);}
#endif
#endif
6 changes: 4 additions & 2 deletions dcgmlib/src/DcgmCacheManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,8 @@
#include <TimeLib.hpp>
#include <dcgm_agent.h>
#include <dcgm_nvswitch_structs.h>

#include <fmt/chrono.h>
#include <fmt/format.h>

#include <algorithm>
#include <cmath>
#include <cstring>
Expand All @@ -43,6 +41,10 @@
#include <sys/types.h>
#include <unistd.h>

// File-local format_as() overloads: each converts a value of the named type to
// int via static_cast.
// NOTE(review): presumably {fmt} `format_as` extension points used by the
// logging calls in this file - confirm.
inline int format_as(nvmlReturn_enum type)
{
    const int nvmlCode = static_cast<int>(type);
    return nvmlCode;
}

inline int format_as(dcgmNvLinkLinkState_t type)
{
    const int linkStateValue = static_cast<int>(type);
    return linkStateValue;
}

inline int format_as(DcgmWatcherType_t type)
{
    const int watcherTypeValue = static_cast<int>(type);
    return watcherTypeValue;
}

inline int format_as(dcgmEntityStatusType_enum type)
{
    const int entityStatusValue = static_cast<int>(type);
    return entityStatusValue;
}

#define DRIVER_VERSION_510 510

Expand Down
2 changes: 1 addition & 1 deletion dcgmlib/src/DcgmGroupManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#include "DcgmSettings.h"
#include <fmt/format.h>
#include <stdexcept>

// Converts a dcgmEntityStatusType_enum value to int via static_cast.
// NOTE(review): presumably the {fmt} `format_as` extension point - confirm.
inline int format_as(dcgmEntityStatusType_enum type)
{
    const int entityStatusValue = static_cast<int>(type);
    return entityStatusValue;
}
/*****************************************************************************
* Implementation for Group Manager Class
*****************************************************************************/
Expand Down
2 changes: 1 addition & 1 deletion dcgmlib/src/DcgmHostEngineHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
#include <nvml_injection.h>
#include <ranges>
#endif

// Converts a DcgmWatcherType_t value to int via static_cast.
// NOTE(review): presumably the {fmt} `format_as` extension point - confirm.
inline int format_as(DcgmWatcherType_t type)
{
    const int watcherTypeValue = static_cast<int>(type);
    return watcherTypeValue;
}
DcgmHostEngineHandler *DcgmHostEngineHandler::mpHostEngineHandlerInstance = nullptr;
DcgmModuleCore DcgmHostEngineHandler::mModuleCoreObj;

Expand Down
2 changes: 1 addition & 1 deletion dcgmlib/src/DcgmVgpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include "dcgm_structs.h"
#include "dcgm_structs_internal.h"
#include <DcgmStringHelpers.h>

// Converts an nvmlReturn_enum value to int via static_cast.
// NOTE(review): presumably the {fmt} `format_as` extension point - confirm.
inline int format_as(nvmlReturn_enum type)
{
    const int nvmlCode = static_cast<int>(type);
    return nvmlCode;
}
/*****************************************************************************/
static std::string_view ConvertNvmlGridLicenseStateToString(unsigned int licenseState)
{
Expand Down
2 changes: 1 addition & 1 deletion dcgmproftester/DcgmProfTester.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#include "dcgm_structs.h"
#include "timelib.h"
#include "vector_types.h"
#include <cublas_proxy.hpp>
#include "../cublas_proxy/cublas_proxy.hpp"
#include <cuda.h>
#include <dcgm_agent.h>

Expand Down
3 changes: 2 additions & 1 deletion dcgmproftester/PhysicalGpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
#include "dcgm_fields_internal.hpp"
#include "timelib.h"
#include "vector_types.h"
#include <cublas_proxy.hpp>
//#include <cublas_proxy.hpp>
#include "../cublas_proxy/cublas_proxy.hpp"
#include <cuda.h>
#include <dcgm_agent.h>

Expand Down
5 changes: 2 additions & 3 deletions modules/nvswitch/DcgmNvSwitchManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,15 @@
#include <optional>
#include <string>
#include <tuple>

#include <DcgmLogging.h>
#include <DcgmSettings.h>

#include "FieldIds.h"
#include "NvSwitchData.h"
#include "UpdateFunctions.h"

#include "DcgmNvSwitchManager.h"

// Converts a dcgmNvLinkLinkState_t value to int via static_cast.
// NOTE(review): presumably the {fmt} `format_as` extension point - confirm.
inline int format_as(dcgmNvLinkLinkState_t type)
{
    const int linkStateValue = static_cast<int>(type);
    return linkStateValue;
}

namespace DcgmNs
{
using phys_id_t = uint32_t;
Expand Down
2 changes: 2 additions & 0 deletions modules/policy/DcgmPolicyManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,4 +167,6 @@ class DcgmPolicyManager
dcgmReturn_t WatchFields(dcgm_connection_id_t connectionId);
};

// Converts a DcgmViolationPolicyAlert_enum value to int via static_cast.
// NOTE(review): presumably the {fmt} `format_as` extension point - confirm.
inline int format_as(DcgmViolationPolicyAlert_enum type)
{
    const int alertValue = static_cast<int>(type);
    return alertValue;
}

#endif // DCGMPOLICYMANAGER_H
6 changes: 6 additions & 0 deletions nvml-injection/include/InjectionArgument.h
Original file line number Diff line number Diff line change
Expand Up @@ -2039,12 +2039,18 @@ class InjectionArgument
return m_value.processInfo_v1Ptr;
}

/*
nvml-injection/include/InjectionArgument.h:2042:5: error: ‘InjectionArgument::InjectionArgument(nvmlProcessInfo_v2_t*)’ cannot be overloaded with ‘InjectionArgument::InjectionArgument(nvmlProcessInfo_t*)’
2042 | InjectionArgument(nvmlProcessInfo_v2_t *processInfo_v2Ptr)
| ^~~~~~~~~~~~~~~~~

InjectionArgument(nvmlProcessInfo_v2_t *processInfo_v2Ptr)
: m_type(INJECTION_PROCESSINFO_V2_PTR)
{
memset(&m_value, 0, sizeof(m_value));
m_value.processInfo_v2Ptr = processInfo_v2Ptr;
}
*/
nvmlProcessInfo_v2_t *AsProcessInfo_v2Ptr() const
{
return m_value.processInfo_v2Ptr;
Expand Down
5 changes: 5 additions & 0 deletions nvvs/include/JsonResult.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@
#include <string>
#include <vector>

// Converts an nvvsReturn_t value to int via static_cast.
// NOTE(review): presumably the {fmt} `format_as` extension point - confirm.
inline int format_as(nvvsReturn_t type)
{
    const int returnCode = static_cast<int>(type);
    return returnCode;
}

/*
* This file contains the JSON serialization and deserialization logic and structures for the NVVS JSON result format.
* Here is an example of the JSON format:
Expand Down Expand Up @@ -69,6 +73,7 @@
namespace DcgmNs::Nvvs::Json
{


struct Info
{
std::vector<std::string> messages;
Expand Down
12 changes: 6 additions & 6 deletions nvvs/plugin_src/memory/l1tag.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
*/

#include "L1TagCuda.h"
#include "newrandom.h"
//#include "newrandom.h"

__device__ void ReportError
(
Expand Down Expand Up @@ -66,11 +66,11 @@ extern "C" __global__ void InitL1Data(const L1TagParams params)

// Init RNG (each SM data region will have the same data)
unsigned64 s[2];
InitRand<2>(s, params.randSeed + threadIdx.x);
//InitRand<2>(s, params.randSeed + threadIdx.x);

for (uint32_t i = threadIdx.x; i < smidDataBytes / sizeof(*buf); i += blockDim.x)
{
const uint16_t rnd = static_cast<uint16_t>(FastRand(s) >> 48);
const uint16_t rnd = 2;//static_cast<uint16_t>(FastRand(s) >> 48);
buf[i] = EncodeOffset(i, rnd);
}
}
Expand All @@ -92,8 +92,8 @@ extern "C" __global__ void L1TagTest(const L1TagParams params)

// Init RNG (each SM will use the same seed, for equivalent data accesses)
unsigned64 s[2];
InitRand<2>(s, params.randSeed + hwtid);
uint32_t rnd = static_cast<uint32_t>(FastRand(s));
//InitRand<2>(s, params.randSeed + hwtid);
uint32_t rnd = 1;//static_cast<uint32_t>(FastRand(s));

// Run the test for the specified iterations
for (uint64_t iter = 0; iter < params.iterations; iter++)
Expand Down Expand Up @@ -168,7 +168,7 @@ extern "C" __global__ void L1TagTest(const L1TagParams params)
}

// Always use a new random offset
rnd = static_cast<uint32_t>(FastRand(s));
rnd = 3;//static_cast<uint32_t>(FastRand(s));
}
}
}
5 changes: 5 additions & 0 deletions nvvs/plugin_src/memtest/Memtest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@
#include <random>
#include <span>

//inline int format_as(dcgmDiagnosticLevel_t type) { return static_cast<int>(type);}
// format_as() overloads for the two CUDA error types named below; each converts
// the value to int via static_cast.
// NOTE(review): presumably {fmt} `format_as` extension points - confirm.
inline int format_as(cudaError_enum type)
{
    const int driverErrorCode = static_cast<int>(type);
    return driverErrorCode;
}

inline int format_as(cudaError type)
{
    const int runtimeErrorCode = static_cast<int>(type);
    return runtimeErrorCode;
}


const unsigned int NUM_ITERATIONS = 1000;

static __thread unsigned long *err_addr;
Expand Down
3 changes: 2 additions & 1 deletion nvvs/plugin_src/pcie/Pcie.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
#include "PluginDevice.h"


#include <cublas_proxy.hpp>
//#include <cublas_proxy.hpp>
#include "../cublas_proxy/cublas_proxy.hpp"
#include <dcgm_structs.h>
#include <iostream>
#include <string>
Expand Down
3 changes: 2 additions & 1 deletion nvvs/plugin_src/targetedpower/TargetedPower_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
#include <CudaCommon.h>

#include <NvvsStructs.h>
#include <cublas_proxy.hpp>
//#include <cublas_proxy.hpp>
#include "../cublas_proxy/cublas_proxy.hpp"
#include <cuda.h>

#define TP_MAX_DIMENSION 8192 /* Maximum single dimension */
Expand Down
6 changes: 5 additions & 1 deletion nvvs/src/NvidiaValidationSuite.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,12 @@
#include <time.h>
#include <vector>

using namespace DcgmNs::Nvvs;


using namespace DcgmNs::Nvvs;
// Converts a dcgmEntityStatusType_enum value to int via static_cast.
// NOTE(review): presumably the {fmt} `format_as` extension point - confirm.
inline int format_as(dcgmEntityStatusType_enum type)
{
    const int entityStatusValue = static_cast<int>(type);
    return entityStatusValue;
}
//inline int format_as(dcgmDiagnosticLevel_t type) { return static_cast<int>(type);}
//inline int format_as(cudaError_enum type) { return static_cast<int>(type);}
DcgmHandle dcgmHandle;
DcgmSystem dcgmSystem;
NvvsCommon nvvsCommon __attribute__((visibility("default")));
Expand Down
5 changes: 5 additions & 0 deletions nvvs/src/Plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@
#include "Plugin.h"
#include "PluginStrings.h"


// Converts an nvvsPluginResult_enum value to int via static_cast.
// NOTE(review): presumably the {fmt} `format_as` extension point - confirm.
inline int format_as(nvvsPluginResult_enum type)
{
    const int resultValue = static_cast<int>(type);
    return resultValue;
}

const double DUMMY_TEMPERATURE_VALUE = 30.0;

/*************************************************************************/
Expand Down
4 changes: 4 additions & 0 deletions nvvs/src/PluginLib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@

#include <dlfcn.h>

// Converts a dcgmDiagAuxDataType value to int via static_cast.
// NOTE(review): presumably the {fmt} `format_as` extension point - confirm.
inline int format_as(dcgmDiagAuxDataType type)
{
    const int auxTypeValue = static_cast<int>(type);
    return auxTypeValue;
}


/*****************************************************************************/
PluginLib::PluginLib()
Expand Down
5 changes: 5 additions & 0 deletions testing/TestDiagResponseWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@
#include "NvvsJsonStrings.h"
#include "TestDiagResponseWrapper.h"


// Converts a dcgmDiagResult_enum value to int via static_cast.
// NOTE(review): presumably the {fmt} `format_as` extension point - confirm.
inline int format_as(dcgmDiagResult_enum type)
{
    const int diagResultValue = static_cast<int>(type);
    return diagResultValue;
}

TestDiagResponseWrapper::TestDiagResponseWrapper() = default;

TestDiagResponseWrapper::~TestDiagResponseWrapper() = default;
Expand Down