From e7734ea1fe4ebe652dfa0965c2d27e8d832547fa Mon Sep 17 00:00:00 2001 From: Harold Brenes Date: Thu, 12 Oct 2023 22:18:19 -0400 Subject: [PATCH 1/5] Add missing `--device` to simulator --- src/commands/CmdSimulator.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/commands/CmdSimulator.cpp b/src/commands/CmdSimulator.cpp index e1c48251..e29089bd 100644 --- a/src/commands/CmdSimulator.cpp +++ b/src/commands/CmdSimulator.cpp @@ -92,6 +92,7 @@ void CmdSimulateMain( GlobalPlotConfig& gCfg, CliParser& cli ) else if( cli.ReadSize( cfg.farmSize, "-s", "--size" ) ) continue; else if( cli.ReadHexStrAsBytes( cfg.randomSeed, sizeof( cfg.randomSeed ), "--seed" ) ) continue; else if( cli.ReadSwitch( cfg.noCuda, "--no-cuda" ) ) continue; + else if( cli.ReadI32( cfg.cudaDevice, "-d", "--device" ) ) continue; else break; } From e8bf59914a2fde23ea502f1a2700e98821ce8adf Mon Sep 17 00:00:00 2001 From: Harold Brenes Date: Fri, 8 Dec 2023 16:42:40 -0500 Subject: [PATCH 2/5] Simple CUDA device listing --- .vscode/launch.json | 6 +++- cuda/CudaPlotter.cu | 73 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 1 deletion(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index bb356736..0e8e671c 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -137,7 +137,11 @@ // "--memo", "80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef207d52406afa2b6d7d92ea778f407205bd9dca40816c1b1cacfca2a6612b93eb", "args": - "-w -n 1 -z 1 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot --check 100 --check-threshold 2 /home/harold/plot", + + // List devices + "-w -n 1 -z 1 -f 
ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 cudaplot -l", + + // "-w -n 1 -z 1 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot --check 100 --check-threshold 2 /home/harold/plot", // "-w -z 1 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot /home/harold/plot", // "-w -z 1 -f ade0cc43610ce7540ab96a524d0ab17f5df7866ef13d1221a7203e5d10ad2a4ae37f7b73f6cdfd6ddf4122e8a1c2f8ef -p 80a836a74b077cabaca7a76d1c3c9f269f7f3a8f2fa196a65ee8953eb81274eb8b7328d474982617af5a0fe71b47e9b8 -i c6b84729c23dc6d60c92f22c17083f47845c1179227c5509f07a5d2804a7b835 cudaplot --disk-128 -t1 /home/harold/plotdisk --no-direct-buffers /home/harold/plot", diff --git a/cuda/CudaPlotter.cu b/cuda/CudaPlotter.cu index 80ba8b0e..95cd8fe4 100644 --- a/cuda/CudaPlotter.cu +++ b/cuda/CudaPlotter.cu @@ -47,6 +47,7 @@ static void UploadBucketToGpu( CudaK32PlotContext& context, TableId table, const static void LoadAndSortBucket( CudaK32PlotContext& cx, const uint32 bucket ); void CudaMatchBucketizedK32( CudaK32PlotContext& cx, const uint32* devY, cudaStream_t stream, cudaEvent_t event ); +[[noreturn]] static void ListCudaDevices( bool json ); // Defined in FxCuda.cu void GenFx( CudaK32PlotContext& cx, const uint32* devYIn, const uint32* devMetaIn, cudaStream_t stream ); @@ -59,6 +60,11 @@ GPU-based (CUDA) plotter -h, --help : Shows this help message and exits. -d, --device : Select the CUDA device index. 
(default=0) + -l, --list : List available CUDA devices, showing their indices. + + --json : Show output in json format. This is only valid for certain parameters: + --list + --disk-128 : Enable hybrid disk plotting for 128G system RAM. Requires a --temp1 and --temp2 to be set. @@ -95,6 +101,9 @@ void CudaK32Plotter::ParseCLI( const GlobalPlotConfig& gCfg, CliParser& cli ) CudaK32PlotConfig& cfg = _cfg; cfg.gCfg = &gCfg; + bool listDevices = false; + bool json = false; + while( cli.HasArgs() ) { if( cli.ReadU32( cfg.deviceIndex, "-d", "--device" ) ) @@ -127,6 +136,10 @@ void CudaK32Plotter::ParseCLI( const GlobalPlotConfig& gCfg, CliParser& cli ) continue; if( cli.ReadF64( cfg.plotCheckThreshhold, "--check-threshold" ) ) continue; + if( cli.ReadSwitch( json, "--json" ) ) + continue; + if( cli.ReadSwitch( listDevices, "-l", "--list" ) ) + continue; // if( cli.ReadSwitch( cfg.disableDirectDownloads, "--no-direct-buffers" ) ) // continue; if( cli.ArgMatch( "--help", "-h" ) ) @@ -139,6 +152,8 @@ void CudaK32Plotter::ParseCLI( const GlobalPlotConfig& gCfg, CliParser& cli ) } // The rest should be output directies, parsed by the global config parser. + if( listDevices ) + ListCudaDevices( json ); if( cfg.hybrid128Mode && gCfg.compressionLevel <= 0 ) { @@ -347,6 +362,64 @@ void CudaInit( CudaK32PlotContext& cx ) //FatalIf( supportsCoopLaunch != 1, "This CUDA device does not support cooperative kernel launches." 
); } +//----------------------------------------------------------- +void ListCudaDevices( const bool json ) +{ + cudaError_t err = cudaSuccess; + int deviceCount = 0; + + #define CheckCudaSuccess( x ) if( (err = x) != cudaSuccess ) goto CUDA_ERROR_EXIT; + + { + CheckCudaSuccess( cudaGetDeviceCount( &deviceCount ) ); + + if( deviceCount < 1 ) + { + const char* e = "No CUDA devices available."; + if( json ) + Log::Line( R"({"error": "%s"})", e ); + else + Log::Line( e ); + exit(0); + } + + if( json ) + Log::Line("["); + + for( int i = 0; i < deviceCount; i++ ) + { + cudaDeviceProp cudaDevProps{}; + CheckCudaSuccess( cudaGetDeviceProperties( &cudaDevProps, i ) ); + + if( json ) + { + Log::Write( R"( {"id": %d, "name": "%s"})", i, cudaDevProps.name ); + if( i+1 < deviceCount ) + Log::Write( "," ); + + Log::NewLine(); + } + else + Log::Line( "%-2d: %s", i, cudaDevProps.name ); + } + + if( json ) + Log::Line("]"); + + exit(0); + } + + #undef CheckCudaSuccess + CUDA_ERROR_EXIT: + + if( json ) + Log::Error( R"({ "error": "Failed to list CUDA devices with error 0x%llx: '%s'"})", (llu)err, cudaGetErrorString( err ) ); + else + Log::Error( "Failed to list CUDA devices with error 0x%llx: '%s'.", (llu)err, cudaGetErrorString( err ) ); + + exit(1); +} + /// /// Plotting entry point From 02671c5ebe28656f563f327f188a9d0716afee5d Mon Sep 17 00:00:00 2001 From: Chris Marslender Date: Wed, 10 Jul 2024 12:27:37 -0500 Subject: [PATCH 3/5] Updated manylinux build images --- .github/workflows/build-release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index 156b4a8d..1e609200 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -11,7 +11,7 @@ jobs: build-harvester-linux-x86-64: runs-on: ubuntu-20.04 container: - image: ghcr.io/chia-network/build-images/manylinux2014_cuda_x86_64:sha-1caf046d5ff19b7c743de2a106dd86928794032b + image: 
ghcr.io/chia-network/build-images/manylinux_cuda_x86_64:main steps: - name: Checkout Repo uses: actions/checkout@v4 @@ -105,7 +105,7 @@ jobs: build-harvester-linux-arm64: runs-on: [ARM64, Linux] container: - image: ghcr.io/chia-network/build-images/manylinux2014_cuda_aarch64:sha-1caf046d5ff19b7c743de2a106dd86928794032b + image: ghcr.io/chia-network/build-images/manylinux_cuda_aarch64:main steps: - name: Checkout Repo uses: actions/checkout@v4 From 51a29432a72a424dbd001645613deaf62df7b757 Mon Sep 17 00:00:00 2001 From: Chris Marslender Date: Wed, 10 Jul 2024 12:30:52 -0500 Subject: [PATCH 4/5] Update to the rocky8 image for rhel builds --- .github/workflows/build-release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index 1e609200..1b5ea039 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -279,7 +279,7 @@ jobs: build-bladebit-centos-x86-64: runs-on: ubuntu-20.04 container: - image: quay.io/centos/centos:stream8 + image: chianetwork/rocky8-builder:latest steps: - name: Checkout Repo uses: actions/checkout@v4 @@ -339,7 +339,7 @@ jobs: build-bladebit-centos-arm64: runs-on: [ARM64, Linux] container: - image: quay.io/centos/centos:stream8 + image: chianetwork/rocky8-builder:latest steps: - name: Checkout Repo uses: actions/checkout@v4 From 37fd0ffef0b01e72d2581cb2183cb8dc7a3c3312 Mon Sep 17 00:00:00 2001 From: Chris Marslender Date: Wed, 10 Jul 2024 13:34:13 -0500 Subject: [PATCH 5/5] Update intel mac to macos12 --- .github/workflows/build-release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index 1b5ea039..a122efa6 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -195,7 +195,7 @@ jobs: if-no-files-found: error build-harvester-macos-x86-64: - runs-on: macOS-11 + runs-on: macOS-12 
steps: - name: Cleanup Environment uses: Chia-Network/actions/clean-workspace@main