Skip to content

Target my discrete gpu #3

@microSoftware

Description

@microSoftware

running on macbook pro 2015
Running the command `system_profiler SPDisplaysDataType | grep "Chipset Model"` outputs:

Chipset Model: Intel Iris Pro
Chipset Model: AMD Radeon R9 M370X

My code benchmarks CPU and GPU, and it works great. But currently my iGPU is the one doing the computing. I'd like to specifically target the AMD Radeon R9 M370X, which is the discrete GPU.

import { setupGlobals } from 'bun-webgpu';

// Install WebGPU globals for the Bun runtime — presumably this provides
// `navigator.gpu` and the GPU* constructors used below; verify against
// bun-webgpu's docs.
setupGlobals();

// CPU Matrix Multiplication
//
// Naive triple-loop matmul of a row-major M×K matrix by a row-major K×N
// matrix, returning the row-major M×N product. Used as the reference
// implementation the GPU result is verified against.
function matrixMultiplyCPU(
  matrixA: Float32Array,
  matrixB: Float32Array,
  M: number,
  K: number,
  N: number
): Float32Array {
  const out = new Float32Array(M * N);

  for (let r = 0; r < M; r++) {
    // Hoist the row offsets; identical arithmetic, fewer multiplies per cell.
    const aBase = r * K;
    const oBase = r * N;
    for (let c = 0; c < N; c++) {
      let acc = 0;
      for (let k = 0; k < K; k++) {
        acc += matrixA[aBase + k] * matrixB[k * N + c];
      }
      out[oBase + c] = acc;
    }
  }

  return out;
}

// GPU Matrix Multiplication
//
// Dispatches a naive matmul compute shader on `device` and reads the result
// back. M/K/N are baked into the WGSL source via template interpolation, so a
// fresh shader module is compiled on every call (fine for a benchmark; cache
// the pipeline for repeated same-size multiplies).
//
// @param device  the GPUDevice to run on — which physical GPU this is was
//                decided at adapter-request time, not here
// @param matrixA row-major M×K input
// @param matrixB row-major K×N input
// @returns row-major M×N product
async function matrixMultiplyGPU(
  device: GPUDevice,
  matrixA: Float32Array,
  matrixB: Float32Array,
  M: number,
  K: number,
  N: number
): Promise<Float32Array> {
  const shaderCode = `
    @group(0) @binding(0) var<storage, read> matrixA: array<f32>;
    @group(0) @binding(1) var<storage, read> matrixB: array<f32>;
    @group(0) @binding(2) var<storage, read_write> result: array<f32>;

    @compute @workgroup_size(16, 16)
    fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
      let M = ${M}u;
      let K = ${K}u;
      let N = ${N}u;
      
      let row = global_id.y;
      let col = global_id.x;
      
      if (row >= M || col >= N) {
        return;
      }
      
      var sum = 0.0;
      for (var i = 0u; i < K; i = i + 1u) {
        sum = sum + matrixA[row * K + i] * matrixB[i * N + col];
      }
      
      result[row * N + col] = sum;
    }
  `;

  // Create input buffers. The data is written through mappedAtCreation, so
  // COPY_DST is not needed (the original declared it but never copied in).
  const bufferA = device.createBuffer({
    size: matrixA.byteLength,
    usage: GPUBufferUsage.STORAGE,
    mappedAtCreation: true,
  });
  new Float32Array(bufferA.getMappedRange()).set(matrixA);
  bufferA.unmap();

  const bufferB = device.createBuffer({
    size: matrixB.byteLength,
    usage: GPUBufferUsage.STORAGE,
    mappedAtCreation: true,
  });
  new Float32Array(bufferB.getMappedRange()).set(matrixB);
  bufferB.unmap();

  const resultSize = M * N * Float32Array.BYTES_PER_ELEMENT;
  // Shader output buffer; COPY_SRC so it can be copied to the staging buffer.
  const bufferResult = device.createBuffer({
    size: resultSize,
    usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
  });

  // CPU-readable staging buffer (STORAGE buffers can't be MAP_READ).
  const stagingBuffer = device.createBuffer({
    size: resultSize,
    usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
  });

  // Create shader and pipeline
  const shaderModule = device.createShaderModule({ code: shaderCode });

  const bindGroupLayout = device.createBindGroupLayout({
    entries: [
      {
        binding: 0,
        visibility: GPUShaderStage.COMPUTE,
        buffer: { type: "read-only-storage" as GPUBufferBindingType },
      },
      {
        binding: 1,
        visibility: GPUShaderStage.COMPUTE,
        buffer: { type: "read-only-storage" as GPUBufferBindingType },
      },
      {
        binding: 2,
        visibility: GPUShaderStage.COMPUTE,
        buffer: { type: "storage" as GPUBufferBindingType },
      },
    ],
  });

  const computePipeline = device.createComputePipeline({
    layout: device.createPipelineLayout({
      bindGroupLayouts: [bindGroupLayout],
    }),
    compute: {
      module: shaderModule,
      entryPoint: "main",
    },
  });

  const bindGroup = device.createBindGroup({
    layout: bindGroupLayout,
    entries: [
      { binding: 0, resource: { buffer: bufferA } },
      { binding: 1, resource: { buffer: bufferB } },
      { binding: 2, resource: { buffer: bufferResult } },
    ],
  });

  // Execute: one 16×16 workgroup tile per output region, rounded up so the
  // whole M×N output is covered (the shader guards out-of-range threads).
  const commandEncoder = device.createCommandEncoder();
  const passEncoder = commandEncoder.beginComputePass();
  passEncoder.setPipeline(computePipeline);
  passEncoder.setBindGroup(0, bindGroup);
  passEncoder.dispatchWorkgroups(Math.ceil(N / 16), Math.ceil(M / 16));
  passEncoder.end();

  commandEncoder.copyBufferToBuffer(
    bufferResult,
    0,
    stagingBuffer,
    0,
    resultSize
  );

  device.queue.submit([commandEncoder.finish()]);

  // Read results. mapAsync resolves once the submitted work is done.
  await stagingBuffer.mapAsync(GPUMapMode.READ);
  // One copy out of the mapped range (the original wrapped the copy in a
  // second Float32Array, duplicating the data needlessly).
  const resultArray = new Float32Array(stagingBuffer.getMappedRange()).slice();
  stagingBuffer.unmap();

  // Cleanup
  bufferA.destroy();
  bufferB.destroy();
  bufferResult.destroy();
  stagingBuffer.destroy();

  return resultArray;
}

/**
 * Renders a row-major matrix as text: each value fixed to 2 decimals and
 * right-padded to width 8, cells joined by a space, one row per line
 * (every line newline-terminated).
 */
function formatMatrix(matrix: Float32Array, rows: number, cols: number): string {
  const lines: string[] = [];
  for (let r = 0; r < rows; r++) {
    const cells: string[] = [];
    for (let c = 0; c < cols; c++) {
      cells.push(matrix[r * cols + c].toFixed(2).padStart(8));
    }
    lines.push(cells.join(" "));
  }
  return lines.map((line) => line + "\n").join("");
}

/**
 * Element-wise comparison of two Float32Arrays with an absolute tolerance,
 * used to check that the GPU result matches the CPU reference despite
 * floating-point summation-order differences.
 */
function arraysEqual(a: Float32Array, b: Float32Array, tolerance = 0.0001): boolean {
  if (a.length !== b.length) return false;
  return a.every((value, index) => Math.abs(value - b[index]) <= tolerance);
}

/**
 * Benchmarks CPU vs GPU matrix multiplication over the configured sizes and
 * verifies the two implementations agree within tolerance.
 *
 * Requests the adapter with `powerPreference: "high-performance"` so that on
 * dual-GPU machines (e.g. a MacBook Pro with an Intel Iris Pro iGPU and an
 * AMD Radeon dGPU) the implementation prefers the discrete GPU. Whether
 * bun-webgpu's backend honors this hint should be confirmed from the logged
 * adapter — check its reported name/description after running.
 */
async function runBenchmark() {
  // Test with different matrix sizes - going MUCH larger
  const sizes = [
    { M: 10, K: 10, N: 10 },
    { M: 100, K: 100, N: 100 },
    // { M: 500, K: 500, N: 500 },
    // { M: 1000, K: 1000, N: 1000 },
    // { M: 2000, K: 2000, N: 2000 },
    //{ M: 3000, K: 3000, N: 3000 },
    //{ M: 4000, K: 4000, N: 4000 },
  ];

  // Initialize GPU, asking for the discrete/high-performance adapter rather
  // than the default (often the integrated, low-power one).
  const adapter = await navigator.gpu.requestAdapter({
    powerPreference: "high-performance",
  });
  if (!adapter) {
    throw new Error("No GPU adapter found");
  }
  console.log(adapter);

  const device = await adapter.requestDevice();
  if (!device) {
    throw new Error("Failed to get GPU device");
  }
  console.log("device");
  console.log(device);

  console.log("Running benchmarks...\n");

  for (const { M, K, N } of sizes) {
    console.log(`=== Matrix Size: ${M}×${K} × ${K}×${N} (${M * K + K * N} elements) ===`);

    // Generate random matrices
    const matrixA = new Float32Array(M * K);
    const matrixB = new Float32Array(K * N);
    
    for (let i = 0; i < matrixA.length; i++) {
      matrixA[i] = Math.random() * 10;
    }
    for (let i = 0; i < matrixB.length; i++) {
      matrixB[i] = Math.random() * 10;
    }

    // Benchmark CPU
    const cpuStart = performance.now();
    const cpuResult = matrixMultiplyCPU(matrixA, matrixB, M, K, N);
    const cpuTime = performance.now() - cpuStart;

    // Benchmark GPU (including data transfer)
    const gpuStart = performance.now();
    const gpuResult = await matrixMultiplyGPU(device, matrixA, matrixB, M, K, N);
    const gpuTime = performance.now() - gpuStart;

    // Verify results match (tolerance absorbs float summation-order drift)
    const resultsMatch = arraysEqual(cpuResult, gpuResult);

    const speedup = cpuTime / gpuTime;
    const winner = speedup > 1 ? "🏆 GPU wins!" : "CPU wins";
    
    console.log(`CPU Time: ${cpuTime.toFixed(3)}ms`);
    console.log(`GPU Time: ${gpuTime.toFixed(3)}ms (including data transfer)`);
    console.log(`Speedup: ${speedup.toFixed(2)}x - ${winner}`);
    console.log(`Results match: ${resultsMatch ? '✓' : '✗'}`);
    
    if (!resultsMatch && M <= 100) {
      // Show a few sample values for debugging
      console.log(`Sample CPU values: [${Array.from(cpuResult.slice(0, 5)).map(v => v.toFixed(2)).join(', ')}...]`);
      console.log(`Sample GPU values: [${Array.from(gpuResult.slice(0, 5)).map(v => v.toFixed(2)).join(', ')}...]`);
    }
    
    console.log("");
  }

  device.destroy();
  
  console.log("\nNote: GPU overhead includes buffer creation, data transfer,");
  console.log("shader compilation, and result retrieval. For very large matrices");
  console.log("or repeated operations, GPU will significantly outperform CPU.");
}

runBenchmark().catch(console.error);

Metadata

Metadata

Assignees

No one assigned

    Labels

    enhancement — New feature or request

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions