获取GPU信息

main.cu

#include <stdio.h>

// Wraps a CUDA runtime call: forwards the call's cudaError_t result to
// handle_error() together with the file name and line number of the call site.
#define HANDLE_ERROR(err) (handle_error(err, __FILE__, __LINE__))

// Kernel: each thread stores 1 into the element of `a` selected by its
// threadIdx.x. There is no bounds check, so `a` must hold at least blockDim.x
// floats. blockIdx is not used, so every block writes the same elements.
__global__ void kernelFunc(float *a) {
  const unsigned int slot = threadIdx.x;
  a[slot] = 1.0f;
}

// Reports a failed CUDA runtime call and terminates the process.
//
//   err  - status returned by the CUDA call being checked
//   file - source file of the call site (supplied by HANDLE_ERROR)
//   line - source line of the call site (supplied by HANDLE_ERROR)
//
// Returns normally only when err is cudaSuccess; otherwise prints the CUDA
// error string with the call site and exits with EXIT_FAILURE.
static void handle_error(cudaError_t err, const char *file, int line) {
  if (err == cudaSuccess) {
    return;
  }
  const char *message = cudaGetErrorString(err);
  printf("%s in %s at line %d\n", message, file, line);
  exit(EXIT_FAILURE);
}

// Demo entry point.
//
// 1. Runs kernelFunc with 16 threads on device 0 and prints the resulting
//    array.
// 2. Prints the number of CUDA devices, selects the last one and prints a few
//    of its properties.
// 3. Asks the runtime which device best matches those properties.
// 4. Registers devices 0 and 1 (or only device 0 on a single-GPU machine) as
//    the valid device list.
//
// Every CUDA runtime call is routed through HANDLE_ERROR, so the first failure
// is reported with its call site and the process exits instead of silently
// printing stale data.
//
// Returns 0 on success. Command-line arguments are not used.
int main(int argc, char *argv[]) {
  (void)argc;
  (void)argv;

  const int kNumElements = 16;
  const size_t kNumBytes = kNumElements * sizeof(float);

  // Select GPU 0.
  HANDLE_ERROR(cudaSetDevice(0));

  // Allocate device memory.
  float *aGPU = NULL;
  HANDLE_ERROR(cudaMalloc((void **)&aGPU, kNumBytes));

  // Copy the zero-initialized host array to the GPU.
  float a[kNumElements] = {0};
  HANDLE_ERROR(cudaMemcpy(aGPU, a, kNumBytes, cudaMemcpyHostToDevice));

  // Run the kernel with one block of 16 threads. A launch does not return a
  // status itself, so launch-configuration errors are fetched explicitly.
  kernelFunc<<<1, kNumElements>>>(aGPU);
  HANDLE_ERROR(cudaGetLastError());

  // Copy the result back to the host. This blocking copy also reports any
  // error raised while the kernel was executing.
  HANDLE_ERROR(cudaMemcpy(a, aGPU, kNumBytes, cudaMemcpyDeviceToHost));
  for (int i = 0; i < kNumElements; i++) {
    printf("%f ", a[i]);
  }
  printf("\n");

  // Release resources.
  HANDLE_ERROR(cudaFree(aGPU));
  HANDLE_ERROR(cudaDeviceReset());

  int gpuCount = -1;
  HANDLE_ERROR(cudaGetDeviceCount(&gpuCount));
  printf("gpuCount:%d\n", gpuCount);

  // A count of 0 must be rejected too; otherwise device index -1 would be
  // selected below.
  if (gpuCount <= 0) {
    printf("no device!\n");
    exit(0);
  }

  // Select the last GPU and query the properties of that same device.
  const int lastDevice = gpuCount - 1;
  HANDLE_ERROR(cudaSetDevice(lastDevice));

  cudaDeviceProp prop;
  HANDLE_ERROR(cudaGetDeviceProperties(&prop, lastDevice));
  printf("maxThreadsPerBlock: %d\n", prop.maxThreadsPerBlock);
  printf("maxThreadsDim: %d %d %d\n", prop.maxThreadsDim[0],
         prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
  printf("maxGridSize: %d %d %d\n", prop.maxGridSize[0], prop.maxGridSize[1],
         prop.maxGridSize[2]);
  // totalConstMem is a size_t, so it is printed with %zu.
  printf("totalConstMem: %zu\n", prop.totalConstMem);
  printf("clockRate: %d\n", prop.clockRate);
  printf("integrated: %d\n", prop.integrated);

  // Ask the runtime for the device that best matches the queried properties.
  int deviceId = -1;
  HANDLE_ERROR(cudaChooseDevice(&deviceId, &prop));
  printf("deviceId: %d\n", deviceId);

  // Only pass device indices that exist: with a single GPU the list is {0}.
  // Passing index 1 on a single-GPU machine fails with "invalid argument".
  int deviceList[2] = {0, 1};
  const int deviceListLen = gpuCount < 2 ? gpuCount : 2;
  HANDLE_ERROR(cudaSetValidDevices(deviceList, deviceListLen));
  return 0;
}

输出：

0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 
gpuCount:1
maxThreadsPerBlock: 1024
maxThreadsDim: 1024 1024 64
maxGridSize: 2147483647 65535 65535
totalConstMem: 65536
clockRate: 1241500
integrated: 0
deviceId: 0
invalid argument in main.cu at line 62