获取GPU信息

main.cu

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#include <stdio.h>

// Passes a cudaError_t value to handle_error together with the file name and
// line number of the place where the macro is expanded, so a failure report
// points at the offending call site.
#define HANDLE_ERROR(err) (handle_error(err, __FILE__, __LINE__))

/**
 * Writes 1 into one element of `a` per thread.
 *
 * The element index is threadIdx.x only (blockIdx is not used), so every
 * block of a launch writes the same elements. There is no bounds check:
 * `a` must hold at least blockDim.x floats.
 */
__global__ void kernelFunc(float *a) {
  const unsigned int slot = threadIdx.x;
  a[slot] = 1.0f;
}

/**
 * Aborts the program when a CUDA runtime call has failed.
 *
 * Does nothing when `err` is cudaSuccess. Otherwise prints the textual
 * description of the error plus the reporting location and terminates the
 * process with EXIT_FAILURE.
 *
 * @param err  status code returned by a CUDA runtime call
 * @param file source file name to report (normally __FILE__)
 * @param line source line number to report (normally __LINE__)
 */
static void handle_error(cudaError_t err, const char *file, int line) {
  if (err == cudaSuccess) {
    return;
  }
  // Diagnostics go to stderr so they are not mixed into (or lost with)
  // redirected regular output.
  fprintf(stderr, "%s in %s at line %d\n", cudaGetErrorString(err), file, line);
  exit(EXIT_FAILURE);
}

/**
 * Demo program in two parts:
 *   1. Runs kernelFunc on device 0 over a 16-element float buffer and prints
 *      the buffer after copying it back to the host.
 *   2. Queries the number of GPUs, selects the last one, prints a few of its
 *      properties, asks the runtime which device best matches those
 *      properties, and finally registers a list of valid devices.
 *
 * Every CUDA runtime call is checked through HANDLE_ERROR, which terminates
 * the process on the first failure.
 *
 * @return 0 on success; the process exits with EXIT_FAILURE on any CUDA error
 *         or when no device is reported.
 */
int main(int argc, char *argv[]) {
  (void)argc;
  (void)argv;

  const int kCount = 16;
  const size_t kBytes = kCount * sizeof(float);

  // Select the GPU used for the kernel demo.
  HANDLE_ERROR(cudaSetDevice(0));

  // Allocate the device buffer.
  float *aGPU = NULL;
  HANDLE_ERROR(cudaMalloc((void **)&aGPU, kBytes));

  // Copy the zero-initialised host buffer to the GPU.
  float a[kCount] = {0};
  HANDLE_ERROR(cudaMemcpy(aGPU, a, kBytes, cudaMemcpyHostToDevice));

  // Run the kernel with one block of 16 threads.
  kernelFunc<<<1, kCount>>>(aGPU);
  // A kernel launch returns no status itself; launch failures are only
  // visible through cudaGetLastError().
  HANDLE_ERROR(cudaGetLastError());

  // Copy the result back to host memory.
  HANDLE_ERROR(cudaMemcpy(a, aGPU, kBytes, cudaMemcpyDeviceToHost));
  for (int i = 0; i < kCount; i++) {
    printf("%f ", a[i]);
  }
  printf("\n");

  // Release resources.
  HANDLE_ERROR(cudaFree(aGPU));
  HANDLE_ERROR(cudaDeviceReset());

  int gpuCount = -1;
  HANDLE_ERROR(cudaGetDeviceCount(&gpuCount));
  printf("gpuCount:%d\n", gpuCount);

  // A count of zero must be rejected too, otherwise the code below would
  // try to select device -1.
  if (gpuCount <= 0) {
    fprintf(stderr, "no device!\n");
    exit(EXIT_FAILURE);
  }

  // Select the last GPU and report the properties of that same device.
  const int lastDevice = gpuCount - 1;
  HANDLE_ERROR(cudaSetDevice(lastDevice));

  cudaDeviceProp prop;
  HANDLE_ERROR(cudaGetDeviceProperties(&prop, lastDevice));
  printf("maxThreadsPerBlock: %d\n", prop.maxThreadsPerBlock);
  printf("maxThreadsDim: %d %d %d\n", prop.maxThreadsDim[0],
         prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
  printf("maxGridSize: %d %d %d\n", prop.maxGridSize[0], prop.maxGridSize[1],
         prop.maxGridSize[2]);
  // totalConstMem is a size_t, so it needs %zu rather than %ld.
  printf("totalConstMem: %zu\n", prop.totalConstMem);
  printf("clockRate: %d\n", prop.clockRate);
  printf("integrated: %d\n", prop.integrated);

  // Ask the runtime which device best matches the properties just read.
  int deviceId = -1;
  HANDLE_ERROR(cudaChooseDevice(&deviceId, &prop));
  printf("deviceId: %d\n", deviceId);

  // cudaSetValidDevices rejects a list longer than the number of devices in
  // the system, so pass at most gpuCount entries.
  int deviceList[2] = {0, 1};
  const int deviceListLen = gpuCount < 2 ? gpuCount : 2;
  HANDLE_ERROR(cudaSetValidDevices(deviceList, deviceListLen));
  return 0;
}

输出:

1
2
3
4
5
6
7
8
9
10
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 
gpuCount:1
maxThreadsPerBlock: 1024
maxThreadsDim: 1024 1024 64
maxGridSize: 2147483647 65535 65535
totalConstMem: 65536
clockRate: 1241500
integrated: 0
deviceId: 0
invalid argument in main.cu at line 62