1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
| #include <stdio.h>
// Forwards a CUDA runtime status code to handle_error() together with the
// source file and line of the call site, so a failure can be traced back to
// the exact statement that produced it.
#define HANDLE_ERROR(err) (handle_error((err), __FILE__, __LINE__))
// Kernel: each thread writes 1 into the element of `a` indexed by its
// threadIdx.x. Only threadIdx.x is used for indexing (blockIdx is ignored),
// and there is no bounds check, so the caller must make sure `a` holds at
// least blockDim.x floats.
__global__ void kernelFunc(float *a) {
    const unsigned int tid = threadIdx.x;
    a[tid] = 1.0f;
}
/**
 * Aborts the program when a CUDA runtime call reports a failure.
 *
 * If `err` is cudaSuccess the function returns immediately. Otherwise it
 * prints the runtime's description of the error, followed by the file name
 * and line number supplied by the caller, and exits with EXIT_FAILURE.
 *
 * The message is written to stderr so that diagnostics stay separate from
 * the program's regular stdout output.
 *
 * @param err   Status code returned by a CUDA runtime call.
 * @param file  Source file of the call site (normally __FILE__).
 * @param line  Source line of the call site (normally __LINE__).
 */
static void handle_error(cudaError_t err, const char *file, int line) {
    if (err == cudaSuccess) {
        return;
    }
    fprintf(stderr, "%s in %s at line %d\n", cudaGetErrorString(err), file, line);
    exit(EXIT_FAILURE);
}
/**
 * Demo program exercising a handful of CUDA runtime calls:
 *   1. launch kernelFunc on a 16-float buffer and print the result,
 *   2. query the number of devices and select the last one,
 *   3. print selected fields of the properties of device 0,
 *   4. ask the runtime which device best matches those properties,
 *   5. register a list of valid devices.
 *
 * Every runtime call is checked through HANDLE_ERROR, so the first failure
 * prints a diagnostic and terminates the process.
 *
 * @param argc  Unused.
 * @param argv  Unused.
 * @return 0 on success; the process exits early on any CUDA error.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    const int kCount = 16;
    const size_t kBytes = kCount * sizeof(float);

    // --- 1. Round-trip a small buffer through the kernel on device 0. ---
    HANDLE_ERROR(cudaSetDevice(0));

    float *aGPU = NULL;
    HANDLE_ERROR(cudaMalloc((void **)&aGPU, kBytes));

    float a[kCount] = {0};
    HANDLE_ERROR(cudaMemcpy(aGPU, a, kBytes, cudaMemcpyHostToDevice));

    // One block of kCount threads: one thread per buffer element.
    kernelFunc<<<1, kCount>>>(aGPU);
    // A launch does not return a status itself; configuration errors only
    // show up through cudaGetLastError().
    HANDLE_ERROR(cudaGetLastError());

    // Blocking copy: also surfaces any error raised while the kernel ran.
    HANDLE_ERROR(cudaMemcpy(a, aGPU, kBytes, cudaMemcpyDeviceToHost));
    for (int i = 0; i < kCount; i++) {
        printf("%f ", a[i]);
    }
    printf("\n");

    HANDLE_ERROR(cudaFree(aGPU));
    HANDLE_ERROR(cudaDeviceReset());

    // --- 2. Count the devices and select the last one. ---
    int gpuCount = -1;
    HANDLE_ERROR(cudaGetDeviceCount(&gpuCount));
    printf("gpuCount:%d\n", gpuCount);

    // A count of 0 must be rejected as well, otherwise the call below would
    // ask for device -1.
    if (gpuCount <= 0) {
        printf("no device!\n");
        exit(0);
    }
    HANDLE_ERROR(cudaSetDevice(gpuCount - 1));

    // --- 3. Print selected properties. ---
    // NOTE(review): this queries device 0, not the device selected just
    // above (gpuCount - 1) -- confirm that this is intended.
    cudaDeviceProp prop;
    HANDLE_ERROR(cudaGetDeviceProperties(&prop, 0));
    printf("maxThreadsPerBlock: %d\n", prop.maxThreadsPerBlock);
    printf("maxThreadsDim: %d %d %d\n",
           prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
    printf("maxGridSize: %d %d %d\n",
           prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
    // totalConstMem is a size_t, so it needs %zu rather than %ld.
    printf("totalConstMem: %zu\n", prop.totalConstMem);
    printf("clockRate: %d\n", prop.clockRate);
    printf("integrated: %d\n", prop.integrated);

    // --- 4. Let the runtime pick the device that best matches `prop`. ---
    int deviceId = -1;
    HANDLE_ERROR(cudaChooseDevice(&deviceId, &prop));
    printf("deviceId: %d\n", deviceId);

    // --- 5. Register the valid devices. ---
    // Only pass ordinals that actually exist: on a single-GPU machine the
    // list is truncated to {0} instead of failing on the missing device 1.
    int deviceList[2] = {0, 1};
    const int validCount = (gpuCount < 2) ? gpuCount : 2;
    HANDLE_ERROR(cudaSetValidDevices(deviceList, validCount));

    return 0;
}
|