Visual Studio工程配置情况:
VC++目录配置:
C:\ProgramData\NVIDIA Corporation\CUDA Samples\v10.\common\lib\x64
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.\lib\x64
C:\ProgramData\NVIDIA Corporation\CUDA Samples\v10.\common\inc
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.\include
链接器配置:
$(CudaToolkitLibDir)
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.\lib\x64
输入:
cublas.lib
cuda.lib
cudadevrt.lib
cudart.lib
cudart_static.lib
cufft.lib
cufftw.lib
curand.lib
cusolver.lib
cusparse.lib
nppc.lib
nppial.lib
nppicc.lib
nppicom.lib
nppidei.lib
nppif.lib
nppig.lib
nppim.lib
nppist.lib
nppisu.lib
nppitc.lib
npps.lib
nvblas.lib
nvgraph.lib
nvml.lib
nvrtc.lib
OpenCL.lib
测试代码:
#include <iostream>
#include <cuda.h>
#include "cuda_runtime.h"
#include <cuda_runtime_api.h>
#include "device_launch_parameters.h"
#include <device_functions.h> int main(void)
{
int deviceCount;
cudaGetDeviceCount(&deviceCount); int dev;
for (dev = ; dev < deviceCount; dev++)
{
int driver_version(), runtime_version();
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, dev);
if (dev == )
if (deviceProp.minor = && deviceProp.major == )
printf("\n");
printf("\nDevice%d:\"%s\"\n", dev, deviceProp.name);
cudaDriverGetVersion(&driver_version);
printf("CUDA驱动版本: %d.%d\n", driver_version / , (driver_version % ) / );
cudaRuntimeGetVersion(&runtime_version);
printf("CUDA运行时版本: %d.%d\n", runtime_version / , (runtime_version % ) / );
printf("设备计算能力: %d.%d\n", deviceProp.major, deviceProp.minor);
printf("Total amount of Global Memory: %u bytes\n", deviceProp.totalGlobalMem);
printf("Number of SMs: %d\n", deviceProp.multiProcessorCount);
printf("Total amount of Constant Memory: %u bytes\n", deviceProp.totalConstMem);
printf("Total amount of Shared Memory per block: %u bytes\n", deviceProp.sharedMemPerBlock);
printf("Total number of registers available per block: %d\n", deviceProp.regsPerBlock);
printf("Warp size: %d\n", deviceProp.warpSize);
printf("Maximum number of threads per SM: %d\n", deviceProp.maxThreadsPerMultiProcessor);
printf("Maximum number of threads per block: %d\n", deviceProp.maxThreadsPerBlock);
printf("Maximum size of each dimension of a block: %d x %d x %d\n", deviceProp.maxThreadsDim[],deviceProp.maxThreadsDim[],deviceProp.maxThreadsDim[]);
printf("Maximum size of each dimension of a grid: %d x %d x %d\n", deviceProp.maxGridSize[], deviceProp.maxGridSize[], deviceProp.maxGridSize[]);
printf("Maximum memory pitch: %u bytes\n", deviceProp.memPitch);
printf("Texture alignmemt: %u bytes\n", deviceProp.texturePitchAlignment);
printf("Clock rate: %.2f GHz\n", deviceProp.clockRate * 1e-6f);
printf("Memory Clock rate: %.0f MHz\n", deviceProp.memoryClockRate * 1e-3f);
printf("Memory Bus Width: %d-bit\n", deviceProp.memoryBusWidth);
}
}
未完待续!
运行结果: