macos | Xcode | Apple LLVM | |
CUDA Toolkit 9.1 対応 | 10.13.3 | 9.2 | 9.0 |
最新版 | 10.13.4 | 9.3 | 9.1 |
[/Developer/NVIDIA/CUDA-9.1/samples]$ make -C 0_Simple/vectorAdd /Developer/NVIDIA/CUDA-9.1/bin/nvcc -ccbin clang++ -I../../common/inc -m64 -Xcompiler -arch -Xcompiler x86_64 -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_70,code=compute_70 -o vectorAdd.o -c vectorAdd.cu nvcc fatal : The version ('90100') of the host compiler ('Apple clang') is not supported make: *** [vectorAdd.o] Error 1
$ mv ~/Downloads/Xcode.app /Applications/Xcode_9.2.app
[/Applications]$ clang --version Apple LLVM version 9.1.0 (clang-902.0.39.1) Target: x86_64-apple-darwin17.5.0 Thread model: posix InstalledDir: /Library/Developer/CommandLineTools/usr/bin [/Applications]$ sudo xcode-select -s /Applications/Xcode_9.2.app/Contents/Developer/ [/Applications]$ xcodebuild -version Xcode 9.2 Build version 9C40b [/Applications]$ clang --version Apple LLVM version 9.0.0 (clang-900.0.39.2) Target: x86_64-apple-darwin17.5.0 Thread model: posix InstalledDir: /Applications/Xcode_9.2.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin
export DEVELOPER_DIR=/Applications/Xcode_9.2.app/Contents/Developer/
[/Developer/NVIDIA/CUDA-9.1/samples]$ ./bin/x86_64/darwin/release/deviceQuery ./bin/x86_64/darwin/release/deviceQuery Starting... CUDA Device Query (Runtime API) version (CUDART static linking) cudaGetDeviceCount returned 35 -> CUDA driver version is insufficient for CUDA runtime version Result = FAIL
export PS1="[\w]\$ " export PYENV_ROOT="${HOME}/.pyenv" export PATH=${PYENV_ROOT}/bin:$PATH eval "$(pyenv init -)" PERL_MB_OPT="--install_base \"/Users/atsushi/perl5\""; export PERL_MB_OPT; PERL_MM_OPT="INSTALL_BASE=/Users/atsushi/perl5"; export PERL_MM_OPT; export JAVA_HOME="/Library/Java/JavaVirtualMachines/jdk1.8.0_92.jdk/Contents/Home/jre/" export PATH=${PATH}:${JAVA_HOME}/bin export SCALA_HOME=/usr/local/Cellar/scala/2.11.4/ export LANG="ja_JP.UTF-8" export ANSIBLE_NOCOWS=1 # for CUDA export DEVELOPER_DIR=/Applications/Xcode_9.2.app/Contents/Developer/ export PATH=/Developer/NVIDIA/CUDA-9.1/bin${PATH:+:${PATH}} export DYLD_LIBRARY_PATH=/Developer/NVIDIA/CUDA-9.1/lib${DYLD_LIBRARY_PATH:+:${DYLD_LIBRARY_PATH}}
[/Developer/NVIDIA/CUDA-9.1/samples]$ sudo make -C 1_Utilities/deviceQuery [/Developer/NVIDIA/CUDA-9.1/samples]$ ./bin/x86_64/darwin/release/deviceQuery ./bin/x86_64/darwin/release/deviceQuery Starting... CUDA Device Query (Runtime API) version (CUDART static linking) Detected 1 CUDA Capable device(s) Device 0: "GeForce GT 750M" CUDA Driver Version / Runtime Version 9.1 / 9.1 CUDA Capability Major/Minor version number: 3.0 Total amount of global memory: 2048 MBytes (2147024896 bytes) ( 2) Multiprocessors, (192) CUDA Cores/MP: 384 CUDA Cores GPU Max Clock rate: 926 MHz (0.93 GHz) Memory Clock rate: 2508 Mhz Memory Bus Width: 128-bit L2 Cache Size: 262144 bytes Maximum Texture Dimension Size (x,y,z) 1D=(65536), 2D=(65536, 65536), 3D=(4096, 4096, 4096) Maximum Layered 1D Texture Size, (num) layers 1D=(16384), 2048 layers Maximum Layered 2D Texture Size, (num) layers 2D=(16384, 16384), 2048 layers Total amount of constant memory: 65536 bytes Total amount of shared memory per block: 49152 bytes Total number of registers available per block: 65536 Warp size: 32 Maximum number of threads per multiprocessor: 2048 Maximum number of threads per block: 1024 Max dimension size of a thread block (x,y,z): (1024, 1024, 64) Max dimension size of a grid size (x,y,z): (2147483647, 65535, 65535) Maximum memory pitch: 2147483647 bytes Texture alignment: 512 bytes Concurrent copy and kernel execution: Yes with 1 copy engine(s) Run time limit on kernels: Yes Integrated GPU sharing Host Memory: No Support host page-locked memory mapping: Yes Alignment requirement for Surfaces: Yes Device has ECC support: Disabled Device supports Unified Addressing (UVA): Yes Supports Cooperative Kernel Launch: No Supports MultiDevice Co-op Kernel Launch: No Device PCI Domain ID / Bus ID / location ID: 0 / 1 / 0 Compute Mode: < Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) > deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 9.1, CUDA Runtime Version = 9.1, 
NumDevs = 1 Result = PASS
[~/Downloads]$ tar xzvf cudnn-9.1-osx-x64-v7-ga.tgz x cuda/include/cudnn.h x cuda/NVIDIA_SLA_cuDNN_Support.txt x cuda/lib/libcudnn.7.dylib x cuda/lib/libcudnn.dylib x cuda/lib/libcudnn_static.a [~/Downloads]$ sudo cp -v cuda/include/cudnn.h /usr/local/cuda/include/ cuda/include/cudnn.h -> /usr/local/cuda/include/cudnn.h [~/Downloads]$ sudo cp -v cuda/lib/libcudnn* /usr/local/cuda/lib cuda/lib/libcudnn.7.dylib -> /usr/local/cuda/lib/libcudnn.7.dylib cuda/lib/libcudnn.dylib -> /usr/local/cuda/lib/libcudnn.dylib cuda/lib/libcudnn_static.a -> /usr/local/cuda/lib/libcudnn_static.a [~/Downloads]$ sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib/libcudnn*
export PS1="[\w]\$ " export LANG="ja_JP.UTF-8" # for pyenv export PYENV_ROOT="${HOME}/.pyenv" export PATH=${PYENV_ROOT}/bin:$PATH eval "$(pyenv init -)" # for anaconda . /Users/atsushi/.pyenv/versions/anaconda3-5.1.0/etc/profile.d/conda.sh PERL_MB_OPT="--install_base \"/Users/atsushi/perl5\""; export PERL_MB_OPT; PERL_MM_OPT="INSTALL_BASE=/Users/atsushi/perl5"; export PERL_MM_OPT; export JAVA_HOME="/Library/Java/JavaVirtualMachines/jdk1.8.0_92.jdk/Contents/Home/jre/" export PATH=${PATH}:${JAVA_HOME}/bin export SCALA_HOME=/usr/local/Cellar/scala/2.11.4/ export ANSIBLE_NOCOWS=1 # for CUDA export DEVELOPER_DIR=/Applications/Xcode_9.2.app/Contents/Developer/ export PATH=/Developer/NVIDIA/CUDA-9.1/bin${PATH:+:${PATH}} export DYLD_LIBRARY_PATH=/Developer/NVIDIA/CUDA-9.1/lib${DYLD_LIBRARY_PATH:+:${DYLD_LIBRARY_PATH}} # for cuDNN export DYLD_LIBRARY_PATH=/usr/local/cuda/lib:$DYLD_LIBRARY_PATH
$ echo -e '#include"cudnn.h"\n int main(){}' | nvcc -x c - -o /dev/null \
    -I/usr/local/cuda/include -L/usr/local/cuda/lib -lcudnn
これでエラーが起きなきゃ、nvcc と ヘッダファイルがきちんと配布されていることが検証できる