本文旨在处理本地电脑 NVIDIA 环境部署,以及 NVIDIA Jetson Orin 交叉编译的环境部署。
1. 部署电脑环境
https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html
CUDA Version | Driver Version |
---|---|
CUDA 12.x | >=525.60.13 |
CUDA 11.8.x CUDA 11.7.x CUDA 11.6.x CUDA 11.5.x CUDA 11.4.x CUDA 11.3.x CUDA 11.2.x CUDA 11.1.x | >=450.80.02 |
cuda版本下载
https://developer.nvidia.com/cuda-12-6-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=20.04&target_type=runfile_local
cudnn下载
https://developer.nvidia.com/cudnn-9-5-1-download-archive?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=20.04&target_type=deb_local
cudnn版本
https://docs.nvidia.com/deeplearning/cudnn/v9.5.1/release-notes.html
onnxruntime依赖
https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html
电脑部署版本
python | pytorch | CUDA | cuDNN | onnxruntime-gpu |
---|---|---|---|---|
py38+ | 2.1.0 | 12.6 | 9.5.1 | 1.19.2 |
2. 部署交叉编译环境
2.1 安装交叉编译环境
1. uname 命令可以显示操作系统和内核的相关信息,这在某些情况下也包含了板子的架构信息
(myenv) qwer@qwer-Z790-EAGLE-AX:/mnt/data/docker_images$ uname -a
Linux qwer-Z790-EAGLE-AX 5.15.0-124-generic #134~20.04.1-Ubuntu SMP Tue Oct 1 15:27:33 UTC 2024 x86_64 x86_64 x86_64 GNU/Linux
2. cat /proc/device-tree/model # Jetson 板子的结果类似 NVIDIA Jetson Nano Developer Kit
3. lsb_release -a # 该命令显示操作系统的版本信息
4. 对于 NVIDIA Jetson 板子,可以使用以下命令查看详细的 Jetson 版本信息:head -n 1 /etc/nv_tegra_release # 详细的 Jetson 版本信息,例如 R32 (release), REVISION: 4.4
5. dmesg | grep -i board
6. cat /proc/cpuinfo
安装交叉编译环境
sudo apt-get install g++-aarch64-linux-gnu
# 选择合适版本的cuda交叉编译镜像
https://catalog.ngc.nvidia.com/orgs/nvidia/containers/l4t-cuda/tags
# 创建docker环境
docker run -it \
  --runtime=nvidia \
  --privileged \
  --network host \
  --gpus all \
  -e DISPLAY=$DISPLAY \
  -e LIBGL_ALWAYS_SOFTWARE=0 \
  -e LIBGL_ALWAYS_INDIRECT=1 \
  -e NVIDIA_VISIBLE_DEVICES=all \
  -e NVIDIA_DRIVER_CAPABILITIES=all \
  -v /tmp/.X11-unix:/tmp/.X11-unix \
  --device=/dev/bus/usb:/dev/bus/usb \
  --device=/dev/input/js0:/dev/input/js0 \
  -v /mnt/data:/mnt/data \
  --name cross_compiler_12_2_arm64 nvcr.io/nvidia/l4t-cuda:12.2.2-devel-arm64-ubuntu22.04
# 安装cudnn
https://developer.nvidia.com/cudnn-9-5-1-download-archive?target_os=Linux&target_arch=aarch64-jetson&Compilation=Native&Distribution=Ubuntu&target_version=22.04&target_type=deb_local
2.2 编译onnxruntime
cd onnxruntime
python tools/ci_build/build.py --config Release --use_cuda --build_shared_lib --enable_pybind \
--parallel --skip_tests --build_dir ./build --cuda_home ${CUDA_HOME} \
--cudnn_home ${CUDNN_HOME}
# config Release:指定构建为发布版本。
# use_cuda:启用 CUDA 支持。
# build_shared_lib:构建共享库(onnxruntime 库)。
# enable_pybind:启用 Python 绑定。
# skip_tests:跳过测试(如果你不需要运行测试)。
# 构建 .whl 文件:
# 构建过程完成后,生成 .whl 文件。你可以通过 setup.py 来完成打包:
python setup.py bdist_wheel
- 编译遇到报错
CMake Error at onnxruntime_providers_cuda.cmake:200 (target_link_libraries):
  Target "onnxruntime_providers_cuda" links to:
    CUDA::cublasLt
修改文件 onnxruntime/cmake/external/onnxruntime_providers_cuda.cmake
###start
# Workaround for the configure error
#   Target "onnxruntime_providers_cuda" links to: CUDA::cublasLt
# When a CUDA:: imported target is not defined, declare it by hand so that the
# target_link_libraries() call below can resolve it.
#
# Directory that is searched for the CUDA shared libraries.
# NOTE(review): hard-coded to the CUDA 12.2 "sbsa-linux" target layout used in
# these notes -- adjust it if your toolkit version or target directory differs.
set(CUDA_LIB_DIR "/usr/local/cuda-12.2/targets/sbsa-linux/lib")

# For every CUDA library linked below, create a SHARED IMPORTED target pointing
# at ${CUDA_LIB_DIR}/lib<name>.so, but only if that target does not exist yet,
# so an already-defined target is never overridden.
foreach(cuda_component IN ITEMS cublasLt cublas cudart curand cufft)
  if(NOT TARGET CUDA::${cuda_component})
    add_library(CUDA::${cuda_component} SHARED IMPORTED)
    set_target_properties(CUDA::${cuda_component} PROPERTIES
      IMPORTED_LOCATION "${CUDA_LIB_DIR}/lib${cuda_component}.so"
    )
  endif()
endforeach()

# Link the libraries.
# `target`, ABSEIL_LIBS and ONNXRUNTIME_PROVIDERS_SHARED are expected to be
# set by the surrounding file (not visible in this snippet).
# Each item is its own argument: CUDA::cudart and ${ABSEIL_LIBS} must be
# separated by whitespace, otherwise the first Abseil entry is concatenated
# onto "CUDA::cudart" and the link item becomes invalid.
target_link_libraries(${target} PRIVATE
  CUDA::cublasLt
  CUDA::cublas
  cudnn
  CUDA::curand
  CUDA::cufft
  CUDA::cudart
  ${ABSEIL_LIBS}
  ${ONNXRUNTIME_PROVIDERS_SHARED}
  Boost::mp11
  safeint_interface
)