在invidia jetpack4.5.1上运行c++版yolov8（tensorRT）

心路历程（可略过）

为了能在arm64上跑通yolov8，我试过很多很多代码，太多对库版本的要求太高了；
比如说有一个是需要依赖onnx库的，（https://github.com/UNeedCryDear/yolov8-opencv-onnxruntime-cpp）
运行成功了报错error: IOrtSessionOptionsAppendExecutionProvider CUDA’ was not declare
d in this scope，一查是不仅需要onnx库，还需要gpu版本的onnx库
因为这个函数是onnxgpu里才有的函数OrtStatus* status = OrtSessionOptionsAppendExecutionProvider_CUDA(_OrtSessionOptions, cudaID);
而onnxruntime的官方下载地址（https://github.com/microsoft/onnxruntime/releases/）
在这里插入图片描述
只有这个版本可以用，但是这个并不是onnxruntime的gpu版本，我在论坛上上搜到onnx官方是不提供nvidia gpu的库的，所以需要自己编译。
我就尝试自己编译，结果有各种各样的版本不匹配的问题，先是说opencv版本低，然后又是杂七杂八的。我都按照要求升级了，最后来一个gcc版本也得升级，那我真是得放弃了，因为当前硬件得这些基础环境是不能改变的，我只能放弃上面这个关于onnxruntime的yolov8代码；（所以得到一个经验，这种大型的库最好直接下载官方现成的，自己编译真的非常麻烦，不到万不得已的时候建议直接换代码，这种版本匹配与编译的问题是最难解决的）

好在很幸运，找到了一个轻量级的能在nvidia arm64硬件上成功运行的轻量级c++yolov8代码，非常简洁好用，不需要依赖杂七杂八的库，可以说直接用jetpack默认的库就能可以简单编译而成，能找到非常不容易，下面是全部代码。

jetpack版本
文件结构
main.cpp

//
// Created by  triple-Mu     on 24-1-2023.
// Modified by Q-engineering on  6-3-2024
//#include "chrono"
#include "opencv2/opencv.hpp"
#include "yolov8.hpp"using namespace std;
using namespace cv;//#define VIDEOcv::Size       im_size(640, 640);
const int      num_labels  = 80;
const int      topk        = 100;
const float    score_thres = 0.25f;
const float    iou_thres   = 0.65f;int main(int argc, char** argv)
{float    f;float    FPS[16];int      i, Fcnt=0;cv::Mat  image;std::chrono::steady_clock::time_point Tbegin, Tend;if (argc < 3) {fprintf(stderr,"Usage: ./YoloV8_RT [model_trt.engine] [image or video path] \n");return -1;}const string engine_file_path = argv[1];const string imagepath = argv[2];for(i=0;i<16;i++) FPS[i]=0.0;cout << "Set CUDA...\n" << endl;//wjp// cudaSetDevice(0);cudaStream_t(0);cout << "Loading TensorRT model " << engine_file_path << endl;cout << "\nWait a second...." << std::flush;auto yolov8 = new YOLOv8(engine_file_path);cout << "\rLoading the pipe... " << string(10, ' ')<< "\n\r" ;cout << endl;yolov8->MakePipe(true);#ifdef VIDEOVideoCapture cap(imagepath);if (!cap.isOpened()) {cerr << "ERROR: Unable to open the stream " << imagepath << endl;return 0;}
#endif // VIDEOwhile(1){
#ifdef VIDEOcap >> image;if (image.empty()) {cerr << "ERROR: Unable to grab from the camera" << endl;break;}
#elseimage = cv::imread(imagepath);
#endifyolov8->CopyFromMat(image, im_size);std::vector<Object> objs;Tbegin = std::chrono::steady_clock::now();yolov8->Infer();Tend = std::chrono::steady_clock::now();yolov8->PostProcess(objs, score_thres, iou_thres, topk, num_labels);yolov8->DrawObjects(image, objs);//calculate frame ratef = std::chrono::duration_cast <std::chrono::milliseconds> (Tend - Tbegin).count();cout << "Infer time " << f << endl;if(f>0.0) FPS[((Fcnt++)&0x0F)]=1000.0/f;for(f=0.0, i=0;i<16;i++){ f+=FPS[i]; }putText(image, cv::format("FPS %0.2f", f/16),cv::Point(10,20),cv::FONT_HERSHEY_SIMPLEX,0.6, cv::Scalar(0, 0, 255));//show output// imshow("Jetson Orin Nano- 8 Mb RAM", image);// char esc = cv::waitKey(1);// if(esc == 27) break;imwrite("./out.jpg", image);return 0;}cv::destroyAllWindows();delete yolov8;return 0;
}

yolov8.cpp

//
// Created by  triple-Mu     on 24-1-2023.
// Modified by Q-engineering on  6-3-2024
//#include "yolov8.hpp"
#include <cuda_runtime_api.h>
#include <cuda.h>//----------------------------------------------------------------------------------------
//using namespace det;
//----------------------------------------------------------------------------------------
const char* class_names[] = {"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light","fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow","elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee","skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard","tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple","sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch","potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone","microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear","hair drier", "toothbrush"
};
//----------------------------------------------------------------------------------------
YOLOv8::YOLOv8(const std::string& engine_file_path)
{std::ifstream file(engine_file_path, std::ios::binary);assert(file.good());file.seekg(0, std::ios::end);auto size = file.tellg();file.seekg(0, std::ios::beg);char* trtModelStream = new char[size];assert(trtModelStream);file.read(trtModelStream, size);file.close();initLibNvInferPlugins(&this->gLogger, "");this->runtime = nvinfer1::createInferRuntime(this->gLogger);assert(this->runtime != nullptr);this->engine = this->runtime->deserializeCudaEngine(trtModelStream, size);assert(this->engine != nullptr);delete[] trtModelStream;this->context = this->engine->createExecutionContext();assert(this->context != nullptr);cudaStreamCreate(&this->stream);this->num_bindings = this->engine->getNbBindings();for (int i = 0; i < this->num_bindings; ++i) {Binding            binding;nvinfer1::Dims     dims;nvinfer1::DataType dtype = this->engine->getBindingDataType(i);std::string        name  = this->engine->getBindingName(i);binding.name             = name;binding.dsize            = type_to_size(dtype);bool IsInput = engine->bindingIsInput(i);if (IsInput) {this->num_inputs += 1;dims         = this->engine->getProfileDimensions(i, 0, nvinfer1::OptProfileSelector::kMAX);binding.size = get_size_by_dims(dims);binding.dims = dims;this->input_bindings.push_back(binding);// set max opt shapethis->context->setBindingDimensions(i, dims);}else {dims         = this->context->getBindingDimensions(i);binding.size = get_size_by_dims(dims);binding.dims = dims;this->output_bindings.push_back(binding);this->num_outputs += 1;}}
}
//----------------------------------------------------------------------------------------
YOLOv8::~YOLOv8()
{this->context->destroy();this->engine->destroy();this->runtime->destroy();cudaStreamDestroy(this->stream);for (auto& ptr : this->device_ptrs) {CHECK(cudaFree(ptr));}for (auto& ptr : this->host_ptrs) {CHECK(cudaFreeHost(ptr));}
}
//----------------------------------------------------------------------------------------
void YOLOv8::MakePipe(bool warmup)
{
#ifndef CUDART_VERSION
#error CUDART_VERSION Undefined!
#endiffor (auto& bindings : this->input_bindings) {void* d_ptr;
#if(CUDART_VERSION < 11000)CHECK(cudaMalloc(&d_ptr, bindings.size * bindings.dsize));
#elseCHECK(cudaMallocAsync(&d_ptr, bindings.size * bindings.dsize, this->stream));
#endifthis->device_ptrs.push_back(d_ptr);}for (auto& bindings : this->output_bindings) {void * d_ptr, *h_ptr;size_t size = bindings.size * bindings.dsize;
#if(CUDART_VERSION < 11000)CHECK(cudaMalloc(&d_ptr, bindings.size * bindings.dsize));
#elseCHECK(cudaMallocAsync(&d_ptr, bindings.size * bindings.dsize, this->stream));
#endifCHECK(cudaHostAlloc(&h_ptr, size, 0));this->device_ptrs.push_back(d_ptr);this->host_ptrs.push_back(h_ptr);}if (warmup) {for (int i = 0; i < 10; i++) {for (auto& bindings : this->input_bindings) {size_t size  = bindings.size * bindings.dsize;void*  h_ptr = malloc(size);memset(h_ptr, 0, size);CHECK(cudaMemcpyAsync(this->device_ptrs[0], h_ptr, size, cudaMemcpyHostToDevice, this->stream));free(h_ptr);}this->Infer();}}
}
//----------------------------------------------------------------------------------------
void YOLOv8::Letterbox(const cv::Mat& image, cv::Mat& out, cv::Size& size)
{const float inp_h  = size.height;const float inp_w  = size.width;float       height = image.rows;float       width  = image.cols;float r    = std::min(inp_h / height, inp_w / width);int   padw = std::round(width * r);int   padh = std::round(height * r);cv::Mat tmp;if ((int)width != padw || (int)height != padh) {cv::resize(image, tmp, cv::Size(padw, padh));}else {tmp = image.clone();}float dw = inp_w - padw;float dh = inp_h - padh;dw /= 2.0f;dh /= 2.0f;int top    = int(std::round(dh - 0.1f));int bottom = int(std::round(dh + 0.1f));int left   = int(std::round(dw - 0.1f));int right  = int(std::round(dw + 0.1f));cv::copyMakeBorder(tmp, tmp, top, bottom, left, right, cv::BORDER_CONSTANT, {114, 114, 114});cv::dnn::blobFromImage(tmp, out, 1 / 255.f, cv::Size(), cv::Scalar(0, 0, 0), true, false, CV_32F);this->pparam.ratio  = 1 / r;this->pparam.dw     = dw;this->pparam.dh     = dh;this->pparam.height = height;this->pparam.width  = width;;
}
//----------------------------------------------------------------------------------------
void YOLOv8::CopyFromMat(const cv::Mat& image)
{cv::Mat  nchw;auto&    in_binding = this->input_bindings[0];auto     width      = in_binding.dims.d[3];auto     height     = in_binding.dims.d[2];cv::Size size{width, height};this->Letterbox(image, nchw, size);this->context->setBindingDimensions(0, nvinfer1::Dims{4, {1, 3, height, width}});CHECK(cudaMemcpyAsync(this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
}
//----------------------------------------------------------------------------------------
void YOLOv8::CopyFromMat(const cv::Mat& image, cv::Size& size)
{cv::Mat nchw;this->Letterbox(image, nchw, size);this->context->setBindingDimensions(0, nvinfer1::Dims{4, {1, 3, size.height, size.width}});CHECK(cudaMemcpyAsync(this->device_ptrs[0], nchw.ptr<float>(), nchw.total() * nchw.elemSize(), cudaMemcpyHostToDevice, this->stream));
}
//----------------------------------------------------------------------------------------
void YOLOv8::Infer()
{this->context->enqueueV2(this->device_ptrs.data(), this->stream, nullptr);for (int i = 0; i < this->num_outputs; i++) {size_t osize = this->output_bindings[i].size * this->output_bindings[i].dsize;CHECK(cudaMemcpyAsync(this->host_ptrs[i], this->device_ptrs[i + this->num_inputs], osize, cudaMemcpyDeviceToHost, this->stream));}cudaStreamSynchronize(this->stream);
}
//----------------------------------------------------------------------------------------
void YOLOv8::PostProcess(std::vector<Object>& objs, float score_thres, float iou_thres, int topk, int num_labels)
{objs.clear();auto num_channels = this->output_bindings[0].dims.d[1];auto num_anchors  = this->output_bindings[0].dims.d[2];auto& dw     = this->pparam.dw;auto& dh     = this->pparam.dh;auto& width  = this->pparam.width;auto& height = this->pparam.height;auto& ratio  = this->pparam.ratio;std::vector<cv::Rect> bboxes;std::vector<float>    scores;std::vector<int>      labels;std::vector<int>      indices;cv::Mat output = cv::Mat(num_channels, num_anchors, CV_32F, static_cast<float*>(this->host_ptrs[0]));output         = output.t();for (int i = 0; i < num_anchors; i++) {auto  row_ptr    = output.row(i).ptr<float>();auto  bboxes_ptr = row_ptr;auto  scores_ptr = row_ptr + 4;auto  max_s_ptr  = std::max_element(scores_ptr, scores_ptr + num_labels);float score      = *max_s_ptr;if (score > score_thres) {float x = *bboxes_ptr++ - dw;float y = *bboxes_ptr++ - dh;float w = *bboxes_ptr++;float h = *bboxes_ptr;float x0 = clamp((x - 0.5f * w) * ratio, 0.f, width);float y0 = clamp((y - 0.5f * h) * ratio, 0.f, height);float x1 = clamp((x + 0.5f * w) * ratio, 0.f, width);float y1 = clamp((y + 0.5f * h) * ratio, 0.f, height);int              label = max_s_ptr - scores_ptr;cv::Rect_<float> bbox;bbox.x      = x0;bbox.y      = y0;bbox.width  = x1 - x0;bbox.height = y1 - y0;bboxes.push_back(bbox);labels.push_back(label);scores.push_back(score);}}#ifdef BATCHED_NMScv::dnn::NMSBoxesBatched(bboxes, scores, labels, score_thres, iou_thres, indices);
#elsecv::dnn::NMSBoxes(bboxes, scores, score_thres, iou_thres, indices);
#endifint cnt = 0;for (auto& i : indices) {if (cnt >= topk) {break;}Object obj;obj.rect  = bboxes[i];obj.prob  = scores[i];obj.label = labels[i];objs.push_back(obj);cnt += 1;}
}
//----------------------------------------------------------------------------------------
void YOLOv8::DrawObjects(cv::Mat& bgr, const std::vector<Object>& objs)
{char text[256];for (auto& obj : objs) {cv::rectangle(bgr, obj.rect, cv::Scalar(255, 0, 0));sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100);int      baseLine   = 0;cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);int x = (int)obj.rect.x;int y = (int)obj.rect.y - label_size.height - baseLine;if (y < 0)        y = 0;if (y > bgr.rows) y = bgr.rows;if (x + label_size.width > bgr.cols) x = bgr.cols - label_size.width;cv::rectangle(bgr, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), cv::Scalar(255, 255, 255), -1);cv::putText(bgr, text, cv::Point(x, y + label_size.height), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));}
}
//----------------------------------------------------------------------------------------

common.hpp

//
// Created by  triple-Mu     on 24-1-2023.
// Modified by Q-engineering on  6-3-2024
//#ifndef DETECT_NORMAL_COMMON_HPP
#define DETECT_NORMAL_COMMON_HPP
#include "NvInfer.h"
#include "opencv2/opencv.hpp"#define CHECK(call)                                                                                                    \do {                                                                                                               \const cudaError_t error_code = call;                                                                           \if (error_code != cudaSuccess) {                                                                               \printf("CUDA Error:\n");                                                                                   \printf("    File:       %s\n", __FILE__);                                                                  \printf("    Line:       %d\n", __LINE__);                                                                  \printf("    Error code: %d\n", error_code);                                                                \printf("    Error text: %s\n", cudaGetErrorString(error_code));                                            \exit(1);                                                                                                   \}                                                                                                              \} while (0)class Logger: public nvinfer1::ILogger {
public:nvinfer1::ILogger::Severity reportableSeverity;explicit Logger(nvinfer1::ILogger::Severity severity = nvinfer1::ILogger::Severity::kINFO):reportableSeverity(severity){}void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override{if (severity > reportableSeverity) {return;}switch (severity) {case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:std::cerr << "INTERNAL_ERROR: ";break;case nvinfer1::ILogger::Severity::kERROR:std::cerr << "ERROR: ";break;case nvinfer1::ILogger::Severity::kWARNING:std::cerr << "WARNING: ";break;case nvinfer1::ILogger::Severity::kINFO:std::cerr << "INFO: ";break;default:std::cerr << "VERBOSE: ";break;}std::cerr << msg << std::endl;}
};inline int get_size_by_dims(const nvinfer1::Dims& dims)
{int size = 1;for (int i = 0; i < dims.nbDims; i++) {size *= dims.d[i];}return size;
}inline int type_to_size(const nvinfer1::DataType& dataType)
{switch (dataType) {case nvinfer1::DataType::kFLOAT:return 4;case nvinfer1::DataType::kHALF:return 2;case nvinfer1::DataType::kINT32:return 4;case nvinfer1::DataType::kINT8:return 1;case nvinfer1::DataType::kBOOL:return 1;default:return 4;}
}inline static float clamp(float val, float min, float max)
{return val > min ? (val < max ? val : max) : min;
}namespace det {
struct Binding {size_t         size  = 1;size_t         dsize = 1;nvinfer1::Dims dims;std::string    name;
};struct Object {cv::Rect_<float> rect;int              label = 0;float            prob  = 0.0;
};struct PreParam {float ratio  = 1.0f;float dw     = 0.0f;float dh     = 0.0f;float height = 0;float width  = 0;
};
}  // namespace det
#endif  // DETECT_NORMAL_COMMON_HPP

yolov8.hpp

//
// Created by  triple-Mu     on 24-1-2023.
// Modified by Q-engineering on  6-3-2024
//
#ifndef DETECT_NORMAL_YOLOV8_HPP
#define DETECT_NORMAL_YOLOV8_HPP
#include "NvInferPlugin.h"
#include "common.hpp"
#include "fstream"using namespace det;class YOLOv8 {
private:nvinfer1::ICudaEngine*       engine  = nullptr;nvinfer1::IRuntime*          runtime = nullptr;nvinfer1::IExecutionContext* context = nullptr;cudaStream_t                 stream  = nullptr;Logger                       gLogger{nvinfer1::ILogger::Severity::kERROR};
public:int                  num_bindings;int                  num_inputs  = 0;int                  num_outputs = 0;std::vector<Binding> input_bindings;std::vector<Binding> output_bindings;std::vector<void*>   host_ptrs;std::vector<void*>   device_ptrs;PreParam pparam;public:explicit YOLOv8(const std::string& engine_file_path);~YOLOv8();void                 MakePipe(bool warmup = true);void                 CopyFromMat(const cv::Mat& image);void                 CopyFromMat(const cv::Mat& image, cv::Size& size);void                 Letterbox(const cv::Mat& image, cv::Mat& out, cv::Size& size);void                 Infer();void                 PostProcess(std::vector<Object>& objs, float score_thres, float iou_thres, int topk, int num_labels  = 80);void                 DrawObjects(cv::Mat& bgr, const std::vector<Object>& objs);
};
#endif  // DETECT_NORMAL_YOLOV8_HPP

CMakeLists.txt

cmake_minimum_required(VERSION 3.1)set(CMAKE_CUDA_ARCHITECTURES 60 61 62 70 72 75 86 89 90)
set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc)project(YoloV8rt LANGUAGES CXX CUDA)set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -O3")
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_BUILD_TYPE Release)
option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)# CUDA
include_directories(/usr/local/cuda-10.2/targets/aarch64-linux/include)
link_directories(/usr/local/cuda-10.2/targets/aarch64-linux/lib)
# find_package(CUDA REQUIRED)
# message(STATUS "CUDA Libs: \n${CUDA_LIBRARIES}\n")
# get_filename_component(CUDA_LIB_DIR ${CUDA_LIBRARIES} DIRECTORY)
# message(STATUS "CUDA Headers: \n${CUDA_INCLUDE_DIRS}\n")# OpenCV
find_package(OpenCV REQUIRED)# TensorRT
set(TensorRT_INCLUDE_DIRS /usr/include /usr/include/aarch-linux-gnu)
set(TensorRT_LIBRARIES /usr/lib/aarch64-linux-gnu)message(STATUS "TensorRT Libs: \n\n${TensorRT_LIBRARIES}\n")
message(STATUS "TensorRT Headers: \n${TensorRT_INCLUDE_DIRS}\n")list(APPEND INCLUDE_DIRS${CUDA_INCLUDE_DIRS}${OpenCV_INCLUDE_DIRS}${TensorRT_INCLUDE_DIRS}include)list(APPEND ALL_LIBS${CUDA_LIBRARIES}${CUDA_LIB_DIR}${OpenCV_LIBRARIES}${TensorRT_LIBRARIES})include_directories(${INCLUDE_DIRS})add_executable(${PROJECT_NAME}src/main.cppsrc/yolov8.cppinclude/yolov8.hppinclude/common.hpp)target_link_libraries(${PROJECT_NAME} PUBLIC ${ALL_LIBS})
target_link_libraries(${PROJECT_NAME} PRIVATE nvinfer nvinfer_plugin cudart ${OpenCV_LIBS})#place exe in parent folder
set(EXECUTABLE_OUTPUT_PATH "./")if (${OpenCV_VERSION} VERSION_GREATER_EQUAL 4.7.0)message(STATUS "Build with -DBATCHED_NMS")add_definitions(-DBATCHED_NMS)
endif ()

原项目地址
https://github.com/Qengineering/YoloV8-TensorRT-Jetson_Nano

在invidia jetpack4.5.1上运行c++版yolov8（tensorRT）

心路历程（可略过）

好在很幸运，找到了一个轻量级的能在nvidia arm64硬件上成功运行的轻量级c++yolov8代码，非常简洁好用，不需要依赖杂七杂八的库，可以说直接用jetpack默认的库就能可以简单编译而成，能找到非常不容易，下面是全部代码。

最新新闻

热搜词