项目地址:https://github.com/ibaiGorordo/ONNX-YOLOv10-Object-Detection
项目依赖:onnxruntime-gpu、opencv-python、imread-from-url、cap-from-youtube、ultralytics
1、代码修改
代码改动说明:yolov10/yolov10.py中的第18行修改为以下代码,明确指出使用cuda
self.session = onnxruntime.InferenceSession(path, providers=['CUDAExecutionProvider'])
同时将代码中的返回值注解 -> tuple[np.ndarray, np.ndarray, np.ndarray]: 修改为 -> tuple: ,也就是删除 tuple 后面方括号中的类型描述(tuple[...] 这种内置泛型写法需要 Python 3.9 及以上,在更低版本中会报 TypeError)。
2、onnx模型导出
基于以下代码可以导出onnx模型,并放到ONNX-YOLOv10-Object-Detection项目下的models目录下。
from ultralytics import YOLO
if __name__ == '__main__':
    # Load the PyTorch checkpoint and move the model to the GPU.
    path = r"yolov10n.pt"
    model = YOLO(path).cuda()

    # Export to ONNX. Afterwards, place the resulting .onnx file in the
    # models/ directory of the ONNX-YOLOv10-Object-Detection project.
    success = model.export(format="onnx")
3、检测图片
import cv2
from yolov10 import YOLOv10, draw_detections

# Initialize YOLOv10 object detector
model_path = "models/yolov10n.onnx"
detector = YOLOv10(model_path, conf_thres=0.2)

# Read image
img_path = r"D:\yolo_seq\ultralytics\datasets\coco128\images\train2017\000000000009.jpg"
img = cv2.imread(img_path)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(f"Could not read image: {img_path}")

# Detect objects. The same image is run 10 times; presumably this is so the
# later iterations show the steady-state inference time after warm-up.
for _ in range(10):
    class_ids, boxes, confidences = detector(img)

# Draw detections
combined_img = draw_detections(img, boxes, confidences, class_ids)
cv2.imshow("Detected Objects", combined_img)
cv2.waitKey(0)
再参考 https://hpg123.blog.csdn.net/article/details/141882208?spm=1001.2014.3001.5502 中的 rtsp 拉流代码,即可基于 yolov10 模型实现对 rtsp 视频流的实时检测。
4、关键代码
项目代码结构
4.1 __init__.py
from .yolov10 import YOLOv10
from .utils import draw_detections
4.2 utils.py
import os
import cv2
import numpy as np
import tqdm
import requests

# Class labels, indexed by the class id returned by the detector.
class_names = [
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
    'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
    'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
    'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
    'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
    'scissors', 'teddy bear', 'hair drier', 'toothbrush',
]

# Fixed seed so that anything drawn from this generator (the per-class
# colors) is identical on every run.
rng = np.random.default_rng(3)
# One color per class: an array of shape (len(class_names), 3) holding float
# values drawn uniformly from [0, 255).
colors = rng.uniform(0, 255, size=(len(class_names), 3))

available_models = ["yolov10n", "yolov10s", "yolov10m", "yolov10b", "yolov10l", "yolov10x"]


def download_model(url: str, path: str):
    """Download the file at *url* and store it at *path*, showing a progress bar.

    The data is streamed into ``<path>.part`` and only renamed to *path* once
    the transfer has finished, so an interrupted download never leaves a
    truncated file at *path*.

    Args:
        url: Location of the file to download.
        path: Destination file path.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    print(f"Downloading model from {url} to {path}")
    chunk_size = 1024 * 1024
    tmp_path = f"{path}.part"

    with requests.get(url, stream=True, timeout=30) as r:
        # Fail early instead of saving an HTML error page as the model file.
        r.raise_for_status()

        # The header is optional; without it the progress bar has no total.
        content_length = r.headers.get('content-length')
        total_chunks = None
        if content_length is not None:
            # Ceiling division so a trailing partial chunk is counted too.
            total_chunks = -(-int(content_length) // chunk_size)

        with open(tmp_path, 'wb') as f:
            for chunk in tqdm.tqdm(r.iter_content(chunk_size=chunk_size), total=total_chunks,
                                   bar_format='{l_bar}{bar:10}'):
                if chunk:
                    f.write(chunk)

    os.replace(tmp_path, path)


def check_model(model_path: str):
    """Make sure a model file exists at *model_path*, downloading it if needed.

    Nothing happens when the file already exists. Otherwise the model name is
    taken from the file name (the part before the first dot) and the matching
    ONNX file is downloaded from the THU-MIG/yolov10 v1.1 release.

    Args:
        model_path: Path where the model file is expected.

    Raises:
        ValueError: If the file is missing and its name is not one of
            ``available_models``.
    """
    if os.path.exists(model_path):
        return

    model_name = os.path.basename(model_path).split('.')[0]
    if model_name not in available_models:
        raise ValueError(f"Invalid model name: {model_name}")

    url = f"https://github.com/THU-MIG/yolov10/releases/download/v1.1/{model_name}.onnx"

    # Create the target directory (e.g. "models/") when it does not exist yet.
    model_dir = os.path.dirname(model_path)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)

    download_model(url, model_path)


def draw_detections(image, boxes, scores, class_ids, mask_alpha=0.3):
    """Return a copy of *image* with boxes, translucent fills and labels drawn.

    Args:
        image: Image to annotate; it is not modified.
        boxes: One (x1, y1, x2, y2) box per detection.
        scores: One confidence score per detection.
        class_ids: One index into ``class_names`` / ``colors`` per detection.
        mask_alpha: Weight of the filled class-color rectangles in the blend.

    Returns:
        The annotated image.
    """
    det_img = image.copy()

    # Font scale and text thickness grow with the shorter image side.
    img_height, img_width = image.shape[:2]
    font_size = min([img_height, img_width]) * 0.0006
    text_thickness = int(min([img_height, img_width]) * 0.001)

    det_img = draw_masks(det_img, boxes, class_ids, mask_alpha)

    # Draw bounding boxes and labels of detections
    for class_id, box, score in zip(class_ids, boxes, scores):
        color = colors[class_id]

        draw_box(det_img, box, color)

        label = class_names[class_id]
        caption = f'{label} {int(score * 100)}%'
        draw_text(det_img, caption, box, color, font_size, text_thickness)

    return det_img


def draw_box(image: np.ndarray, box: np.ndarray, color: tuple = (0, 0, 255),
             thickness: int = 2) -> np.ndarray:
    """Draw the outline of *box* (x1, y1, x2, y2) on *image* and return the result."""
    x1, y1, x2, y2 = box.astype(int)
    return cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness)


def draw_text(image: np.ndarray, text: str, box: np.ndarray, color: tuple = (0, 0, 255),
              font_size: float = 0.001, text_thickness: int = 2) -> np.ndarray:
    """Draw *text* in white on a filled *color* background at the box's (x1, y1) corner.

    Returns:
        The result of ``cv2.putText`` on *image*.
    """
    x1, y1, x2, y2 = box.astype(int)
    (tw, th), _ = cv2.getTextSize(text=text, fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                  fontScale=font_size, thickness=text_thickness)
    # Make the background 20% taller than the measured text height.
    th = int(th * 1.2)

    # Filled label background extending upwards from (x1, y1).
    cv2.rectangle(image, (x1, y1),
                  (x1 + tw, y1 - th), color, -1)

    return cv2.putText(image, text, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, font_size, (255, 255, 255), text_thickness,
                       cv2.LINE_AA)


def draw_masks(image: np.ndarray, boxes: np.ndarray, classes: np.ndarray, mask_alpha: float = 0.3) -> np.ndarray:
    """Blend filled, class-colored rectangles for *boxes* over *image*.

    Args:
        image: Source image; it is not modified.
        boxes: One (x1, y1, x2, y2) box per detection.
        classes: One index into ``colors`` per detection.
        mask_alpha: Weight of the filled rectangles; the original image gets
            ``1 - mask_alpha``.

    Returns:
        The blended image.
    """
    mask_img = image.copy()

    # Fill every detection's box with its class color on the copy.
    for box, class_id in zip(boxes, classes):
        color = colors[class_id]

        x1, y1, x2, y2 = box.astype(int)

        # Draw fill rectangle in mask image
        cv2.rectangle(mask_img, (x1, y1), (x2, y2), color, -1)

    return cv2.addWeighted(mask_img, mask_alpha, image, 1 - mask_alpha, 0)
4.3 yolov10.py
import time
import cv2
import numpy as np
import onnxruntime

from .utils import draw_detections, check_model


class YOLOv10:
    """YOLOv10 object detector running an ONNX model through ONNX Runtime.

    Calling an instance with a BGR image returns ``(class_ids, boxes,
    confidences)`` for all detections above the confidence threshold, with
    boxes scaled to the size of the input image.
    """

    def __init__(self, path: str, conf_thres: float = 0.2):
        """Load the model at *path*.

        Args:
            path: Path of the ONNX model file; ``check_model`` is called on
                it before the session is created.
            conf_thres: Detections with a confidence not above this value
                are discarded.
        """
        self.conf_threshold = conf_thres

        check_model(path)

        # Initialize model, explicitly requesting the CUDA execution provider.
        self.session = onnxruntime.InferenceSession(path, providers=['CUDAExecutionProvider'])

        # Get model info
        self.get_input_details()
        self.get_output_details()

    def __call__(self, image: np.ndarray) -> tuple:
        """Shortcut for :meth:`detect_objects`."""
        return self.detect_objects(image)

    def detect_objects(self, image: np.ndarray) -> tuple:
        """Run preprocessing, inference and post-processing on one image.

        Returns:
            Tuple ``(class_ids, boxes, confidences)``.
        """
        input_tensor = self.prepare_input(image)

        # Perform inference on the image
        outputs = self.inference(input_tensor)

        return self.process_output(outputs[0])

    def prepare_input(self, image: np.ndarray) -> np.ndarray:
        """Convert a BGR image into a float32 tensor of shape (1, 3, H, W).

        The original image size is stored on the instance so the boxes can
        be scaled back to it later.
        """
        self.img_height, self.img_width = image.shape[:2]

        input_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Resize input image
        input_img = cv2.resize(input_img, (self.input_width, self.input_height))
        print(input_img.shape)

        # Scale input pixel values to 0 to 1
        input_img = input_img / 255.0
        # HWC -> CHW, then add the batch axis.
        input_img = input_img.transpose(2, 0, 1)
        input_tensor = input_img[np.newaxis, :, :, :].astype(np.float32)

        return input_tensor

    def inference(self, input_tensor):
        """Run the session on *input_tensor*, print the elapsed time and return the outputs."""
        start = time.perf_counter()
        outputs = self.session.run(self.output_names, {self.input_names[0]: input_tensor})

        print(f"Inference time: {(time.perf_counter() - start) * 1000:.2f} ms")
        return outputs

    def process_output(self, output):
        """Filter the raw model output by confidence and rescale the boxes.

        Each row of the output is read as box coordinates followed by the
        confidence (second to last column) and the class id (last column).

        Returns:
            Tuple ``(class_ids, boxes, confidences)`` of the kept detections.
        """
        # Flatten to 2-D (rows = detections). Unlike squeeze(), this keeps a
        # single-detection output two-dimensional so the slicing below works.
        output = output.reshape(-1, output.shape[-1])

        boxes = output[:, :-2]
        confidences = output[:, -2]
        class_ids = output[:, -1].astype(int)

        mask = confidences > self.conf_threshold
        boxes = boxes[mask, :]
        confidences = confidences[mask]
        class_ids = class_ids[mask]

        # Rescale boxes to original image dimensions
        boxes = self.rescale_boxes(boxes)

        return class_ids, boxes, confidences

    def rescale_boxes(self, boxes):
        """Scale boxes from model-input coordinates to original-image coordinates."""
        input_shape = np.array([self.input_width, self.input_height, self.input_width, self.input_height])
        boxes = np.divide(boxes, input_shape, dtype=np.float32)
        boxes *= np.array([self.img_width, self.img_height, self.img_width, self.img_height])

        return boxes

    def get_input_details(self):
        """Read the input names and the input height/width from the session."""
        model_inputs = self.session.get_inputs()
        self.input_names = [model_input.name for model_input in model_inputs]

        input_shape = model_inputs[0].shape
        # Fall back to 640 when a dimension is not an integer.
        self.input_height = input_shape[2] if isinstance(input_shape[2], int) else 640
        self.input_width = input_shape[3] if isinstance(input_shape[3], int) else 640

    def get_output_details(self):
        """Read the output names from the session."""
        model_outputs = self.session.get_outputs()
        self.output_names = [model_output.name for model_output in model_outputs]


# https://github.com/ibaiGorordo/ONNX-YOLOv10-Object-Detection/tree/main
if __name__ == '__main__':
    model_path = "yolov10n.onnx"

    # Read the test image.
    img_path = r"D:\yolo_seq\ultralytics\datasets\coco128\images\train2017\000000000009.jpg"
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None instead of raising when the file cannot be read.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    # No manual resize to 640x640 is needed here: prepare_input() resizes the
    # image to the model's input size.

    # Initialize YOLOv10 object detector
    detector = YOLOv10(model_path)

    # Detect objects. The same image is run 10 times; presumably this is so
    # the later iterations show the steady-state inference time.
    for _ in range(10):
        class_ids, boxes, confidences = detector(img)

    # Draw detections
    combined_img = draw_detections(img, boxes, confidences, class_ids)
    print("img: ", img.shape)
    print("combined_img: ", combined_img.shape)
    cv2.imshow("Output", combined_img)
    cv2.waitKey(0)