1. Environment setup:
pip install openvino-dev==2023.0.1
pip install nncf==2.5.0
pip install ultralytics
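Before converting anything, it is worth confirming the installed versions match the ones above (a minimal sanity-check sketch, assuming the packages installed cleanly):

from openvino.runtime import get_version as ov_version
import nncf
import ultralytics

print("OpenVINO:", ov_version())              # expect 2023.0.x
print("NNCF:", nncf.__version__)              # expect 2.5.0
print("Ultralytics:", ultralytics.__version__)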
2. Model conversion and NNCF quantization:
1. PyTorch -> ONNX:
# Convert the PyTorch model to an ONNX model
from ultralytics import YOLO

model = YOLO('yolov8s.pt')
# Native YOLOv8 export
result = model.export(format='onnx')
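Optionally, the exported file can be sanity-checked with the onnx package (a hedged sketch; ultralytics pulls in onnx for the export, and model.export() returns the output path):

import onnx

# `result` is the path returned by model.export() above
onnx_model = onnx.load(result)
onnx.checker.check_model(onnx_model)  # raises if the graph is malformed
print('ONNX export OK:', result)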
2. ONNX -> OpenVINO IR:
from openvino.tools import mo
from openvino.runtime import serialize

# onnx_path is the path to the ONNX model
model = mo.convert_model(onnx_path)
# fp32_path is the path where the OpenVINO IR model is saved
serialize(model, fp32_path)  # onnx -> OpenVINO IR
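To confirm the IR was written correctly, it can be read back and inspected (a minimal sketch; fp32_path is the same path used above):

from openvino.runtime import Core

core = Core()
ov_model = core.read_model(fp32_path)
# Print the model's input and output ports to verify the conversion
print('inputs:', ov_model.inputs)
print('outputs:', ov_model.outputs)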
3. NNCF model quantization:
from openvino.tools import mo
from openvino.runtime import serialize, Core
import nncf
from api import *  # helper definitions: create_data_source, YOLOv5POTDataLoader, transform_fn, ...

# Parameters
model_name = 'yolov8n'
model_path = '/home/yy/yolov11-master/weights/yolov8n.onnx'
subset_size = 500
preset = nncf.QuantizationPreset.MIXED

# Optional: also export an FP16 IR for comparison
fp16_path = f'/home/yy/yolov11-master/weights/fp16_{model_name}.xml'
model = mo.convert_model(model_path, compress_to_fp16=True)
serialize(model, fp16_path)

fp32_path = "/home/yy/yolov11-master/weights/yolov8n_vino_fp32/yolov8n.xml"

# Build the calibration data source (COCO128)
data_source = create_data_source('/home/yy/yolov11-master/Open_Vino-introductory-master/coco128.yaml')
pot_data_loader = YOLOv5POTDataLoader(data_source)

# NNCF calibration dataset
nncf_calibration_dataset = nncf.Dataset(data_source, transform_fn)

# Create the quantization pipeline and run NNCF quantization
core = Core()
ov_model = core.read_model(fp32_path)
q_model = nncf.quantize(ov_model, nncf_calibration_dataset, preset=preset, subset_size=subset_size)

# Save the INT8 model
nncf_int8_path = f'/home/yy/yolov11-master/weights/{model_name}_nncf_int8/{model_name}_nncf_int8.xml'
serialize(q_model, nncf_int8_path)
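A quick way to see what quantization bought is to compare the weight-file sizes of the FP32 and INT8 IRs; serialize writes a .bin next to each .xml (a minimal sketch using the paths defined above):

import os

# The weights live in the .bin file next to each .xml
fp32_bin = fp32_path.replace('.xml', '.bin')
int8_bin = nncf_int8_path.replace('.xml', '.bin')
print(f'FP32 weights: {os.path.getsize(fp32_bin) / 1e6:.1f} MB')
print(f'INT8 weights: {os.path.getsize(int8_bin) / 1e6:.1f} MB')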
3. Inference with the INT8-quantized model:
1. detect.py
from api import *
from openvino.runtime import Core
import cv2
import time
from PIL import Image
import numpy as np


def predict(model, label_dict: dict, obj_path: str, cap=False):
    # Predict with the OpenVINO IR (.xml) model
    # note: the `cap` keyword argument is unused; it is shadowed by the VideoCapture below
    core = Core()
    det_ov_model = core.read_model(model)
    device = 'CPU'
    det_compiled_model = core.compile_model(det_ov_model, device)
    cap = cv2.VideoCapture(obj_path)
    while True:
        ret, frame = cap.read()
        if ret:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            t1 = time.time()
            detections = detect(frame, det_compiled_model, nc=80)[0]
            t2 = time.time()
            fps = 1 / (t2 - t1)
            print(fps)
            image_with_boxes = draw_results(detections, frame, label_dict)
            image_with_boxes = cv2.cvtColor(image_with_boxes, cv2.COLOR_BGR2RGB)
            cv2.imshow('vid', image_with_boxes)
            if cv2.waitKey(1) & 0xff == ord('q'):
                break
        else:
            break  # end of stream
    cap.release()
    cv2.destroyAllWindows()


if __name__ == '__main__':
    label_dict = {
        0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train',
        7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter',
        13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant',
        21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie',
        28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite',
        34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket',
        39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl',
        46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog',
        53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed',
        60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard',
        67: 'cell phone', 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator',
        73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier',
        79: 'toothbrush'
    }
    predict('/home/yy/yolov11-master/weights/yolov8n_nncf_int8/yolov8n_nncf_int8.xml',
            label_dict,
            '/home/yy/yolov11-master/source_video/card.mp4',
            cap=True)
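The same INT8 IR also works on a single still image. A minimal sketch ('test.jpg' and 'result.jpg' are placeholder paths; the class-name dict is taken from the ultralytics model to avoid repeating it):

from api import detect, draw_results
from openvino.runtime import Core
from ultralytics import YOLO
import cv2

label_dict = YOLO('yolov8n.pt').names  # the 80 COCO class names
core = Core()
compiled = core.compile_model(
    core.read_model('/home/yy/yolov11-master/weights/yolov8n_nncf_int8/yolov8n_nncf_int8.xml'), 'CPU')
image = cv2.cvtColor(cv2.imread('test.jpg'), cv2.COLOR_BGR2RGB)
detections = detect(image, compiled, nc=80)[0]
result = draw_results(detections, image, label_dict)
cv2.imwrite('result.jpg', cv2.cvtColor(result, cv2.COLOR_RGB2BGR))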
2. api.py
import numpy as np
import torch
from ultralytics.yolo.utils import ops
from openvino.runtime import Core, Model
import random
from typing import Tuple, Dict
import cv2
from ultralytics.yolo.utils.plotting import colors
from yolov5.utils.dataloaders import create_dataloader
from tqdm.notebook import tqdm
from ultralytics.yolo.utils.metrics import ConfusionMatrix
from yolov5.utils.general import check_dataset
from openvino.tools.pot.api import DataLoader


def test(model: Model, core: Core, data_loader: torch.utils.data.DataLoader, nc, validator, num_samples: int = None):
    """OpenVINO YOLOv8 model accuracy validation function. Runs model validation on a dataset and returns metrics.
    Parameters:
        model (Model): OpenVINO model
        data_loader (torch.utils.data.DataLoader): dataset loader
        nc (int): number of classes
        validator: instance of the validator class
        num_samples (int, *optional*, None): validate the model only on the specified number of samples, if provided
    Returns:
        stats (Dict[str, float]): dictionary with aggregated accuracy metrics, key is metric name, value is metric value
    """
    validator.seen = 0
    validator.jdict = []
    validator.stats = []
    validator.batch_i = 1
    validator.confusion_matrix = ConfusionMatrix(nc=nc)
    model.reshape({0: [1, 3, -1, -1]})
    num_outputs = len(model.outputs)
    compiled_model = core.compile_model(model)
    for batch_i, batch in enumerate(tqdm(data_loader, total=num_samples)):
        if num_samples is not None and batch_i == num_samples:
            break
        batch = validator.preprocess(batch)
        results = compiled_model(batch["img"])
        if num_outputs == 1:
            preds = torch.from_numpy(results[compiled_model.output(0)])
        else:
            preds = [torch.from_numpy(results[compiled_model.output(0)]),
                     torch.from_numpy(results[compiled_model.output(1)])]
        preds = validator.postprocess(preds)
        validator.update_metrics(preds, batch)
    stats = validator.get_stats()
    return stats


def print_stats(stats: Dict[str, float], total_images: int, total_objects: int):
    """Helper function for printing accuracy statistics.
    Parameters:
        stats (Dict[str, float]): dictionary with aggregated accuracy metrics, key is metric name, value is metric value
        total_images (int): number of evaluated images
        total_objects (int): number of evaluated objects
    Returns:
        None
    """
    print("Boxes:")
    mp, mr, map50, mean_ap = stats['metrics/precision(B)'], stats['metrics/recall(B)'], stats['metrics/mAP50(B)'], stats['metrics/mAP50-95(B)']
    # Print results
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Labels', 'Precision', 'Recall', 'mAP@.5', 'mAP@.5:.95')
    print(s)
    pf = '%20s' + '%12i' * 2 + '%12.3g' * 4  # print format
    print(pf % ('all', total_images, total_objects, mp, mr, map50, mean_ap))
    if 'metrics/precision(M)' in stats:
        s_mp, s_mr, s_map50, s_mean_ap = stats['metrics/precision(M)'], stats['metrics/recall(M)'], stats['metrics/mAP50(M)'], stats['metrics/mAP50-95(M)']
        # Print results (masks)
        s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Labels', 'Precision', 'Recall', 'mAP@.5', 'mAP@.5:.95')
        print(s)
        pf = '%20s' + '%12i' * 2 + '%12.3g' * 4  # print format
        print(pf % ('all', total_images, total_objects, s_mp, s_mr, s_map50, s_mean_ap))


def plot_one_box(box: np.ndarray, img: np.ndarray, color: Tuple[int, int, int] = None, mask: np.ndarray = None,
                 label: str = None, line_thickness: int = 5):
    """Helper function for drawing a single bounding box on an image.
    Parameters:
        box (np.ndarray): bounding box coordinates in format [x1, y1, x2, y2]
        img (np.ndarray): input image
        color (Tuple[int, int, int], *optional*, None): color in BGR format for drawing the box; selected randomly if not specified
        mask (np.ndarray, *optional*, None): instance segmentation mask polygon in format [N, 2], where N is the number of contour points; if not provided, only the box is drawn
        label (str, *optional*, None): box label string; if not provided, no label is drawn
        line_thickness (int, *optional*, 5): thickness for box drawing lines
    """
    # Plots one bounding box on image img
    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
    if mask is not None:
        image_with_mask = img.copy()
        cv2.fillPoly(image_with_mask, pts=[mask.astype(int)], color=color)
        img = cv2.addWeighted(img, 0.5, image_with_mask, 0.5, 1)
    return img


def draw_results(results: Dict, source_image: np.ndarray, label_map: Dict):
    """Helper function for drawing bounding boxes on an image.
    Parameters:
        results (Dict): detection predictions in format [x1, y1, x2, y2, score, label_id]
        source_image (np.ndarray): input image for drawing
        label_map (Dict[int, str]): label_id to class name mapping
    Returns:
        source_image (np.ndarray): image with drawn boxes
    """
    boxes = results["det"]
    masks = results.get("segment")
    h, w = source_image.shape[:2]
    for idx, (*xyxy, conf, lbl) in enumerate(boxes):
        label = f'{label_map[int(lbl)]} {conf:.2f}'
        mask = masks[idx] if masks is not None else None
        source_image = plot_one_box(xyxy, source_image, mask=mask, label=label, color=colors(int(lbl)), line_thickness=1)
    return source_image


def letterbox(img: np.ndarray, new_shape: Tuple[int, int] = (640, 640), color: Tuple[int, int, int] = (114, 114, 114),
              auto: bool = False, scale_fill: bool = False, scaleup: bool = False, stride: int = 32):
    """Resize image and pad it for detection. Takes an image as input, resizes it to fit the new shape while
    preserving the original aspect ratio, and pads it to meet stride-multiple constraints.
    Parameters:
        img (np.ndarray): image for preprocessing
        new_shape (Tuple(int, int)): image size after preprocessing in format [height, width]
        color (Tuple(int, int, int)): color for filling the padded area
        auto (bool): use dynamic input size, only padding for stride constraints applied
        scale_fill (bool): scale image to fill new_shape
        scaleup (bool): allow scaling the image up if it is smaller than the desired input size, can affect model accuracy
        stride (int): input padding stride
    Returns:
        img (np.ndarray): image after preprocessing
        ratio (Tuple(float, float)): height and width scaling ratio
        padding_size (Tuple(int, int)): height and width padding size
    """
    # Resize and pad image while meeting stride-multiple constraints
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better test mAP)
        r = min(r, 1.0)
    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    elif scale_fill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios
    dw /= 2  # divide padding into 2 sides
    dh /= 2
    if shape[::-1] != new_unpad:  # resize
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)


def preprocess_image(img0: np.ndarray):
    """Preprocess image according to YOLOv8 input requirements.
    Takes an image in np.array format, resizes it using letterbox resize and changes the data layout from HWC to CHW.
    Parameters:
        img0 (np.ndarray): image for preprocessing
    Returns:
        img (np.ndarray): image after preprocessing
    """
    # resize
    img = letterbox(img0)[0]
    # Convert HWC to CHW
    img = img.transpose(2, 0, 1)
    img = np.ascontiguousarray(img)
    return img


def image_to_tensor(image: np.ndarray):
    """Convert a preprocessed image to the model input tensor: scales values to [0, 1] and adds a batch dimension.
    Parameters:
        image (np.ndarray): image in CHW format
    Returns:
        input_tensor (np.ndarray): input tensor in NCHW format with float32 values in the [0, 1] range
    """
    input_tensor = image.astype(np.float32)  # uint8 to fp32
    input_tensor /= 255.0  # 0 - 255 to 0.0 - 1.0
    # add batch dimension
    if input_tensor.ndim == 3:
        input_tensor = np.expand_dims(input_tensor, 0)
    return input_tensor
def postprocess(nc,
                pred_boxes: np.ndarray,
                input_hw: Tuple[int, int],
                orig_img: np.ndarray,
                min_conf_threshold: float = 0.25,
                nms_iou_threshold: float = 0.7,
                agnostic_nms: bool = False,
                max_detections: int = 300,
                pred_masks: np.ndarray = None,
                retina_mask: bool = False):
    """YOLOv8 model postprocessing function. Applies non-maximum suppression to detections and rescales boxes to the original image size.
    Parameters:
        nc (int): number of classes
        pred_boxes (np.ndarray): model output prediction boxes
        input_hw (Tuple[int, int]): preprocessed image size
        orig_img (np.ndarray): image before preprocessing
        min_conf_threshold (float, *optional*, 0.25): minimal accepted confidence for object filtering
        nms_iou_threshold (float, *optional*, 0.7): minimal overlap score for removing duplicate objects in NMS
        agnostic_nms (bool, *optional*, False): apply class-agnostic NMS or not
        max_detections (int, *optional*, 300): maximum detections after NMS
        pred_masks (np.ndarray, *optional*, None): model output prediction masks; if not provided, only boxes are postprocessed
        retina_mask (bool, *optional*, False): retina mask postprocessing instead of native decoding
    Returns:
        pred (List[Dict[str, np.ndarray]]): list of dictionaries with det - detected boxes in format [x1, y1, x2, y2, score, label] and segment - segmentation polygons for each element in batch
    """
    nms_kwargs = {"agnostic": agnostic_nms, "max_det": max_detections}
    # if pred_masks is not None:
    #     nms_kwargs["nm"] = 32
    preds = ops.non_max_suppression(torch.from_numpy(pred_boxes), min_conf_threshold, nms_iou_threshold, nc=nc, **nms_kwargs)
    results = []
    proto = torch.from_numpy(pred_masks) if pred_masks is not None else None
    for i, pred in enumerate(preds):
        shape = orig_img[i].shape if isinstance(orig_img, list) else orig_img.shape
        if not len(pred):
            results.append({"det": [], "segment": []})
            continue
        if proto is None:
            pred[:, :4] = ops.scale_boxes(input_hw, pred[:, :4], shape).round()
            results.append({"det": pred})
            continue
        if retina_mask:
            pred[:, :4] = ops.scale_boxes(input_hw, pred[:, :4], shape).round()
            masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], shape[:2])  # HWC
            segments = [ops.scale_segments(input_hw, x, shape, normalize=False) for x in ops.masks2segments(masks)]
        else:
            masks = ops.process_mask(proto[i], pred[:, 6:], pred[:, :4], input_hw, upsample=True)
            pred[:, :4] = ops.scale_boxes(input_hw, pred[:, :4], shape).round()
            segments = [ops.scale_segments(input_hw, x, shape, normalize=False) for x in ops.masks2segments(masks)]
        results.append({"det": pred[:, :6].numpy(), "segment": segments})
    return results


def detect(image: np.ndarray, model: Model, nc):
    """OpenVINO YOLOv8 model inference function. Preprocesses the image, runs model inference and postprocesses results using NMS.
    Parameters:
        image (np.ndarray): input image
        model (Model): OpenVINO compiled model
        nc (int): number of classes
    Returns:
        detections (np.ndarray): detected boxes in format [x1, y1, x2, y2, score, label]
    """
    num_outputs = len(model.outputs)
    preprocessed_image = preprocess_image(image)
    input_tensor = image_to_tensor(preprocessed_image)
    result = model(input_tensor)
    boxes = result[model.output(0)]
    masks = None
    if num_outputs > 1:
        masks = result[model.output(1)]
    input_hw = input_tensor.shape[2:]
    detections = postprocess(nc=nc, pred_boxes=boxes, input_hw=input_hw, orig_img=image, pred_masks=masks)
    return detections


def transform_fn(data_item):
    """NNCF calibration transform: extracts and normalizes the image tensor from a dataloader item."""
    # unpack input images tensor
    images = data_item[0]
    # convert input tensor into float format
    images = images.float()
    # scale input
    images = images / 255
    # convert torch tensor to numpy array
    images = images.cpu().detach().numpy()
    return images
class YOLOv5POTDataLoader(DataLoader):
    """Inherits from the POT DataLoader and implements it for the YOLOv5 dataloader."""

    def __init__(self, data_source):
        super().__init__({})
        self._data_loader = data_source
        self._data_iter = iter(self._data_loader)

    def __len__(self):
        return len(self._data_loader.dataset)

    def __getitem__(self, item):
        try:
            batch_data = next(self._data_iter)
        except StopIteration:
            self._data_iter = iter(self._data_loader)
            batch_data = next(self._data_iter)
        im, target, path, shape = batch_data
        im = im.float()
        im /= 255
        nb, _, height, width = im.shape
        img = im.cpu().detach().numpy()
        target = target.cpu().detach().numpy()
        annotation = dict()
        annotation["image_path"] = path
        annotation["target"] = target
        annotation["batch_size"] = nb
        annotation["shape"] = shape
        annotation["width"] = width
        annotation["height"] = height
        annotation["img"] = img
        return (item, annotation), img


def create_data_source(dataset_yaml):
    """Creates a validation dataloader from a dataset yaml definition (e.g. coco128.yaml)."""
    data = check_dataset(dataset_yaml)
    val_dataloader = create_dataloader(data["val"], imgsz=640, batch_size=1, stride=32, pad=0.5, workers=1)[0]
    return val_dataloader


def detect_without_preprocess(nc, image: np.ndarray, model: Model):
    """OpenVINO YOLOv8 model with integrated preprocessing inference function. Runs model inference on a raw image and postprocesses results using NMS.
    Parameters:
        nc (int): number of classes
        image (np.ndarray): input image
        model (Model): OpenVINO compiled model
    Returns:
        detections (np.ndarray): detected boxes in format [x1, y1, x2, y2, score, label]
    """
    output_layer = model.output(0)
    img = letterbox(image)[0]
    input_tensor = np.expand_dims(img, 0)
    input_hw = img.shape[:2]
    result = model(input_tensor)[output_layer]
    detections = postprocess(nc=nc, pred_boxes=result, input_hw=input_hw, orig_img=image)
    return detections
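As a quick sanity check of the calibration pipeline, one batch can be pulled through create_data_source and transform_fn to confirm the tensor NNCF will calibrate on (a minimal sketch; the yaml path is the same as in step 2.3):

from api import create_data_source, transform_fn

data_source = create_data_source('/home/yy/yolov11-master/Open_Vino-introductory-master/coco128.yaml')
batch = next(iter(data_source))
sample = transform_fn(batch)
# Expect an NCHW float32 array scaled to [0, 1], e.g. (1, 3, 640, 640)
print(sample.shape, sample.dtype, sample.min(), sample.max())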