Skip to content

Commit 1b6dd6c

Browse files
committed
v1.0.4 Modified to support models with rectangular (non-square) input shapes.
1 parent c2886eb commit 1b6dd6c

File tree

13 files changed

+255
-99
lines changed

13 files changed

+255
-99
lines changed

Assets/YOLOv8WithOpenCVForUnityExample/Scripts/YOLOv8WithOpenCVForUnity/YOLOv8ClassPredictor.cs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,9 @@ public YOLOv8ClassPredictor(string modelFilepath, string classesFilepath, Size i
7272

7373
protected virtual Mat preprocess(Mat image)
7474
{
75-
76-
// Create a 4D blob from a frame.
75+
// https://github.com/ultralytics/ultralytics/blob/d74a5a9499acf1afd13d970645e5b1cfcadf4a8f/ultralytics/data/augment.py#L1059
7776

77+
// Resizes and crops the center of the image using a letterbox method.
7878
int c = image.channels();
7979
int h = (int)input_size.height;
8080
int w = (int)input_size.width;
@@ -93,7 +93,6 @@ protected virtual Mat preprocess(Mat image)
9393
Mat blob = Dnn.blobFromImage(input_sizeMat, 1.0 / 255.0, input_size, Scalar.all(0), true, false, CvType.CV_32F); // HWC to NCHW, BGR to RGB
9494

9595
return blob;// [1, 3, h, w]
96-
9796
}
9897

9998
public virtual Mat infer(Mat image)
@@ -154,7 +153,7 @@ public virtual void visualize(Mat image, Mat results, bool print_results = false
154153
if (image.IsDisposed)
155154
return;
156155

157-
if (results.empty() || results.cols() < classNames.Count)
156+
if (results.empty())
158157
return;
159158

160159
StringBuilder sb = null;
@@ -308,7 +307,7 @@ public virtual string getClassLabel(float id)
308307
string className = string.Empty;
309308
if (classNames != null && classNames.Count != 0)
310309
{
311-
if (classId >= 0 && classId < (int)classNames.Count)
310+
if (classId >= 0 && classId < classNames.Count)
312311
{
313312
className = classNames[classId];
314313
}

Assets/YOLOv8WithOpenCVForUnityExample/Scripts/YOLOv8WithOpenCVForUnity/YOLOv8ObjectDetector.cs

Lines changed: 35 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ public class YOLOv8ObjectDetector
3030

3131
List<Scalar> palette;
3232

33-
Mat maxSizeImg;
33+
Mat paddedImg;
3434

3535
Mat pickup_blob_numx6;
3636
Mat boxesMat;
@@ -91,20 +91,26 @@ public YOLOv8ObjectDetector(string modelFilepath, string classesFilepath, Size i
9191

9292
protected virtual Mat preprocess(Mat image)
9393
{
94-
// Add padding to make it square.
95-
int max = Mathf.Max(image.cols(), image.rows());
94+
// https://github.com/ultralytics/ultralytics/blob/d74a5a9499acf1afd13d970645e5b1cfcadf4a8f/ultralytics/data/augment.py#L645
9695

97-
if (maxSizeImg == null)
98-
maxSizeImg = new Mat(max, max, image.type());
99-
if (maxSizeImg.width() != max || maxSizeImg.height() != max)
100-
maxSizeImg.create(max, max, image.type());
96+
// Add padding to make it input size.
97+
// (padding to center the image)
98+
float ratio = Mathf.Max((float)image.cols() / (float)input_size.width, (float)image.rows() / (float)input_size.height);
99+
int padw = (int)Mathf.Ceil((float)input_size.width * ratio);
100+
int padh = (int)Mathf.Ceil((float)input_size.height * ratio);
101101

102-
Imgproc.rectangle(maxSizeImg, new OpenCVRect(0, 0, maxSizeImg.width(), maxSizeImg.height()), Scalar.all(114), -1);
103-
Mat _maxSizeImg_roi = new Mat(maxSizeImg, new OpenCVRect((max - image.cols()) / 2, (max - image.rows()) / 2, image.cols(), image.rows()));
104-
image.copyTo(_maxSizeImg_roi);
102+
if (paddedImg == null)
103+
paddedImg = new Mat(padh, padw, image.type(), Scalar.all(114));
104+
if (paddedImg.width() != padw || paddedImg.height() != padh)
105+
{
106+
paddedImg.create(padh, padw, image.type());
107+
Imgproc.rectangle(paddedImg, new OpenCVRect(0, 0, paddedImg.width(), paddedImg.height()), Scalar.all(114), -1);
108+
}
105109

106-
// Create a 4D blob from a frame.
107-
Mat blob = Dnn.blobFromImage(maxSizeImg, 1.0 / 255.0, input_size, Scalar.all(0), true, false, CvType.CV_32F); // HWC to NCHW, BGR to RGB
110+
Mat _paddedImg_roi = new Mat(paddedImg, new OpenCVRect((paddedImg.cols() - image.cols()) / 2, (paddedImg.rows() - image.rows()) / 2, image.cols(), image.rows()));
111+
image.copyTo(_paddedImg_roi);
112+
113+
Mat blob = Dnn.blobFromImage(paddedImg, 1.0 / 255.0, input_size, Scalar.all(0), true, false, CvType.CV_32F); // HWC to NCHW, BGR to RGB
108114

109115
return blob;// [1, 3, h, w]
110116
}
@@ -131,11 +137,11 @@ public virtual Mat infer(Mat image)
131137
Mat results = postprocess(output_blob[0], image.size());
132138

133139
// scale_boxes
134-
float maxSize = Mathf.Max((float)image.size().width, (float)image.size().height);
135-
float x_factor = maxSize / (float)input_size.width;
136-
float y_factor = maxSize / (float)input_size.height;
137-
float x_shift = (maxSize - (float)image.size().width) / 2f;
138-
float y_shift = (maxSize - (float)image.size().height) / 2f;
140+
float ratio = Mathf.Max((float)image.cols() / (float)input_size.width, (float)image.rows() / (float)input_size.height);
141+
float x_factor = ratio;
142+
float y_factor = ratio;
143+
float x_shift = ((float)input_size.width * ratio - (float)image.size().width) / 2f;
144+
float y_shift = ((float)input_size.height * ratio - (float)image.size().height) / 2f;
139145

140146
for (int i = 0; i < results.rows(); ++i)
141147
{
@@ -167,8 +173,14 @@ protected virtual Mat postprocess(Mat output_blob, Size original_shape)
167173
MatOfInt order = new MatOfInt(0, 2, 1);
168174
Core.transposeND(output_blob_0, order, output_blob_0);
169175

170-
if (output_blob_0.size(2) < 4 + num_classes)
171-
return new Mat();
176+
if (output_blob_0.size(2) != 4 + num_classes)
177+
{
178+
Debug.LogWarning("The number of classes and output shapes are different. " +
179+
"( output_blob_0.size(2):" + output_blob_0.size(2) + " != 4 + num_classes:" + num_classes + " )\n" +
180+
"When using a custom model, be sure to set the correct number of classes by loading the appropriate custom classesFile.");
181+
182+
num_classes = output_blob_0.size(2) - 4;
183+
}
172184

173185
int num = output_blob_0.size(1);
174186
Mat output_blob_numx84 = output_blob_0.reshape(1, num);
@@ -348,10 +360,10 @@ public virtual void dispose()
348360
if (object_detection_net != null)
349361
object_detection_net.Dispose();
350362

351-
if (maxSizeImg != null)
352-
maxSizeImg.Dispose();
363+
if (paddedImg != null)
364+
paddedImg.Dispose();
353365

354-
maxSizeImg = null;
366+
paddedImg = null;
355367

356368
if (pickup_blob_numx6 != null)
357369
pickup_blob_numx6.Dispose();
@@ -457,7 +469,7 @@ public virtual string getClassLabel(float id)
457469
string className = string.Empty;
458470
if (classNames != null && classNames.Count != 0)
459471
{
460-
if (classId >= 0 && classId < (int)classNames.Count)
472+
if (classId >= 0 && classId < classNames.Count)
461473
{
462474
className = classNames[classId];
463475
}

Assets/YOLOv8WithOpenCVForUnityExample/Scripts/YOLOv8WithOpenCVForUnity/YOLOv8PoseEstimater.cs

Lines changed: 38 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ public class YOLOv8PoseEstimater
3535
int[] limb_color_ind;
3636
int[] kpt_color_ind;
3737

38-
Mat maxSizeImg;
38+
Mat paddedImg;
3939

4040
Mat pickup_blob_numx6kpts;
4141
Mat boxesMat;
@@ -123,20 +123,26 @@ public YOLOv8PoseEstimater(string modelFilepath, string classesFilepath, Size in
123123

124124
protected virtual Mat preprocess(Mat image)
125125
{
126-
// Add padding to make it square.
127-
int max = Mathf.Max(image.cols(), image.rows());
126+
// https://github.com/ultralytics/ultralytics/blob/d74a5a9499acf1afd13d970645e5b1cfcadf4a8f/ultralytics/data/augment.py#L645
128127

129-
if (maxSizeImg == null)
130-
maxSizeImg = new Mat(max, max, image.type());
131-
if (maxSizeImg.width() != max || maxSizeImg.height() != max)
132-
maxSizeImg.create(max, max, image.type());
128+
// Add padding to make it input size.
129+
// (padding to center the image)
130+
float ratio = Mathf.Max((float)image.cols() / (float)input_size.width, (float)image.rows() / (float)input_size.height);
131+
int padw = (int)Mathf.Ceil((float)input_size.width * ratio);
132+
int padh = (int)Mathf.Ceil((float)input_size.height * ratio);
133133

134-
Imgproc.rectangle(maxSizeImg, new OpenCVRect(0, 0, maxSizeImg.width(), maxSizeImg.height()), Scalar.all(114), -1);
135-
Mat _maxSizeImg_roi = new Mat(maxSizeImg, new OpenCVRect((max - image.cols()) / 2, (max - image.rows()) / 2, image.cols(), image.rows()));
136-
image.copyTo(_maxSizeImg_roi);
134+
if (paddedImg == null)
135+
paddedImg = new Mat(padh, padw, image.type(), Scalar.all(114));
136+
if (paddedImg.width() != padw || paddedImg.height() != padh)
137+
{
138+
paddedImg.create(padh, padw, image.type());
139+
Imgproc.rectangle(paddedImg, new OpenCVRect(0, 0, paddedImg.width(), paddedImg.height()), Scalar.all(114), -1);
140+
}
137141

138-
// Create a 4D blob from a frame.
139-
Mat blob = Dnn.blobFromImage(maxSizeImg, 1.0 / 255.0, input_size, Scalar.all(0), true, false, CvType.CV_32F); // HWC to NCHW, BGR to RGB
142+
Mat _paddedImg_roi = new Mat(paddedImg, new OpenCVRect((paddedImg.cols() - image.cols()) / 2, (paddedImg.rows() - image.rows()) / 2, image.cols(), image.rows()));
143+
image.copyTo(_paddedImg_roi);
144+
145+
Mat blob = Dnn.blobFromImage(paddedImg, 1.0 / 255.0, input_size, Scalar.all(0), true, false, CvType.CV_32F); // HWC to NCHW, BGR to RGB
140146

141147
return blob;// [1, 3, h, w]
142148
}
@@ -165,11 +171,11 @@ public virtual List<Mat> infer(Mat image)
165171
// scale_boxes and scale_landmarks
166172
Mat det_c0_c6 = det.colRange(0, 6).clone();
167173
Mat kpts = det.colRange(6, 6 + num_kpts).clone();
168-
float maxSize = Mathf.Max((float)image.size().width, (float)image.size().height);
169-
float x_factor = maxSize / (float)input_size.width;
170-
float y_factor = maxSize / (float)input_size.height;
171-
float x_shift = (maxSize - (float)image.size().width) / 2f;
172-
float y_shift = (maxSize - (float)image.size().height) / 2f;
174+
float ratio = Mathf.Max((float)image.cols() / (float)input_size.width, (float)image.rows() / (float)input_size.height);
175+
float x_factor = ratio;
176+
float y_factor = ratio;
177+
float x_shift = ((float)input_size.width * ratio - (float)image.size().width) / 2f;
178+
float y_shift = ((float)input_size.height * ratio - (float)image.size().height) / 2f;
173179

174180
for (int i = 0; i < det.rows(); ++i)
175181
{
@@ -222,8 +228,14 @@ protected virtual Mat postprocess(Mat output_blob, Size original_shape)
222228
MatOfInt order = new MatOfInt(0, 2, 1);
223229
Core.transposeND(output_blob_0, order, output_blob_0);
224230

225-
if (output_blob_0.size(2) < 4 + num_classes + num_kpts)
226-
return new Mat();
231+
if (output_blob_0.size(2) != 4 + num_classes + num_kpts)
232+
{
233+
Debug.LogWarning("The number of classes and output shapes are different. " +
234+
"( output_blob_0.size(2):" + output_blob_0.size(2) + " != 4 + num_classes:" + num_classes + " + " + num_kpts + " )\n" +
235+
"When using a custom model, be sure to set the correct number of classes by loading the appropriate custom classesFile.");
236+
237+
num_classes = output_blob_0.size(2) - 4 - num_kpts;
238+
}
227239

228240
int num = output_blob_0.size(1);
229241
Mat output_blob_numx56 = output_blob_0.reshape(1, num);
@@ -322,7 +334,7 @@ protected virtual Mat postprocess(Mat output_blob, Size original_shape)
322334
}
323335

324336
Mat results = new Mat(indices.rows(), 6 + num_kpts, CvType.CV_32FC1);
325-
337+
326338
for (int i = 0; i < indices.rows(); ++i)
327339
{
328340
int idx = (int)indices.get(i, 0)[0];
@@ -337,7 +349,7 @@ protected virtual Mat postprocess(Mat output_blob, Size original_shape)
337349
float h = bbox_arr[3];
338350
results.put(i, 0, new float[] { x, y, x + w, y + h });
339351
}
340-
352+
341353
indices.Dispose();
342354

343355
// [
@@ -403,7 +415,7 @@ public virtual void visualize(Mat image, Mat results, bool print_results = false
403415
}
404416
}
405417

406-
public virtual void visualize_kpts(Mat image, Mat kpts, int radius = 5, bool kpt_line= true, bool isRGB = false)
418+
public virtual void visualize_kpts(Mat image, Mat kpts, int radius = 5, bool kpt_line = true, bool isRGB = false)
407419
{
408420
// Note: `kpt_line = True` currently only supports human pose plotting.
409421

@@ -484,10 +496,10 @@ public virtual void dispose()
484496
if (object_detection_net != null)
485497
object_detection_net.Dispose();
486498

487-
if (maxSizeImg != null)
488-
maxSizeImg.Dispose();
499+
if (paddedImg != null)
500+
paddedImg.Dispose();
489501

490-
maxSizeImg = null;
502+
paddedImg = null;
491503

492504
if (pickup_blob_numx6kpts != null)
493505
pickup_blob_numx6kpts.Dispose();
@@ -593,7 +605,7 @@ public virtual string getClassLabel(float id)
593605
string className = string.Empty;
594606
if (classNames != null && classNames.Count != 0)
595607
{
596-
if (classId >= 0 && classId < (int)classNames.Count)
608+
if (classId >= 0 && classId < classNames.Count)
597609
{
598610
className = classNames[classId];
599611
}

Assets/YOLOv8WithOpenCVForUnityExample/Scripts/YOLOv8WithOpenCVForUnity/YOLOv8SegmentPredictor.cs

Lines changed: 33 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ public class YOLOv8SegmentPredictor
3232

3333
List<Scalar> palette;
3434

35-
Mat maxSizeImg;
35+
Mat paddedImg;
3636

3737
Mat pickup_blob_numx6mask;
3838
Mat boxesMat;
@@ -100,22 +100,26 @@ public YOLOv8SegmentPredictor(string modelFilepath, string classesFilepath, Size
100100

101101
protected virtual Mat preprocess(Mat image)
102102
{
103-
// Add padding to make it square.
104-
int max = Mathf.Max(image.cols(), image.rows());
103+
// https://github.com/ultralytics/ultralytics/blob/d74a5a9499acf1afd13d970645e5b1cfcadf4a8f/ultralytics/data/augment.py#L645
105104

106-
if (maxSizeImg == null)
107-
maxSizeImg = new Mat(max, max, image.type(), Scalar.all(114));
108-
if (maxSizeImg.width() != max || maxSizeImg.height() != max)
105+
// Add padding to make it input size.
106+
// (padding to center the image)
107+
float ratio = Mathf.Max((float)image.cols() / (float)input_size.width, (float)image.rows() / (float)input_size.height);
108+
int padw = (int)Mathf.Ceil((float)input_size.width * ratio);
109+
int padh = (int)Mathf.Ceil((float)input_size.height * ratio);
110+
111+
if (paddedImg == null)
112+
paddedImg = new Mat(padh, padw, image.type(), Scalar.all(114));
113+
if (paddedImg.width() != padw || paddedImg.height() != padh)
109114
{
110-
maxSizeImg.create(max, max, image.type());
111-
Imgproc.rectangle(maxSizeImg, new OpenCVRect(0, 0, maxSizeImg.width(), maxSizeImg.height()), Scalar.all(114), -1);
115+
paddedImg.create(padh, padw, image.type());
116+
Imgproc.rectangle(paddedImg, new OpenCVRect(0, 0, paddedImg.width(), paddedImg.height()), Scalar.all(114), -1);
112117
}
113118

114-
Mat _maxSizeImg_roi = new Mat(maxSizeImg, new OpenCVRect((max - image.cols()) / 2, (max - image.rows()) / 2, image.cols(), image.rows()));
115-
image.copyTo(_maxSizeImg_roi);
119+
Mat _paddedImg_roi = new Mat(paddedImg, new OpenCVRect((paddedImg.cols() - image.cols()) / 2, (paddedImg.rows() - image.rows()) / 2, image.cols(), image.rows()));
120+
image.copyTo(_paddedImg_roi);
116121

117-
// Create a 4D blob from a frame.
118-
Mat blob = Dnn.blobFromImage(maxSizeImg, 1.0 / 255.0, input_size, Scalar.all(0), true, false, CvType.CV_32F); // HWC to NCHW, BGR to RGB
122+
Mat blob = Dnn.blobFromImage(paddedImg, 1.0 / 255.0, input_size, Scalar.all(0), true, false, CvType.CV_32F); // HWC to NCHW, BGR to RGB
119123

120124
return blob;// [1, 3, h, w]
121125
}
@@ -150,11 +154,11 @@ public virtual List<Mat> infer(Mat image)
150154

151155
// scale_boxes
152156
Mat det_c0_c6 = det.colRange(0, 6).clone();
153-
float maxSize = Mathf.Max((float)image.size().width, (float)image.size().height);
154-
float x_factor = maxSize / (float)input_size.width;
155-
float y_factor = maxSize / (float)input_size.height;
156-
float x_shift = (maxSize - (float)image.size().width) / 2f;
157-
float y_shift = (maxSize - (float)image.size().height) / 2f;
157+
float ratio = Mathf.Max((float)image.cols() / (float)input_size.width, (float)image.rows() / (float)input_size.height);
158+
float x_factor = ratio;
159+
float y_factor = ratio;
160+
float x_shift = ((float)input_size.width * ratio - (float)image.size().width) / 2f;
161+
float y_shift = ((float)input_size.height * ratio - (float)image.size().height) / 2f;
158162

159163
for (int i = 0; i < det.rows(); ++i)
160164
{
@@ -196,8 +200,14 @@ protected virtual Mat postprocess(Mat output_blob, Size original_shape)
196200
MatOfInt order = new MatOfInt(0, 2, 1);
197201
Core.transposeND(output_blob_0, order, output_blob_0);
198202

199-
if (output_blob_0.size(2) < 4 + num_classes + num_masks)
200-
return new Mat();
203+
if (output_blob_0.size(2) != 4 + num_classes + num_masks)
204+
{
205+
Debug.LogWarning("The number of classes and output shapes are different. " +
206+
"( output_blob_0.size(2):" + output_blob_0.size(2) + " != 4 + num_classes:" + num_classes + " + " + num_masks + " )\n" +
207+
"When using a custom model, be sure to set the correct number of classes by loading the appropriate custom classesFile.");
208+
209+
num_classes = output_blob_0.size(2) - 4 - num_masks;
210+
}
201211

202212
int num = output_blob_0.size(1);
203213
Mat output_blob_numx116 = output_blob_0.reshape(1, num);
@@ -571,10 +581,10 @@ public virtual void dispose()
571581
if (segmentation_net != null)
572582
segmentation_net.Dispose();
573583

574-
if (maxSizeImg != null)
575-
maxSizeImg.Dispose();
584+
if (paddedImg != null)
585+
paddedImg.Dispose();
576586

577-
maxSizeImg = null;
587+
paddedImg = null;
578588

579589
if (pickup_blob_numx6mask != null)
580590
pickup_blob_numx6mask.Dispose();
@@ -702,7 +712,7 @@ public virtual string getClassLabel(float id)
702712
string className = string.Empty;
703713
if (classNames != null && classNames.Count != 0)
704714
{
705-
if (classId >= 0 && classId < (int)classNames.Count)
715+
if (classId >= 0 && classId < classNames.Count)
706716
{
707717
className = classNames[classId];
708718
}

Assets/YOLOv8WithOpenCVForUnityExample/YOLOv8ClassificationExample/YOLOv8ClassificationExample.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,9 +136,9 @@ protected virtual void Run()
136136
//if true, The error log of the Native side OpenCV will be displayed on the Unity Editor Console.
137137
Utils.setDebugMode(true);
138138

139-
if (string.IsNullOrEmpty(model_filepath) || string.IsNullOrEmpty(classes_filepath))
139+
if (string.IsNullOrEmpty(model_filepath))
140140
{
141-
Debug.LogError("model: " + model + " or " + "classes: " + classes + " is not loaded.");
141+
Debug.LogError("model: " + model + " is not loaded.");
142142
}
143143
else
144144
{

0 commit comments

Comments (0)