当前位置：网站首页>Opencv -- yoact case segmentation model reasoning

OpenCV -- YOLACT instance segmentation model inference

2022-04-23 03:57:00 【qq_ thirty-seven million one hundred and seventy-two thousand o】

Task: get familiar with OpenCV's DNN module. Given a picture of a natural scene, use a pre-trained YOLACT model to output the object detection results.

Analysis:
1) OpenCV's DNN module supports many kinds of deep learning models, including face detection, image classification, segmentation, and object detection, and can load models from frameworks such as PyTorch, TensorFlow, and PaddlePaddle (see the OpenCV/dnn code base).
2) General steps of deep learning model inference: load the model, including the configuration file and the weight file; pre-process the input image, converting it to the data type and size the model accepts; post-process the model predictions. For instance segmentation, post-processing mainly consists of NMS and mask generation; in YOLACT these post-processing steps are already handled automatically.

Result display ：

main.exe -h
#######
Usage: main.exe [params] image confThreshold nmsThresshold 

        -?, -h, --help, --usage (value:true)
                opecv based deep learining demo

        image
                Image to process
        confThreshold (value:0.5)
                confidence threshold, default 0.5
        nmsThresshold (value:0.3)
                nms threshold, default 0.3

 main.exe example_01.jpg

Code example ：

// main.cpp //
#include <fstream>
#include <iostream>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include "yolact.cpp"

using namespace cv;
using namespace dnn;
using namespace std;

bool parseParam(int argc, char** argv, const char* keys, Mat& img, float& confThread, float& nmsTheshold){
    
	CommandLineParser parser(argc, argv, keys);
	if(parser.has("help")){
    
		parser.printMessage();
		return false;
	}
	if(!parser.check()){
    
		parser.printErrors();
		return false;
	}
	String imgFile = parser.get<String>(0);
	img = imread(imgFile);
	if(img.empty()){
    
		cout << "wrong image path ! please check again." << endl;
		return false;
	}
	confThread = parser.get<float>(1);
	nmsTheshold = parser.get<float>(2);
	return true;
}

int main(int argc, char** argv)
{
    	
	const char* keys  = {
    
        "{help h usage ? | | opecv based deep learining demo}"
        "{@image | | Image to process}"
        "{@confThreshold | 0.5 | confidence threshold, default 0.5}"
        "{@nmsThresshold | 0.3 | nms threshold, default 0.3}"};
	float confThreshold, nmsThreshold;
	Mat srcimg;
	if(!parseParam(argc, argv, keys, srcimg, confThreshold, nmsThreshold)){
    
        return 0;
    }
	yolact yolactnet(confThreshold, nmsThreshold);
	
	yolactnet.detect(srcimg);

	static const string kWinName = "Deep learning object detection in OpenCV";
	namedWindow(kWinName, WINDOW_AUTOSIZE);
	imshow(kWinName, srcimg);
	waitKey(0);
	destroyAllWindows();
}

// yolact.cpp
#include <fstream>
#include <iostream>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include "config.cpp"

using namespace cv;
using namespace dnn;
using namespace std;

/**
 * Wrapper around an OpenCV DNN network that runs YOLACT instance segmentation.
 *
 * The constructor loads the network and precomputes the prior (anchor) boxes;
 * detect() runs inference on one image and draws the results onto it.
 *
 * The object owns the prior-box buffer through a raw pointer, so it releases
 * it in the destructor and is non-copyable (a copy would free it twice).
 */
class yolact
{
	public:
		yolact(float confThreshold, float nmsThreshold, const int keep_top_k = 200);
		// Releases the buffer the constructor allocates with new[].
		~yolact() { delete[] priorbox; }
		yolact(const yolact&) = delete;
		yolact& operator=(const yolact&) = delete;
		void detect(Mat& srcimg);
	private:
		// Side length the input image is resized to before inference.
		const int target_size = 550;
		// Per-channel mean / standard deviation applied by normalize();
		// detect() converts the image to RGB before normalizing.
		const float MEANS[3] = { 123.68, 116.78, 103.94 };
		const float STD[3] = { 58.40, 57.12, 57.38 };
		float confidence_threshold;
		float nms_threshold;
		int keep_top_k;
		// Grid width / height of each of the five prior levels.
		const int conv_ws[5] = { 69, 35, 18, 9, 5 };
		const int conv_hs[5] = { 69, 35, 18, 9, 5 };
		// Three priors are generated per grid cell, one per aspect ratio.
		const float aspect_ratios[3] = { 1.f, 0.5f, 2.f };
		// Prior scale per level; divided by target_size when priors are built.
		const float scales[5] = { 24.f, 48.f, 96.f, 192.f, 384.f };
		// Variances used when decoding box regressions (cx, cy, w, h).
		const float var[4] = { 0.1f, 0.1f, 0.2f, 0.2f };
		// Resolution of the mask assembled per detection before upscaling.
		const int mask_h = 138;
		const int mask_w = 138;
		int num_priors;
		// Owning pointer to 4 * num_priors floats laid out as (cx, cy, w, h).
		float* priorbox = nullptr;
		Net net;
		void normalize(Mat& img);
		void sigmoid(Mat& out, int length);
};

/**
 * Loads the ONNX network and precomputes the prior boxes.
 *
 * @param confThreshold Minimum class score for a prior to become a candidate.
 * @param nmsThreshold  Overlap threshold passed to non-maximum suppression.
 * @param keep_top_k    Maximum number of detections kept by NMS.
 *
 * The model file name is hard-coded and resolved relative to the working
 * directory.
 */
yolact::yolact(float confThreshold, float nmsThreshold, const int keep_top_k)
{
	this->confidence_threshold = confThreshold;
	this->nms_threshold = nmsThreshold;
	this->keep_top_k = keep_top_k;
	this->net = readNet("yolact_base_54_800000.onnx");

	// Three priors (one per aspect ratio) for every cell of every level.
	this->num_priors = 0;
	for (int p = 0; p < 5; p++)
	{
		this->num_priors += this->conv_ws[p] * this->conv_hs[p] * 3;
	}
	this->priorbox = new float[4 * this->num_priors];

	// generate priorbox
	float* pb = priorbox;
	for (int p = 0; p < 5; p++)
	{
		const int conv_w = this->conv_ws[p];
		const int conv_h = this->conv_hs[p];
		const float scale = this->scales[p];

		for (int i = 0; i < conv_h; i++)
		{
			for (int j = 0; j < conv_w; j++)
			{
				// +0.5 because priors are in center-size notation
				const float cx = (j + 0.5f) / conv_w;
				const float cy = (i + 0.5f) / conv_h;

				for (int k = 0; k < 3; k++)
				{
					const float ar = sqrt(aspect_ratios[k]);
					const float w = scale * ar / this->target_size;

					// This is for backward compatability with a bug where I made everything square by accident
					// cfg.backbone.use_square_anchors:
					// the height is deliberately the width, not scale / ar / target_size.
					const float h = w;

					pb[0] = cx;
					pb[1] = cy;
					pb[2] = w;
					pb[3] = h;
					pb += 4;
				}
			}
		}
	}
}

/**
 * Converts `img` to 32-bit float in place and standardizes every pixel as
 * (value - MEANS[c]) / STD[c] for its first three channels.
 *
 * @param img Image to normalize; each pixel is read as three interleaved floats.
 */
void yolact::normalize(Mat& img)
{
	img.convertTo(img, CV_32F);
	for (int row = 0; row < img.rows; ++row)
	{
		// Row pointers respect the matrix stride.
		float* px = img.ptr<float>(row);
		for (int col = 0; col < img.cols; ++col, px += 3)
		{
			for (int c = 0; c < 3; ++c)
			{
				px[c] = (px[c] - this->MEANS[c]) / this->STD[c];
			}
		}
	}
}

/**
 * Applies the logistic function 1 / (1 + e^-x) in place to the first
 * `length` floats of `out`'s data buffer.
 *
 * @param out    Matrix whose buffer is read as a flat array of floats.
 * @param length Number of floats to transform.
 */
void yolact::sigmoid(Mat& out, int length)
{
	float* value = (float*)(out.data);
	int remaining = length;
	while (remaining-- > 0)
	{
		*value = 1.0 / (1 + expf(-*value));
		++value;
	}
}

/**
 * Runs the network on `srcimg` and draws boxes, labels and instance masks
 * directly onto it.
 *
 * Pipeline: resize to target_size x target_size, BGR -> RGB, normalize,
 * forward pass, decode one box per prior whose best non-background score
 * exceeds the confidence threshold, non-maximum suppression, then for every
 * surviving detection draw its box and label and alpha-blend its mask.
 *
 * @param srcimg Image to annotate in place. It is treated as 8-bit with three
 *               interleaved channels in BGR order (what imread returns by
 *               default) -- TODO confirm for other callers.
 *
 * NOTE(review): the meaning of the four output blobs (0: box regressions,
 * 1: class scores, 2: mask coefficients, 3: prototype masks) is inferred from
 * how they are indexed below; confirm against the exported ONNX model.
 */
void yolact::detect(Mat& srcimg)
{
	const int img_w = srcimg.cols;
	const int img_h = srcimg.rows;

	Mat img;
	// fx/fy must be passed explicitly so INTER_LINEAR lands in the
	// interpolation slot rather than being taken as the x scale factor.
	resize(srcimg, img, Size(this->target_size, this->target_size), 0, 0, INTER_LINEAR);
	cvtColor(img, img, COLOR_BGR2RGB);
	this->normalize(img);
	Mat blob = blobFromImage(img);
	this->net.setInput(blob);
	vector<Mat> outs;
	this->net.forward(outs, this->net.getUnconnectedOutLayersNames());
	// Everything below indexes outs[0] .. outs[3].
	CV_Assert(outs.size() >= 4);

	// generate proposals
	vector<int> classIds;
	vector<float> confidences;
	vector<Rect> boxes;
	vector<int> maskIds;
	const int num_class = outs[1].cols;
	for (int i = 0; i < this->num_priors; i++)
	{
		// Column 0 is skipped, so classIdPoint.x is zero-based over the
		// remaining classes; lookups into class_names/colors add 1 back.
		Mat scores = outs[1].row(i).colRange(1, num_class);
		Point classIdPoint;
		double score;
		// Get the value and location of the maximum score
		minMaxLoc(scores, 0, &score, 0, &classIdPoint);
		if (score > this->confidence_threshold)
		{
			const float* loc = reinterpret_cast<const float*>(outs[0].data) + i * 4;
			const float* pb = this->priorbox + i * 4;
			const float pb_cx = pb[0];
			const float pb_cy = pb[1];
			const float pb_w = pb[2];
			const float pb_h = pb[3];

			// Decode the regression relative to the prior (center-size form).
			const float bbox_cx = var[0] * loc[0] * pb_w + pb_cx;
			const float bbox_cy = var[1] * loc[1] * pb_h + pb_cy;
			const float bbox_w = static_cast<float>(exp(var[2] * loc[2]) * pb_w);
			const float bbox_h = static_cast<float>(exp(var[3] * loc[3]) * pb_h);
			float obj_x1 = bbox_cx - bbox_w * 0.5f;
			float obj_y1 = bbox_cy - bbox_h * 0.5f;
			float obj_x2 = bbox_cx + bbox_w * 0.5f;
			float obj_y2 = bbox_cy + bbox_h * 0.5f;

			// Scale from normalized coordinates to pixels and clip to the image.
			obj_x1 = max(min(obj_x1 * img_w, (float)(img_w - 1)), 0.f);
			obj_y1 = max(min(obj_y1 * img_h, (float)(img_h - 1)), 0.f);
			obj_x2 = max(min(obj_x2 * img_w, (float)(img_w - 1)), 0.f);
			obj_y2 = max(min(obj_y2 * img_h, (float)(img_h - 1)), 0.f);
			classIds.push_back(classIdPoint.x);
			confidences.push_back(static_cast<float>(score));
			boxes.push_back(Rect((int)obj_x1, (int)obj_y1, (int)(obj_x2 - obj_x1 + 1), (int)(obj_y2 - obj_y1 + 1)));
			maskIds.push_back(i);
		}
	}

	// Perform non maximum suppression to eliminate redundant overlapping boxes with
	// lower confidences
	vector<int> indices;
	NMSBoxes(boxes, confidences, this->confidence_threshold, this->nms_threshold, indices, 1.f, this->keep_top_k);

	// The label is measured and drawn with the same scale so the clamp that
	// keeps it inside the image uses the real text height.
	const double kLabelScale = 0.75;
	const int channel = outs[2].cols;
	const int area = this->mask_h * this->mask_w;

	for (size_t i = 0; i < indices.size(); ++i)
	{
		const int idx = indices[i];
		const Rect box = boxes[idx];
		const int xmax = box.x + box.width;
		const int ymax = box.y + box.height;
		rectangle(srcimg, Point(box.x, box.y), Point(xmax, ymax), Scalar(0, 0, 255), 3);

		// Get the label for the class name and its confidence
		char text[256];
		snprintf(text, sizeof(text), "%s: %.2f", class_names[classIds[idx] + 1], confidences[idx]);

		// Display the label at the top of the bounding box
		int baseLine = 0;
		const Size labelSize = getTextSize(text, FONT_HERSHEY_SIMPLEX, kLabelScale, 1, &baseLine);
		const int ymin = max(box.y, labelSize.height);
		putText(srcimg, text, Point(box.x, ymin), FONT_HERSHEY_SIMPLEX, kLabelScale, Scalar(0, 255, 0), 1);

		// Assemble the mask: for every mask pixel, the dot product of the
		// `channel` values stored for that pixel in outs[3] with this
		// detection's coefficients from outs[2].
		Mat mask(this->mask_h, this->mask_w, CV_32FC1);
		mask = cv::Scalar(0.f);
		const float* coeff = reinterpret_cast<const float*>(outs[2].data) + maskIds[idx] * channel;
		float* pm = reinterpret_cast<float*>(mask.data);
		const float* pmaskmap = reinterpret_cast<const float*>(outs[3].data);
		for (int j = 0; j < area; j++)
		{
			for (int p = 0; p < channel; p++)
			{
				pm[j] += pmaskmap[p] * coeff[p];
			}
			pmaskmap += channel;
		}

		this->sigmoid(mask, area);
		Mat mask2;
		resize(mask, mask2, Size(img_w, img_h));

		// draw mask: blend the class color 50/50 into every pixel whose mask
		// value exceeds 0.5. Row pointers are used so a non-continuous
		// srcimg (e.g. a ROI view) is addressed correctly.
		const unsigned char* color = colors[classIds[idx] + 1];
		for (int y = 0; y < img_h; y++)
		{
			const float* pmask = mask2.ptr<float>(y);
			uchar* px = srcimg.ptr<uchar>(y);
			for (int x = 0; x < img_w; x++, px += 3)
			{
				if (pmask[x] > 0.5)
				{
					px[0] = static_cast<uchar>(px[0] * 0.5 + color[0] * 0.5);
					px[1] = static_cast<uchar>(px[1] * 0.5 + color[1] * 0.5);
					px[2] = static_cast<uchar>(px[2] * 0.5 + color[2] * 0.5);
				}
			}
		}
	}
}

// config.hpp //
// Class label strings, defined in config.cpp; entry 0 is "background".
extern const char* class_names[];
// One 3-byte color per class label, defined in config.cpp.
extern const unsigned char colors[81][3];

// config.cpp //
#include"config.hpp"
// Class label table: entry 0 is the background class, followed by 80 object
// labels (81 entries in total). The index comments give the first entry of
// each row.
extern const char* class_names[] = {
	/*  0 */ "background",
	/*  1 */ "person", "bicycle", "car", "motorcycle", "airplane", "bus",
	/*  7 */ "train", "truck", "boat", "traffic light", "fire hydrant",
	/* 12 */ "stop sign", "parking meter", "bench", "bird", "cat", "dog",
	/* 18 */ "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
	/* 25 */ "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
	/* 31 */ "skis", "snowboard", "sports ball", "kite", "baseball bat",
	/* 36 */ "baseball glove", "skateboard", "surfboard", "tennis racket",
	/* 40 */ "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
	/* 47 */ "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
	/* 53 */ "hot dog", "pizza", "donut", "cake", "chair", "couch",
	/* 59 */ "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
	/* 65 */ "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
	/* 71 */ "toaster", "sink", "refrigerator", "book", "clock", "vase",
	/* 77 */ "scissors", "teddy bear", "hair drier", "toothbrush"
};

// Color table with one 3-byte entry per class label (81 rows). The index
// comments give the first entry of each row.
extern const unsigned char colors[81][3] = {
	/*  0 */ {56, 0, 255}, {226, 255, 0}, {0, 94, 255}, {0, 37, 255}, {0, 255, 94},
	/*  5 */ {255, 226, 0}, {0, 18, 255}, {255, 151, 0}, {170, 0, 255}, {0, 255, 56},
	/* 10 */ {255, 0, 75}, {0, 75, 255}, {0, 255, 169}, {255, 0, 207}, {75, 255, 0},
	/* 15 */ {207, 0, 255}, {37, 0, 255}, {0, 207, 255}, {94, 0, 255}, {0, 255, 113},
	/* 20 */ {255, 18, 0}, {255, 0, 56}, {18, 0, 255}, {0, 255, 226}, {170, 255, 0},
	/* 25 */ {255, 0, 245}, {151, 255, 0}, {132, 255, 0}, {75, 0, 255}, {151, 0, 255},
	/* 30 */ {0, 151, 255}, {132, 0, 255}, {0, 255, 245}, {255, 132, 0}, {226, 0, 255},
	/* 35 */ {255, 37, 0}, {207, 255, 0}, {0, 255, 207}, {94, 255, 0}, {0, 226, 255},
	/* 40 */ {56, 255, 0}, {255, 94, 0}, {255, 113, 0}, {0, 132, 255}, {255, 0, 132},
	/* 45 */ {255, 170, 0}, {255, 0, 188}, {113, 255, 0}, {245, 0, 255}, {113, 0, 255},
	/* 50 */ {255, 188, 0}, {0, 113, 255}, {255, 0, 0}, {0, 56, 255}, {255, 0, 113},
	/* 55 */ {0, 255, 188}, {255, 0, 94}, {255, 0, 18}, {18, 255, 0}, {0, 255, 132},
	/* 60 */ {0, 188, 255}, {0, 245, 255}, {0, 169, 255}, {37, 255, 0}, {255, 0, 151},
	/* 65 */ {188, 0, 255}, {0, 255, 37}, {0, 255, 0}, {255, 0, 170}, {255, 0, 37},
	/* 70 */ {255, 75, 0}, {0, 0, 255}, {255, 207, 0}, {255, 0, 226}, {255, 245, 0},
	/* 75 */ {188, 255, 0}, {0, 255, 18}, {0, 255, 75}, {0, 255, 151}, {255, 56, 0},
	/* 80 */ {245, 255, 0}
};

版权声明
本文为[qq_ thirty-seven million one hundred and seventy-two thousand o]所创，转载请带上原文链接，感谢
https://yzsam.com/2022/04/202204230350397758.html

当前位置：网站首页>Opencv -- yoact case segmentation model reasoning

Opencv -- yoact case segmentation model reasoning

边栏推荐

猜你喜欢

随机推荐