当前位置:网站首页>Generate training set and verification set (Yolo) by using existing label files
Generate training set and verification set (Yolo) by using existing label files
2022-04-21 09:33:00 【Lvcx】
1. If the label files are in XML format (i.e., VOC format)
- You can use the following program to convert them directly; the generated label files are in TXT format (i.e., YOLO format). (Note: the XML files must be encoded as UTF-8.)
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
import random
from shutil import copyfile
# Class names to export. A box's YOLO class id is the index of its name in this
# list; objects whose name is not listed are skipped during conversion.
classes = ["hat", "person"]
#classes=["ball"]
# Threshold compared against a random integer drawn from 1..100 for every image
# to decide whether the image goes to the train split or the val split.
TRAIN_RATIO = 80
def clear_hidden_files(path):
    """Recursively delete regular files whose name starts with "._" under ``path``.

    (Presumably these are the AppleDouble side-car files macOS leaves on
    non-HFS volumes; the code only looks at the "._" name prefix.)

    Only real directories are descended into. Entries that are neither a
    regular file nor a directory (dangling symlinks, sockets, ...) are left
    untouched instead of being passed to ``os.listdir``, which would raise.

    Args:
        path: Directory to clean. Must exist.

    Raises:
        OSError: If ``path`` cannot be listed or a matching file cannot be
            removed.
    """
    root = os.path.abspath(path)
    for entry in os.listdir(root):
        entry_path = os.path.join(root, entry)
        if os.path.isdir(entry_path):
            # Sub-directories are cleaned too, whatever their own name is.
            clear_hidden_files(entry_path)
        elif os.path.isfile(entry_path) and entry.startswith("._"):
            os.remove(entry_path)
def convert(size, box):
    """Turn a pixel-space box into a normalised (x_center, y_center, w, h) tuple.

    Args:
        size: ``(image_width, image_height)``.
        box: ``(xmin, xmax, ymin, ymax)`` -- note the x-pair comes first.

    Returns:
        ``(x, y, w, h)`` where x and w are scaled by ``1 / image_width`` and
        y and h by ``1 / image_height``.
    """
    # Multiply by the reciprocal (rather than divide) so results stay
    # bit-for-bit identical to the established output.
    scale_x = 1. / size[0]
    scale_y = 1. / size[1]

    center_x = (box[0] + box[1]) / 2.0
    center_y = (box[2] + box[3]) / 2.0
    span_x = box[1] - box[0]
    span_y = box[3] - box[2]

    return (
        center_x * scale_x,
        center_y * scale_y,
        span_x * scale_x,
        span_y * scale_y,
    )
def convert_annotation(image_id):
    """Convert one VOC XML annotation into a YOLO-format label file.

    Reads ``VOCdevkit/VOC2007/Annotations/<image_id>.xml`` and writes
    ``VOCdevkit/VOC2007/YOLOLabels/<image_id>.txt`` (both relative to the
    current working directory). Each kept object becomes one line:
    ``<class_id> <x_center> <y_center> <width> <height>``, normalised by the
    image size recorded in the XML.

    Objects are skipped when their name is not in the module-level ``classes``
    list or when they are flagged ``difficult == 1``. A missing or empty
    ``<difficult>`` tag is treated as 0.

    The XML is decoded explicitly as UTF-8 (the encoding this script requires)
    instead of the platform default, and it is fully parsed before the label
    file is opened, so a broken annotation no longer leaves an empty label
    file behind.

    Args:
        image_id: File name of the annotation/image without extension.

    Raises:
        OSError: If the annotation cannot be read or the label cannot be written.
        xml.etree.ElementTree.ParseError: If the XML is malformed.
    """
    xml_path = 'VOCdevkit/VOC2007/Annotations/%s.xml' % image_id
    txt_path = 'VOCdevkit/VOC2007/YOLOLabels/%s.txt' % image_id

    with open(xml_path, encoding='utf-8') as in_file:
        root = ET.parse(in_file).getroot()

    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    label_lines = []
    for obj in root.iter('object'):
        cls = obj.find('name').text
        # findtext() yields None for a missing tag and '' for an empty one.
        difficult = (obj.findtext('difficult') or '0').strip() or '0'
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        # convert() expects (xmin, xmax, ymin, ymax).
        b = (
            float(xmlbox.find('xmin').text),
            float(xmlbox.find('xmax').text),
            float(xmlbox.find('ymin').text),
            float(xmlbox.find('ymax').text),
        )
        bb = convert((w, h), b)
        label_lines.append(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

    with open(txt_path, 'w') as out_file:
        out_file.writelines(label_lines)
# ---------------------------------------------------------------------------
# Script body: build the directory layout, then walk VOC2007/JPEGImages and,
# for every image that has a matching XML annotation, convert the annotation
# to a YOLO label and copy image + label into the train or val split.
# ---------------------------------------------------------------------------
def _prepare_dir(path, clean=True):
    """Create ``path`` if it is missing; when ``clean`` is true also strip "._*" files from it."""
    os.makedirs(path, exist_ok=True)
    if clean:
        clear_hidden_files(path)
    return path


wd = os.getcwd()

# Source tree (VOC layout). The two top-level folders are only created, not cleaned.
data_base_dir = _prepare_dir(os.path.join(wd, "VOCdevkit/"), clean=False)
work_sapce_dir = _prepare_dir(os.path.join(data_base_dir, "VOC2007/"), clean=False)
annotation_dir = _prepare_dir(os.path.join(work_sapce_dir, "Annotations/"))
image_dir = _prepare_dir(os.path.join(work_sapce_dir, "JPEGImages/"))
yolo_labels_dir = _prepare_dir(os.path.join(work_sapce_dir, "YOLOLabels/"))

# Destination tree: images/{train,val} and labels/{train,val}.
yolov5_images_dir = _prepare_dir(os.path.join(data_base_dir, "images/"))
yolov5_labels_dir = _prepare_dir(os.path.join(data_base_dir, "labels/"))
yolov5_images_train_dir = _prepare_dir(os.path.join(yolov5_images_dir, "train/"))
yolov5_images_test_dir = _prepare_dir(os.path.join(yolov5_images_dir, "val/"))
yolov5_labels_train_dir = _prepare_dir(os.path.join(yolov5_labels_dir, "train/"))
yolov5_labels_test_dir = _prepare_dir(os.path.join(yolov5_labels_dir, "val/"))

list_imgs = os.listdir(image_dir)  # list image files

# Opening with 'w' truncates any list left over from a previous run; the
# context manager guarantees both lists are closed even if a conversion fails.
with open(os.path.join(wd, "yolov5_train.txt"), 'w') as train_file, \
        open(os.path.join(wd, "yolov5_val.txt"), 'w') as test_file:
    for img_name in list_imgs:
        image_path = os.path.join(image_dir, img_name)
        if not os.path.isfile(image_path):
            continue

        nameWithoutExtention = os.path.splitext(img_name)[0]
        annotation_path = os.path.join(annotation_dir, nameWithoutExtention + '.xml')
        if not os.path.exists(annotation_path):
            # Images without an annotation are not part of either split.
            continue

        label_name = nameWithoutExtention + '.txt'
        label_path = os.path.join(yolo_labels_dir, label_name)

        prob = random.randint(1, 100)
        print("Probability: %d" % prob)
        # randint(1, 100) is inclusive on both ends, so "<=" sends exactly
        # TRAIN_RATIO out of 100 outcomes to train ("<" gave TRAIN_RATIO - 1).
        if prob <= TRAIN_RATIO:
            list_file = train_file
            images_dst, labels_dst = yolov5_images_train_dir, yolov5_labels_train_dir
        else:
            list_file = test_file
            images_dst, labels_dst = yolov5_images_test_dir, yolov5_labels_test_dir

        list_file.write(image_path + '\n')
        convert_annotation(nameWithoutExtention)  # writes label_path
        copyfile(image_path, os.path.join(images_dst, img_name))
        copyfile(label_path, os.path.join(labels_dst, label_name))
Directory structure :

2. If you already have YOLO-format label files, but the training set and validation set have not been split yet
- First convert the YOLO-format label files to XML-format labels, then use the program above to convert them and split the dataset.
- The following program converts the original YOLO label files to XML format:
# coding=utf-8
# Reference article :https://blog.csdn.net/didiaopao/article/details/120022845
# Approach: convert the txt labels to xml labels first, then run the conversion script above to generate the training set and validation set
# https://www.bilibili.com/video/BV1f44y187Xg?p=4
from xml.dom.minidom import Document
import os
import cv2
# def makexml(txtPath, xmlPath, picPath): # txt Folder path ,xml File save path , The path of the folder where the picture is located
# Default class table: the position in this tuple is the YOLO class id.
# It must match the order of the classes.txt used when labelling.
_DEFAULT_CLASS_NAMES = (
    "6_daming",
    "7_erhuaming",
    "8_daozongjuanyeming",
    "9_baibeifeishi",
    "10_hefeishishu",
    "25_dilaohu",
    "41_lougu",
    "105_nianchong",
    "110_caodiming",
    "115_tiancaiyee",
    "148_huangzuliechun",
    "156_badianhuidenge",
    "222_mianlingchong",
    "228_erdianweiyee",
    "235_ganlanyee",
    "256_xishuai",
    "280_huangdue",
    "310_daominglin",
    "387_zhitiaochie",
    "392_shuiminge",
    "394_xianweiyee",
    "398_caibaidaiyeming",
    "401_qijiaoming",
    "402_guajuanyeming",
    "430_douyeming",
    "480_shie",
    "485_daheisaijinggui",
    "673_ganwendongyee",
)

# Image extensions tried, in order, when looking for the picture of a label file.
_IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp")


def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(str(text)))
    parent.appendChild(element)
    return element


def _find_image(picPath, stem):
    """Return the path of the first existing ``stem`` + known extension in ``picPath``, or None."""
    for ext in _IMAGE_EXTENSIONS:
        candidate = os.path.join(picPath, stem + ext)
        if os.path.isfile(candidate):
            return candidate
    return None


def _clamp(value, low, high):
    """Limit ``value`` to the closed interval [low, high]."""
    return max(low, min(value, high))


def makexml(picPath, txtPath, xmlPath, class_names=None):
    """Convert YOLO-format txt label files into VOC-format xml annotation files.

    For every ``*.txt`` file in ``txtPath`` the matching image is looked up in
    ``picPath`` (its size is needed to de-normalise the boxes) and an xml file
    with the same base name is written to ``xmlPath``.

    Label files that are not ``.txt``, or whose image is missing or unreadable
    (for example a ``classes.txt`` sitting in the label folder), are skipped
    with a message instead of aborting the run. Blank lines in a label file
    are ignored. Box corners are clamped to the image bounds. The xml is
    written as UTF-8 so the VOC-to-YOLO conversion script, which requires
    UTF-8 input, can read it back.

    Args:
        picPath: Folder containing the images.
        txtPath: Folder containing the YOLO txt labels.
        xmlPath: Folder the xml files are written to (must already exist).
        class_names: Optional class table. Either a sequence of names ordered
            by class id, or a mapping from class id to name. Defaults to the
            built-in 28-class table.

    Raises:
        KeyError: If a label line uses a class id that is not in the table.
        OSError: If a label file cannot be read or an xml file cannot be written.
    """
    if class_names is None:
        class_names = _DEFAULT_CLASS_NAMES
    if isinstance(class_names, dict):
        id_to_name = {str(key): value for key, value in class_names.items()}
    else:
        id_to_name = {str(index): value for index, value in enumerate(class_names)}

    for name in os.listdir(txtPath):
        stem, ext = os.path.splitext(name)
        if ext.lower() != ".txt":
            continue

        image_path = _find_image(picPath, stem)
        img = cv2.imread(image_path) if image_path is not None else None
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            print("Skipping %s: no readable image found in %s" % (name, picPath))
            continue
        Pheight, Pwidth = img.shape[:2]
        Pdepth = img.shape[2] if len(img.shape) == 3 else 1

        with open(os.path.join(txtPath, name), encoding="utf-8") as txtFile:
            txtList = txtFile.readlines()

        xmlBuilder = Document()
        annotation = xmlBuilder.createElement("annotation")
        xmlBuilder.appendChild(annotation)

        _append_text_element(xmlBuilder, annotation, "folder", "driving_annotation_dataset")
        _append_text_element(xmlBuilder, annotation, "filename", os.path.basename(image_path))

        size = xmlBuilder.createElement("size")
        _append_text_element(xmlBuilder, size, "width", Pwidth)
        _append_text_element(xmlBuilder, size, "height", Pheight)
        _append_text_element(xmlBuilder, size, "depth", Pdepth)
        annotation.appendChild(size)

        for line in txtList:
            oneline = line.split()
            if not oneline:
                continue
            if oneline[0] not in id_to_name:
                raise KeyError("Unknown class id %r in %s" % (oneline[0], name))

            # YOLO stores normalised centre/size; scale back to pixels.
            # The "+ 1" keeps the original 1-based pixel convention.
            center_x = float(oneline[1]) * Pwidth + 1
            center_y = float(oneline[2]) * Pheight + 1
            half_w = float(oneline[3]) * 0.5 * Pwidth
            half_h = float(oneline[4]) * 0.5 * Pheight

            obj = xmlBuilder.createElement("object")
            _append_text_element(xmlBuilder, obj, "name", id_to_name[oneline[0]])
            _append_text_element(xmlBuilder, obj, "pose", "Unspecified")
            _append_text_element(xmlBuilder, obj, "truncated", "0")
            _append_text_element(xmlBuilder, obj, "difficult", "0")

            bndbox = xmlBuilder.createElement("bndbox")
            _append_text_element(xmlBuilder, bndbox, "xmin", _clamp(int(center_x - half_w), 0, Pwidth))
            _append_text_element(xmlBuilder, bndbox, "ymin", _clamp(int(center_y - half_h), 0, Pheight))
            _append_text_element(xmlBuilder, bndbox, "xmax", _clamp(int(center_x + half_w), 0, Pwidth))
            _append_text_element(xmlBuilder, bndbox, "ymax", _clamp(int(center_y + half_h), 0, Pheight))
            obj.appendChild(bndbox)
            annotation.appendChild(obj)

        with open(os.path.join(xmlPath, stem + ".xml"), 'w', encoding="utf-8") as f:
            xmlBuilder.writexml(f, indent='\t', newl='\n', addindent='\t', encoding='utf-8')
if __name__ == "__main__":
    # Every path below must keep its trailing "/".
    # Folder that holds the pictures.
    picPath = "D:/YOLO-V5_Project/datasets/images/"
    # Folder that holds the YOLO txt label files.
    txtPath = "D:/YOLO-V5_Project/datasets/labels/"
    # Folder the generated xml files are saved to.
    xmlPath = "D:/YOLO-V5_Project/datasets/Annotations/"
    makexml(picPath=picPath, txtPath=txtPath, xmlPath=xmlPath)
Result of operation :

Original address :https://blog.csdn.net/didiaopao/article/details/120022845
版权声明
本文为[Lvcx]所创,转载请带上原文链接,感谢
https://yzsam.com/2022/04/202204210927378077.html
边栏推荐
- 【笔记】.launch文件语法记录
- 1169: large integer (pointer)
- [Yugong series] wechat applet - API related function case of map use in April 2022
- Kali:sqlmap :[10:39:37] [CRITICAL] unable to connect to the target URL
- 1149: 组合三位数之二
- 1147: 查找子数组
- Yapi basic use (2022-04-15)
- 基于WebSocket实现一个简易的群聊功能
- Actf2020 freshman tournament upload 1
- 【栈和队列】C语言简单应用 ⌊栈和队列互相实现,循环队列⌉
猜你喜欢

Responsive layout to realize the static page of ghost blog home page

Grid layout -- grid

vr全景适合那些行业

控制另一个程序的启动、隐藏、显示、关闭

操作系统 - 线程安全 - 学习

【笔记】.launch文件语法记录

2022 refrigeration and air conditioning equipment operation test question simulation test question bank and answers

2022 a special equipment related management (elevator) test question simulation test platform operation
Serviceworker cache and HTTP cache

事务的隔离级别与MVCC
随机推荐
【栈和队列】C语言简单应用 ⌊栈和队列互相实现,循环队列⌉
In 2017, I also started to write CSDN blog (Sina Netease moved to CSDN)
给网站添加pjax无刷新,换页音乐不中断
Install MySQL in docker under CentOS
【笔记】.launch文件语法记录
My life of Honker Security Commando
My blog navigation directory (constantly sorting and updating...)
Advanced C language - dynamic memory management
2022年危险化学品生产单位安全生产管理人员特种作业证考试题库模拟考试平台操作
1155: multiple instances of string
[Yugong series] wechat applet - Online aggregation of maps in April 2022
【手拉手 带你准备电赛】近期小总结
CC10000. CloudJenkins—————————————
ZABBIX 5.4 server installation
1164: string encryption
Fashion cloud learning -js implementation disables right click and F12
复旦大学-华盛顿大学EMBA校友:将"她力量"变成"我们的力量"
1148: 组合三位数之一
云网融合 — 算力中心 — RoCE/RMDA 与 NVMe/NVMe-oF
刷题记录(leetcode)