当前位置:网站首页>利用已有的标签文件生成训练集和验证集(YOLO)
利用已有的标签文件生成训练集和验证集(YOLO)
2022-04-21 09:27:00 【Lvcx】
1. 如果标签文件是xml格式(即VOC格式)
- 可以利用以下程序直接进行转换,生成的标签文件是txt格式(即YOLO格式):(注意xml中的编码格式应该是utf-8)
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
import random
from shutil import copyfile
# Class names to export; an object's YOLO class id is its index in this list,
# and objects whose name is not listed are skipped during conversion.
classes = ["hat", "person"]
#classes=["ball"]
# Threshold compared against a random integer drawn from 1..100 to decide
# whether an image goes to the training split (otherwise the validation split).
TRAIN_RATIO = 80
def clear_hidden_files(path):
    """Recursively delete files whose names start with "._" under *path*.

    Args:
        path: Directory to scan; sub-directories are scanned recursively.

    Entries that are neither regular files nor directories (for example a
    dangling symlink) are left untouched.
    """
    root = os.path.abspath(path)
    for entry in os.listdir(root):
        abspath = os.path.join(root, entry)
        if os.path.isfile(abspath):
            if entry.startswith("._"):
                os.remove(abspath)
        elif os.path.isdir(abspath):
            # Descend only into real directories: anything else (e.g. a
            # dangling symlink) would make os.listdir raise.
            clear_hidden_files(abspath)
def convert(size, box):
    """Convert a pixel-space box into a normalised centre/size box.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` of the box.

    Returns:
        ``(x_center, y_center, width, height)``, each multiplied by the
        reciprocal of the matching image dimension.
    """
    scale_x = 1. / size[0]
    scale_y = 1. / size[1]
    x_lo, x_hi, y_lo, y_hi = box[0], box[1], box[2], box[3]
    center_x = (x_lo + x_hi) / 2.0
    center_y = (y_lo + y_hi) / 2.0
    return (
        center_x * scale_x,
        center_y * scale_y,
        (x_hi - x_lo) * scale_x,
        (y_hi - y_lo) * scale_y,
    )
def convert_annotation(image_id):
    """Convert one VOC XML annotation into a YOLO-format label file.

    Reads ``VOCdevkit/VOC2007/Annotations/<image_id>.xml`` and writes
    ``VOCdevkit/VOC2007/YOLOLabels/<image_id>.txt`` (both relative to the
    current working directory), one line per kept object::

        <class index> <x_center> <y_center> <width> <height>

    Objects whose name is not in ``classes`` or whose ``difficult`` flag is 1
    are skipped.

    Args:
        image_id: Annotation file name without its extension.
    """
    xml_path = 'VOCdevkit/VOC2007/Annotations/%s.xml' % image_id
    txt_path = 'VOCdevkit/VOC2007/YOLOLabels/%s.txt' % image_id
    # Context managers close both files even if parsing raises part-way.
    with open(xml_path) as in_file, open(txt_path, 'w') as out_file:
        root = ET.parse(in_file).getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)
        for obj in root.iter('object'):
            # A missing or empty <difficult> tag is treated as 0 rather than
            # failing on a None lookup.
            difficult = obj.findtext('difficult', default='0') or '0'
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects the box ordered (xmin, xmax, ymin, ymax).
            b = (
                float(xmlbox.find('xmin').text),
                float(xmlbox.find('xmax').text),
                float(xmlbox.find('ymin').text),
                float(xmlbox.find('ymax').text),
            )
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join(str(a) for a in bb) + '\n')
wd = os.getcwd()


def _ensure_clean_dir(path):
    """Create *path* if it is missing, delete "._" files under it, return it."""
    os.makedirs(path, exist_ok=True)
    clear_hidden_files(path)
    return path


# Source VOC tree. The two parent folders are only created, never purged.
data_base_dir = os.path.join(wd, "VOCdevkit/")
work_sapce_dir = os.path.join(data_base_dir, "VOC2007/")
os.makedirs(work_sapce_dir, exist_ok=True)  # also creates data_base_dir

annotation_dir = _ensure_clean_dir(os.path.join(work_sapce_dir, "Annotations/"))
image_dir = _ensure_clean_dir(os.path.join(work_sapce_dir, "JPEGImages/"))
yolo_labels_dir = _ensure_clean_dir(os.path.join(work_sapce_dir, "YOLOLabels/"))

# Output tree: images/{train,val} and labels/{train,val} under VOCdevkit.
yolov5_images_dir = _ensure_clean_dir(os.path.join(data_base_dir, "images/"))
yolov5_labels_dir = _ensure_clean_dir(os.path.join(data_base_dir, "labels/"))
yolov5_images_train_dir = _ensure_clean_dir(os.path.join(yolov5_images_dir, "train/"))
yolov5_images_test_dir = _ensure_clean_dir(os.path.join(yolov5_images_dir, "val/"))
yolov5_labels_train_dir = _ensure_clean_dir(os.path.join(yolov5_labels_dir, "train/"))
yolov5_labels_test_dir = _ensure_clean_dir(os.path.join(yolov5_labels_dir, "val/"))

list_imgs = os.listdir(image_dir)  # list image files

# This first draw is not used for the split; it is kept so the console output
# stays the same as before.
prob = random.randint(1, 100)
print("Probability: %d" % prob)

# Opening with 'w' truncates any list left over from a previous run; the
# context manager closes both lists even if a conversion fails.
with open(os.path.join(wd, "yolov5_train.txt"), 'w') as train_file, \
        open(os.path.join(wd, "yolov5_val.txt"), 'w') as test_file:
    for img_name in list_imgs:
        image_path = os.path.join(image_dir, img_name)
        if not os.path.isfile(image_path):
            continue  # ignore sub-directories and other non-file entries

        stem = os.path.splitext(img_name)[0]
        annotation_path = os.path.join(annotation_dir, stem + '.xml')
        label_name = stem + '.txt'
        label_path = os.path.join(yolo_labels_dir, label_name)

        prob = random.randint(1, 100)
        print("Probability: %d" % prob)

        if not os.path.exists(annotation_path):
            continue  # images without an annotation are left out of both splits

        # randint(1, 100) is inclusive on both ends, so "<=" sends exactly
        # TRAIN_RATIO percent of the draws to the training split.
        if prob <= TRAIN_RATIO:  # train dataset
            list_file = train_file
            images_out = yolov5_images_train_dir
            labels_out = yolov5_labels_train_dir
        else:  # validation dataset
            list_file = test_file
            images_out = yolov5_images_test_dir
            labels_out = yolov5_labels_test_dir

        list_file.write(image_path + '\n')
        convert_annotation(stem)  # writes label_path
        copyfile(image_path, os.path.join(images_out, img_name))
        copyfile(label_path, os.path.join(labels_out, label_name))
目录结构:

2. 如果本身有yolo格式的标签文件,但未划分训练集和验证集
- 需要先将yolo格式的标签文件转换成xml格式的标签,然后再用上面的程序进行转换,生成训练集和验证集。
- 下列程序是将原有的yolo标签文件转换成xml形式:
# coding=utf-8
# 参考文章:https://blog.csdn.net/didiaopao/article/details/120022845
# 思路:需要先将txt标签转换成xml标签,然后再进行转换生成训练集和验证集
# https://www.bilibili.com/video/BV1f44y187Xg?p=4
from xml.dom.minidom import Document
import os
import cv2
def _append_text_element(doc, parent, tag, text):
    """Append ``<tag>text</tag>`` to *parent* and return the new element."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def makexml(picPath, txtPath, xmlPath):
    """Convert YOLO-format txt label files into VOC-format XML annotations.

    For every ``.txt`` file in *txtPath*, the image with the same base name
    and a ``.jpg`` extension is read from *picPath* to obtain its size, and
    ``<base name>.xml`` is written to *xmlPath* (gbk-encoded).

    Entries in *txtPath* that are not ``.txt`` files are ignored. A label
    file whose image is missing or unreadable is skipped with a printed
    message, and blank lines inside a label file are ignored.

    Args:
        picPath: Directory containing the ``.jpg`` images.
        txtPath: Directory containing the YOLO txt label files.
        xmlPath: Directory the XML files are written to.

    Raises:
        KeyError: If a label line starts with a class id missing from the
            class mapping below.
    """
    # Class id (as written in the txt files) -> class name. The mapping must
    # match your own classes.txt, in the same order (28 classes here).
    dic = {
        '0': "6_daming",
        '1': "7_erhuaming",
        '2': "8_daozongjuanyeming",
        '3': "9_baibeifeishi",
        '4': "10_hefeishishu",
        '5': "25_dilaohu",
        '6': "41_lougu",
        '7': "105_nianchong",
        '8': "110_caodiming",
        '9': "115_tiancaiyee",
        '10': "148_huangzuliechun",
        '11': "156_badianhuidenge",
        '12': "222_mianlingchong",
        '13': "228_erdianweiyee",
        '14': "235_ganlanyee",
        '15': "256_xishuai",
        '16': "280_huangdue",
        '17': "310_daominglin",
        '18': "387_zhitiaochie",
        '19': "392_shuiminge",
        '20': "394_xianweiyee",
        '21': "398_caibaidaiyeming",
        '22': "401_qijiaoming",
        '23': "402_guajuanyeming",
        '24': "430_douyeming",
        '25': "480_shie",
        '26': "485_daheisaijinggui",
        '27': "673_ganwendongyee",
    }
    for name in os.listdir(txtPath):
        base, ext = os.path.splitext(name)
        if ext.lower() != ".txt":
            continue

        image_path = os.path.join(picPath, base + ".jpg")
        # cv2.imread returns None instead of raising when it cannot load the
        # file, so both failure modes end up in the same skip branch.
        img = cv2.imread(image_path) if os.path.isfile(image_path) else None
        if img is None:
            print("Skipping %s: cannot read image %s" % (name, image_path))
            continue
        Pheight, Pwidth, Pdepth = img.shape

        with open(os.path.join(txtPath, name), encoding="utf-8") as txtFile:
            txtList = txtFile.readlines()

        xmlBuilder = Document()
        annotation = xmlBuilder.createElement("annotation")
        xmlBuilder.appendChild(annotation)

        _append_text_element(xmlBuilder, annotation, "folder", "driving_annotation_dataset")
        _append_text_element(xmlBuilder, annotation, "filename", base + ".jpg")

        size = xmlBuilder.createElement("size")
        annotation.appendChild(size)
        _append_text_element(xmlBuilder, size, "width", str(Pwidth))
        _append_text_element(xmlBuilder, size, "height", str(Pheight))
        _append_text_element(xmlBuilder, size, "depth", str(Pdepth))

        for line in txtList:
            oneline = line.split()
            if not oneline:
                continue  # blank line (e.g. trailing newline at end of file)

            obj = xmlBuilder.createElement("object")
            annotation.appendChild(obj)
            _append_text_element(xmlBuilder, obj, "name", dic[oneline[0]])
            _append_text_element(xmlBuilder, obj, "pose", "Unspecified")
            _append_text_element(xmlBuilder, obj, "truncated", "0")
            _append_text_element(xmlBuilder, obj, "difficult", "0")

            # YOLO stores the normalised centre and size; scale them back to
            # pixels. NOTE(review): the +1 offset is carried over unchanged;
            # presumably a shift to 1-based pixel coordinates, confirm.
            center_x = float(oneline[1]) * Pwidth + 1
            center_y = float(oneline[2]) * Pheight + 1
            half_w = float(oneline[3]) * 0.5 * Pwidth
            half_h = float(oneline[4]) * 0.5 * Pheight

            bndbox = xmlBuilder.createElement("bndbox")
            obj.appendChild(bndbox)
            _append_text_element(xmlBuilder, bndbox, "xmin", str(int(center_x - half_w)))
            _append_text_element(xmlBuilder, bndbox, "ymin", str(int(center_y - half_h)))
            _append_text_element(xmlBuilder, bndbox, "xmax", str(int(center_x + half_w)))
            _append_text_element(xmlBuilder, bndbox, "ymax", str(int(center_y + half_h)))

        with open(os.path.join(xmlPath, base + ".xml"), 'w', encoding="gbk") as f:
            xmlBuilder.writexml(f, indent='\t', newl='\n', addindent='\t', encoding='gbk')
if __name__ == "__main__":
    # Hard-coded dataset folders; keep the trailing "/" on each of them.
    picPath = "D:/YOLO-V5_Project/datasets/images/"  # folder holding the images
    txtPath = "D:/YOLO-V5_Project/datasets/labels/"  # folder holding the txt labels
    xmlPath = "D:/YOLO-V5_Project/datasets/Annotations/"  # folder the xml files are saved to
    makexml(picPath=picPath, txtPath=txtPath, xmlPath=xmlPath)
运行的结果:

原文地址:https://blog.csdn.net/didiaopao/article/details/120022845
版权声明
本文为[Lvcx]所创,转载请带上原文链接,感谢
https://blog.csdn.net/ungoing/article/details/124288529
边栏推荐
- 1171: 加密(指针专题)
- Open3d reads and writes PCD point cloud files
- Handler异步消息传递机制(一)Handler常用基本用法
- [appium] use the simulator to realize the business functions of Youdao cloud app - add, search, modify and delete
- 1146: eat candy
- Drafting and Revision: Laplacian Pyramid Network for Fast High-Quality Artistic Style Transfer--T Li
- 1150: how many integers
- 1164: string encryption
- PageRank case Airport
- My life of Honker Security Commando
猜你喜欢

网格布局--grid

Penetration test - roaming from public cloud to intranet rce deserialization FRP
纯c语言链表实现学生信息管理系统.(你学会了吗?)

Download the first analysis report on China's database industry!

Transaction isolation level and mvcc

Notes of the most complete grain mall in the whole network_ 02. Introduction to the overall effect of the project (2022-04-02)

CC10000.CloudJenkins—————————————

Meizu, once expected to challenge apple, now lives by providing accessories for Apple users

Getting started with object detection FAQs (deep learning / image classification)

Intranet penetration - proxy penetration - rights lifting - injection - MSF Middleware - domain penetration - log clearing - learning resources
随机推荐
目标检测入门常见问题(深度学习 / 图像分类)
I use ehcache local cache to improve the query performance by 100 times. It's really fragrant!
PageRank case Airport
Pyinstaller package exe (detailed tutorial)
1147: find subarray
Drafting and Revision: Laplacian Pyramid Network for Fast High-Quality Artistic Style Transfer--T Li
Surfaceview high performance rendering (IV) code practice - drawing multiple pictures
我的博客导航目录(持续整理更新中。。。)
编程如何提高自己的水平能力?学编程最重要的是什么?请看凡人浅谈如何学C
1167: number of reversals (pointer topic)
CC10000. CloudJenkins—————————————
Penetration practice - no echo rce thinkphp5 getshell
Handler异步消息传递机制(二)在子线程中创建Handler
报告解读下载 | 首份《中国数据库行业分析报告》重磅发布!
Dark blue - Visual slam - Section 6 exercise
1149: 组合三位数之二
Actf2020 freshman tournament upload 1
1161: 字符串长度(指针专题)
Note 0104 MySQL advanced - index - Overview
synchronized真的很重么?