Creating an Instance Segmentation Dataset
2022-08-08 19:25:00 【SmileAtlas】
Project structure
├── data # dataset
│ ├── data_annotated # labelme-annotated files
│ │ ├── 0000.bmp
│ │ ├── 0000.json
│ ├── creat_txt.py
│ ├── classify_file.py
│ ├── labelme2coco.py
│ ├── labels.txt
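For reference, each labelme annotation file (e.g. 0000.json) roughly follows the structure sketched below; the values are only illustrative, but labelme2coco.py later reads the shapes' label, points, shape_type and group_id fields plus the embedded imageData.

# Illustrative sketch of a labelme JSON (values are made up)
labelme_annotation = {
    "version": "5.0.1",
    "flags": {},
    "shapes": [
        {
            "label": "A",                    # class name, must appear in labels.txt
            "points": [[12.0, 34.0], [56.0, 78.0], [90.0, 12.0]],  # polygon vertices
            "group_id": None,                # shapes sharing a group_id form one instance
            "shape_type": "polygon",         # polygon / rectangle / circle
            "flags": {},
        }
    ],
    "imagePath": "0000.bmp",
    "imageData": "<base64-encoded image>",
    "imageHeight": 1024,
    "imageWidth": 1024,
}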
Create labels.txt
__ignore__
_background_
A
B
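The conversion script below (labelme2coco.py) derives COCO category ids from this file as line index minus one, skipping __ignore__: _background_ becomes id 0, A becomes 1, B becomes 2. A minimal sketch of that rule:

# Minimal sketch of how labels.txt maps to COCO category ids
# (same rule used in labelme2coco.py below)
class_name_to_id = {}
for i, line in enumerate(open('labels.txt').readlines()):
    class_id = i - 1            # __ignore__ -> -1, _background_ -> 0, A -> 1, B -> 2
    class_name = line.strip()
    if class_id == -1:          # __ignore__ is dropped entirely
        continue
    class_name_to_id[class_name] = class_id
print(class_name_to_id)         # {'_background_': 0, 'A': 1, 'B': 2}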
Split the dataset: creat_txt.py
# -*- coding: utf-8 -*-
import os
import glob
import random
root = os.getcwd()
data_annotated_path = os.path.join(root, 'data_annotated')
txt_save_path = root
json_file_path = glob.glob(os.path.join(data_annotated_path, '*.json'))
img_file_path = glob.glob(os.path.join(data_annotated_path, '*.bmp'))
assert len(json_file_path) == len(img_file_path)
trainval_percent = 1 # No test sample
train_percent = 0.9
num = len(json_file_path)
list = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(list, tv)
train = random.sample(trainval, tr)
f_trainval = open(os.path.join(root, 'trainval2017.txt'), 'w')
f_train = open(os.path.join(root, 'train2017.txt'), 'w')
f_val = open(os.path.join(root, 'val2017.txt'), 'w')
f_test = open(os.path.join(root, 'test2017.txt'), 'w')
for i in list:
    name = os.path.basename(json_file_path[i]).split('.')[0] + '\n'
    if i in trainval:
        f_trainval.write(name)
        if i in train:
            f_train.write(name)
        else:
            f_val.write(name)
    else:
        f_test.write(name)
f_trainval.close()
f_train.close()
f_val.close()
f_test.close()
print('Create_txt Done')
This produces the following files:
├── data # dataset
│ ├── test2017.txt
│ ├── train2017.txt
│ ├── trainval2017.txt
│ ├── val2017.txt
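With trainval_percent = 1 and train_percent = 0.9, test2017.txt stays empty, roughly 90% of the samples go to train2017.txt and the remaining ~10% to val2017.txt. A quick sanity check of the split sizes (not part of the original scripts) could be:

# Quick sanity check of the split sizes (illustrative only)
for name in ['trainval2017', 'train2017', 'val2017', 'test2017']:
    with open('%s.txt' % name) as f:
        ids = [x for x in f.read().split() if x]
    print(name, len(ids))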
Copy the files into the corresponding split folders: classify_file.py
import shutil
import os
import os.path as osp

sets = ['train2017', 'val2017', 'test2017']
for image_set in sets:
    # Recreate the folder for this split from scratch
    if osp.exists(image_set):
        shutil.rmtree(image_set)
        print('Deleted previous %s folder and created a new one' % image_set)
    os.makedirs(image_set)
    # Copy the image and its labelme JSON for every id listed in <split>.txt
    image_ids = open('./%s.txt' % image_set).read().strip().split()
    for image_id in image_ids:
        img = 'data_annotated/%s.bmp' % image_id
        json = 'data_annotated/%s.json' % image_id
        shutil.copy(img, image_set)
        shutil.copy(json, '%s/' % image_set)
print("Done")
After running it, the directory layout is:
├── data # dataset
│ ├── train2017
│ │ ├── 0000.bmp
│ │ ├── 0000.json
│ │ ├── ···
│ ├── val2017
│ │ ├── 0001.bmp
│ │ ├── 0001.json
│ │ ├── ···
│ ├── test2017
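Before converting, it can be worth confirming that every split folder contains matched .bmp/.json pairs; a small check (again, not part of the original scripts) might look like:

# Optional check: every image in each split should have a matching labelme JSON
import glob
import os.path as osp

for split in ['train2017', 'val2017', 'test2017']:
    imgs = {osp.splitext(osp.basename(p))[0] for p in glob.glob('%s/*.bmp' % split)}
    jsons = {osp.splitext(osp.basename(p))[0] for p in glob.glob('%s/*.json' % split)}
    assert imgs == jsons, 'unmatched files in %s' % split
print('All splits contain matched .bmp/.json pairs')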
Convert to COCO format: labelme2coco.py
import collections
import datetime
import glob
import json
import uuid
import os
import os.path as osp
import sys

import numpy as np
import imgviz
import labelme
import shutil

try:
    import pycocotools.mask
except ImportError:
    print('Please install pycocotools:\n\n    pip install pycocotools\n')
    sys.exit(1)


def main():
    noviz = False
    root = os.getcwd()
    dataset = 'hanfengseg'
    sets = ['train2017', 'val2017', 'test2017']
    output_dir = osp.join(root, dataset)
    if not osp.exists(output_dir):
        os.makedirs(output_dir)
        print('Creating dataset:', output_dir)
    else:
        shutil.rmtree(output_dir)
        print('Output directory already exists, recreating:', output_dir)
    if not osp.exists(osp.join(output_dir, 'annotations')):
        os.makedirs(osp.join(output_dir, 'annotations'))
    if not osp.exists(osp.join(output_dir, 'JPEGImages')):
        os.makedirs(osp.join(output_dir, 'JPEGImages'))
    if not osp.exists(osp.join(output_dir, 'Visualization')):
        os.makedirs(osp.join(output_dir, 'Visualization'))

    for set in sets:
        input_dir = './%s' % set          # e.g. ./train2017
        filename = 'instances_%s' % set   # e.g. instances_train2017
        # Per-split subfolders, so the imsave calls below have a valid path
        os.makedirs(osp.join(output_dir, 'JPEGImages', set), exist_ok=True)
        os.makedirs(osp.join(output_dir, 'Visualization', set), exist_ok=True)

        now = datetime.datetime.now()
        data = dict(
            info=dict(
                description='HanFeng',
                url=None,
                version="5.0.1",
                year="2022",
                contributor=None,
                date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
            ),
            licenses=[dict(url=None, id=0, name=None, )],
            images=[
                # license, url, file_name, height, width, date_captured, id
            ],
            type="instances",
            annotations=[
                # segmentation, area, iscrowd, image_id, bbox, category_id, id
            ],
            categories=[
                # supercategory, id, name
            ],
        )

        # Build the class-name -> category-id mapping from labels.txt
        class_name_to_id = {}
        for i, line in enumerate(open(osp.join(root, 'labels.txt')).readlines()):
            class_id = i - 1  # starts with -1
            class_name = line.strip()
            if class_id == -1:
                assert class_name == "__ignore__"
                continue
            class_name_to_id[class_name] = class_id
            data["categories"].append(
                dict(supercategory=None, id=class_id, name=class_name, )
            )

        out_ann_file = osp.join(output_dir, 'annotations', filename + '.json')  # e.g. annotations/instances_train2017.json
        label_files = glob.glob(osp.join(input_dir, '*.json'))  # e.g. [train2017/0000.json, ...]
        for image_id, filename in enumerate(label_files):
            print("Generating dataset from:", filename)
            label_file = labelme.LabelFile(filename=filename)
            base = osp.splitext(osp.basename(filename))[0]
            out_img_file = osp.join(output_dir, "JPEGImages", set, base + ".jpg")

            # Decode the image embedded in the labelme JSON and save it as JPEG
            img = labelme.utils.img_data_to_arr(label_file.imageData)
            imgviz.io.imsave(out_img_file, img)
            data["images"].append(
                dict(
                    license=0,
                    url=None,
                    file_name=base + ".jpg",
                    height=img.shape[0],
                    width=img.shape[1],
                    date_captured=None,
                    id=image_id,
                )
            )

            masks = {}  # for area
            segmentations = collections.defaultdict(list)  # for segmentation
            for shape in label_file.shapes:
                points = shape["points"]
                label = shape["label"]
                group_id = shape.get("group_id")
                shape_type = shape.get("shape_type", "polygon")
                mask = labelme.utils.shape_to_mask(
                    img.shape[:2], points, shape_type
                )

                if group_id is None:
                    group_id = uuid.uuid1()

                # Shapes with the same (label, group_id) belong to one instance
                instance = (label, group_id)
                if instance in masks:
                    masks[instance] = masks[instance] | mask
                else:
                    masks[instance] = mask

                if shape_type == "rectangle":
                    (x1, y1), (x2, y2) = points
                    x1, x2 = sorted([x1, x2])
                    y1, y2 = sorted([y1, y2])
                    points = [x1, y1, x2, y1, x2, y2, x1, y2]
                if shape_type == "circle":
                    (x1, y1), (x2, y2) = points
                    r = np.linalg.norm([x2 - x1, y2 - y1])
                    # r(1-cos(a/2))<x, a=2*pi/N => N>pi/arccos(1-x/r)
                    # x: tolerance of the gap between the arc and the line segment
                    n_points_circle = max(int(np.pi / np.arccos(1 - 1 / r)), 12)
                    i = np.arange(n_points_circle)
                    x = x1 + r * np.sin(2 * np.pi / n_points_circle * i)
                    y = y1 + r * np.cos(2 * np.pi / n_points_circle * i)
                    points = np.stack((x, y), axis=1).flatten().tolist()
                else:
                    points = np.asarray(points).flatten().tolist()

                segmentations[instance].append(points)
            segmentations = dict(segmentations)

            for instance, mask in masks.items():
                cls_name, group_id = instance
                if cls_name not in class_name_to_id:
                    continue
                cls_id = class_name_to_id[cls_name]

                # Encode the binary mask to compute area and bounding box
                mask = np.asfortranarray(mask.astype(np.uint8))
                mask = pycocotools.mask.encode(mask)
                area = float(pycocotools.mask.area(mask))
                bbox = pycocotools.mask.toBbox(mask).flatten().tolist()

                data["annotations"].append(
                    dict(
                        id=len(data["annotations"]),
                        image_id=image_id,
                        category_id=cls_id,
                        segmentation=segmentations[instance],
                        area=area,
                        bbox=bbox,
                        iscrowd=0,
                    )
                )

            if not noviz:
                viz = img
                if masks:
                    labels, captions, masks = zip(
                        *[
                            (class_name_to_id[cnm], cnm, msk)
                            for (cnm, gid), msk in masks.items()
                            if cnm in class_name_to_id
                        ]
                    )
                    viz = imgviz.instances2rgb(
                        image=img,
                        labels=labels,
                        masks=masks,
                        captions=captions,
                        font_size=15,
                        line_width=2,
                    )
                out_viz_file = osp.join(
                    output_dir, "Visualization", set, base + ".jpg"
                )
                imgviz.io.imsave(out_viz_file, viz)

        with open(out_ann_file, "w") as f:
            json.dump(data, f)


if __name__ == '__main__':
    main()
Generated dataset
├── hanfengseg # generated dataset
│ ├── annotations # annotation files
│ │ ├── instances_train2017.json
│ │ ├── instances_val2017.json
│ │ ├── instances_test2017.json
│ ├── JPEGImages # images
│ │ ├── train2017
│ │ │ ├── 0000.jpg
│ │ │ ├── 0001.jpg
│ │ │ ├── ···
│ │ ├── val2017
│ ├── Visualization # visualizations
│ │ ├── train2017
│ │ │ ├── 0000.jpg
│ │ │ ├── 0001.jpg
│ │ │ ├── ···
│ │ ├── val2017
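The annotation files follow the standard COCO instances format, so they can be loaded directly with the pycocotools COCO API (already a dependency of the conversion script) to verify the image, annotation and category counts, for example:

# Verify the generated annotations with the COCO API (illustrative)
from pycocotools.coco import COCO

coco = COCO('hanfengseg/annotations/instances_train2017.json')
print('images:', len(coco.getImgIds()))
print('annotations:', len(coco.getAnnIds()))
print('categories:', [c['name'] for c in coco.loadCats(coco.getCatIds())])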