当前位置:网站首页>制作实例分割数据集
制作实例分割数据集
2022-08-08 19:25:00 【SmileAtlas】
项目结构
├── data # 数据集
│ ├── data_annotated # 标注的文件
│ │ ├── 0000.bmp
│ │ ├── 0000.json
│ ├── creat_txt.py
│ ├── classify_file.py
│ ├── labelme2coco.py
│ ├── labels.txt
创建labels.txt
__ignore__
_background_
A
B
划分数据集creat_txt.py
# -*- coding: utf-8 -*-
"""Split the annotated dataset into train/val/test id lists.

Reads matching *.json / *.bmp pairs from ./data_annotated and writes four
text files (trainval2017 / train2017 / val2017 / test2017 .txt), one image
basename per line, next to this script.
"""
import os
import glob
import random

root = os.getcwd()
data_annotated_path = os.path.join(root, 'data_annotated')

json_files = glob.glob(os.path.join(data_annotated_path, '*.json'))
img_files = glob.glob(os.path.join(data_annotated_path, '*.bmp'))
# Every image must have exactly one annotation file.
assert len(json_files) == len(img_files)

trainval_percent = 1  # fraction used for train+val (no test sample)
train_percent = 0.9   # fraction of trainval used for train

num = len(json_files)
indices = range(num)  # renamed: do not shadow the builtin `list`
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(indices, tv)
train = random.sample(trainval, tr)

# Context managers guarantee the four output files are closed even on error.
with open(os.path.join(root, 'trainval2017.txt'), 'w') as f_trainval, \
     open(os.path.join(root, 'train2017.txt'), 'w') as f_train, \
     open(os.path.join(root, 'val2017.txt'), 'w') as f_val, \
     open(os.path.join(root, 'test2017.txt'), 'w') as f_test:
    for i in indices:
        # splitext is safer than split('.')[0] for names containing dots
        name = os.path.splitext(os.path.basename(json_files[i]))[0] + '\n'
        if i in trainval:
            f_trainval.write(name)
            if i in train:
                f_train.write(name)
            else:
                f_val.write(name)
        else:
            f_test.write(name)
print('Create_txt Done')
得到以下几个文件
├── data # 数据集
│ ├── test2017.txt
│ ├── train2017.txt
│ ├── trainval2017.txt
│ ├── val2017.txt
拷贝到对应文件夹classify_file.py
"""Copy each split's .bmp/.json pairs from data_annotated/ into the
train2017/ val2017/ test2017 directories, recreating them on every run.

The id lists (<split>.txt) are the ones produced by creat_txt.py.
"""
import shutil
import os
import os.path as osp

sets = ['train2017', 'val2017', 'test2017']
for image_set in sets:
    # Start each split from a clean directory.
    # BUGFIX: the original computed json_path = '%s' % image_set (identical
    # to image_set) and then deleted/recreated the directory a second time.
    if osp.exists(image_set):
        shutil.rmtree(image_set)
        print('Deleted previous %s file and created a new one' % image_set)
    os.makedirs(image_set)
    with open('./%s.txt' % image_set) as f:
        image_ids = f.read().strip().split()
    for image_id in image_ids:
        img_path = 'data_annotated/%s.bmp' % image_id
        # renamed from `json`: do not shadow the json module name
        ann_path = 'data_annotated/%s.json' % image_id
        shutil.copy(img_path, image_set)
        shutil.copy(ann_path, image_set)
print("Done")
处理后得到以下文件
├── data # 数据集
│ ├── train2017
│ │ ├── 0000.bmp
│ │ ├── 0000.json
│ │ ├── ···
│ ├── val2017
│ │ ├── 0001.bmp
│ │ ├── 0001.json
│ │ ├── ···
│ ├── test2017
转换为coco格式labelme2coco.py
import collections
import datetime
import glob
import json
import uuid
import os
import os.path as osp
import sys
import numpy as np
import imgviz
import labelme
import shutil
try:
import pycocotools.mask
except ImportError:
print('Please install pycocotools:\n\n pip install pycocotools\n')
sys.exit(1)
def main():
    """Convert labelme-annotated splits (train2017/val2017/test2017) into a
    COCO "instances" dataset under ./hanfengseg.

    For each split this writes:
      annotations/instances_<split>.json  -- COCO annotation file
      JPEGImages/<split>/<name>.jpg       -- re-encoded images
      Visualization/<split>/<name>.jpg    -- instance overlays (unless noviz)
    """
    noviz = False  # set True to skip writing visualization images
    root = os.getcwd()
    dataset = 'hanfengseg'
    # renamed from `sets`/`set`: do not shadow the builtin `set`
    splits = ['train2017', 'val2017', 'test2017']
    output_dir = osp.join(root, dataset)
    if not osp.exists(output_dir):
        os.makedirs(output_dir)
        print('Creating dataset:', output_dir)
    else:
        # Wipe any previous run; the makedirs calls below recreate everything.
        shutil.rmtree(output_dir)
        print('Output directory already exists:', output_dir)
    os.makedirs(osp.join(output_dir, 'annotations'), exist_ok=True)
    for split in splits:
        # BUGFIX: imsave writes into JPEGImages/<split>/ and
        # Visualization/<split>/, but the original only created the parent
        # directories, so saving the first image would fail.
        os.makedirs(osp.join(output_dir, 'JPEGImages', split), exist_ok=True)
        os.makedirs(osp.join(output_dir, 'Visualization', split), exist_ok=True)

    for split in splits:
        input_dir = './%s' % split          # e.g. ./train2017
        ann_name = 'instances_%s' % split   # e.g. instances_train2017
        now = datetime.datetime.now()
        # Skeleton of the COCO annotation file for this split.
        data = dict(
            info=dict(
                description='HanFeng',
                url=None,
                version="5.0.1",
                year="2022",
                contributor=None,
                date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
            ),
            licenses=[dict(url=None, id=0, name=None, )],
            images=[
                # license, url, file_name, height, width, date_captured, id
            ],
            type="instances",
            annotations=[
                # segmentation, area, iscrowd, image_id, bbox, category_id, id
            ],
            categories=[
                # supercategory, id, name
            ],
        )
        # Build the class-name -> COCO category-id map from labels.txt.
        class_name_to_id = {}
        # BUGFIX: open labels.txt with a context manager so it is closed.
        with open(osp.join(root, 'labels.txt')) as label_list:
            for i, line in enumerate(label_list):
                class_id = i - 1  # first line is __ignore__, so ids start at -1
                class_name = line.strip()
                if class_id == -1:
                    assert class_name == "__ignore__"
                    continue
                class_name_to_id[class_name] = class_id
                data["categories"].append(
                    dict(supercategory=None, id=class_id, name=class_name, )
                )
        out_ann_file = osp.join(output_dir, 'annotations', ann_name + '.json')
        label_files = glob.glob(osp.join(input_dir, '*.json'))
        # renamed loop variable (was reusing `filename` for two purposes)
        for image_id, label_path in enumerate(label_files):
            print("Generating dataset from:", label_path)
            label_file = labelme.LabelFile(filename=label_path)
            base = osp.splitext(osp.basename(label_path))[0]
            out_img_file = osp.join(output_dir, "JPEGImages", split, base + ".jpg")
            img = labelme.utils.img_data_to_arr(label_file.imageData)
            imgviz.io.imsave(out_img_file, img)
            data["images"].append(
                dict(
                    license=0,
                    url=None,
                    file_name=base + ".jpg",
                    height=img.shape[0],
                    width=img.shape[1],
                    date_captured=None,
                    id=image_id,
                )
            )
            masks = {}  # (label, group_id) -> binary mask, for area/bbox
            segmentations = collections.defaultdict(list)  # -> polygon lists
            for shape in label_file.shapes:
                points = shape["points"]
                label = shape["label"]
                group_id = shape.get("group_id")
                shape_type = shape.get("shape_type", "polygon")
                mask = labelme.utils.shape_to_mask(
                    img.shape[:2], points, shape_type
                )
                if group_id is None:
                    # Each ungrouped shape becomes its own instance.
                    group_id = uuid.uuid1()
                instance = (label, group_id)
                if instance in masks:
                    # Shapes sharing a group id are merged into one instance.
                    masks[instance] = masks[instance] | mask
                else:
                    masks[instance] = mask
                if shape_type == "rectangle":
                    (x1, y1), (x2, y2) = points
                    x1, x2 = sorted([x1, x2])
                    y1, y2 = sorted([y1, y2])
                    points = [x1, y1, x2, y1, x2, y2, x1, y2]
                if shape_type == "circle":
                    (x1, y1), (x2, y2) = points
                    r = np.linalg.norm([x2 - x1, y2 - y1])
                    # r(1-cos(a/2))<x, a=2*pi/N => N>pi/arccos(1-x/r)
                    # x: tolerance of the gap between the arc and the segment
                    n_points_circle = max(int(np.pi / np.arccos(1 - 1 / r)), 12)
                    i = np.arange(n_points_circle)
                    x = x1 + r * np.sin(2 * np.pi / n_points_circle * i)
                    y = y1 + r * np.cos(2 * np.pi / n_points_circle * i)
                    points = np.stack((x, y), axis=1).flatten().tolist()
                else:
                    points = np.asarray(points).flatten().tolist()
                segmentations[instance].append(points)
            segmentations = dict(segmentations)
            for instance, mask in masks.items():
                cls_name, group_id = instance
                if cls_name not in class_name_to_id:
                    continue  # skip labels not listed in labels.txt
                cls_id = class_name_to_id[cls_name]
                # RLE-encode the mask so pycocotools can compute area/bbox.
                mask = np.asfortranarray(mask.astype(np.uint8))
                mask = pycocotools.mask.encode(mask)
                area = float(pycocotools.mask.area(mask))
                bbox = pycocotools.mask.toBbox(mask).flatten().tolist()
                data["annotations"].append(
                    dict(
                        id=len(data["annotations"]),
                        image_id=image_id,
                        category_id=cls_id,
                        segmentation=segmentations[instance],
                        area=area,
                        bbox=bbox,
                        iscrowd=0,
                    )
                )
            if not noviz:
                viz = img
                if masks:
                    labels, captions, viz_masks = zip(
                        *[
                            (class_name_to_id[cnm], cnm, msk)
                            for (cnm, gid), msk in masks.items()
                            if cnm in class_name_to_id
                        ]
                    )
                    viz = imgviz.instances2rgb(
                        image=img,
                        labels=labels,
                        masks=viz_masks,
                        captions=captions,
                        font_size=15,
                        line_width=2,
                    )
                out_viz_file = osp.join(
                    output_dir, "Visualization", split, base + ".jpg"
                )
                imgviz.io.imsave(out_viz_file, viz)
        # One annotation file per split.
        with open(out_ann_file, "w") as f:
            json.dump(data, f)


if __name__ == '__main__':
    main()
生成的数据集
├── hanfengseg # 生成的数据集
│ ├── annotations # 标注文件
│ │ ├── instances_train2017.json
│ │ ├── instances_val2017.json
│ │ ├── instances_test2017.json
│ ├── JPEGImages # 图片
│ │ ├── train2017
│ │ │ ├── 0000.jpg
│ │ │ ├── 0001.jpg
│ │ │ ├── ···
│ │ ├── val2017
│ ├── Visualization # 可视化
│ │ ├── train2017
│ │ │ ├── 0000.jpg
│ │ │ ├── 0001.jpg
│ │ │ ├── ···
│ │ ├── val2017
边栏推荐
猜你喜欢
随机推荐
Dry goods: design high concurrency architecture from scratch
Codeforces Round #712 (Div. 2)(CD)
Monaco-Editor Multiplayer Collaboration Editor
Leetcode 23.合并K个升序链表 链表归并合并
JVM调优-JVM调优实践一
阿里云数据库PolarDB开源人才培养计划发布!万元好礼等你来拿!
培训预告 | 企业应用现代化实用教程——DevOps方法论及最佳实践篇 8月11日上线
Azure Neural TTS continues to be updated to help enterprises develop small language markets
synApps -- Autosave
Why Manufacturing Companies Should Deploy Digital Factory Systems
挖财学堂帮开通的证券账户是真的吗?安全吗
疫情期间闲来无事,我自制了一个按钮展示框特效来展示我的博客
达梦数据库 DmAPservice服务,启停影响 DMSERVER库服务吗?
nyoj685 查找字符串(map)
“12306” 的架构到底有多牛逼?
PyTorch入门:(四)torchvision中数据集的使用
企业进行知识共享的好处有哪些?
分布式链路追踪Jaeger + 微服务Pig在Rainbond上的实践分享
nyoj 712 Exploring treasure
5 IPOs, Internet home improvement is not as simple as Tubatu thinks








