当前位置:网站首页>制作实例分割数据集
制作实例分割数据集
2022-08-08 19:25:00 【SmileAtlas】
项目结构
├── data # 数据集
│ ├── data_annotated # 标注的文件
│ │ ├── 0000.bmp
│ │ ├── 0000.json
│ ├── creat_txt.py
│ ├── classify_file.py
│ ├── labelme2coco.py
│ ├── labels.txt
创建labels.txt
__ignore__
_background_
A
B
划分数据集creat_txt.py
# -*- coding: utf-8 -*-
"""Split labelme-annotated samples into train/val/test name lists.

Reads paired ``*.json`` / ``*.bmp`` files from ``data_annotated`` and writes
four COCO-style list files (trainval/train/val/test 2017), one sample stem
per line.
"""
import os
import glob
import random

root = os.getcwd()
data_annotated_path = os.path.join(root, 'data_annotated')
txt_save_path = root

json_file_path = glob.glob(os.path.join(data_annotated_path, '*.json'))
img_file_path = glob.glob(os.path.join(data_annotated_path, '*.bmp'))
# Every image must have exactly one annotation file (and vice versa).
assert len(json_file_path) == len(img_file_path)

trainval_percent = 1  # fraction of samples used for train+val (no test sample)
train_percent = 0.9   # fraction of trainval used for training

num = len(json_file_path)
indices = range(num)  # renamed: ``list`` shadowed the builtin
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(indices, tv)
train = random.sample(trainval, tr)

# ``with`` guarantees the list files are flushed and closed even on error.
with open(os.path.join(root, 'trainval2017.txt'), 'w') as f_trainval, \
        open(os.path.join(root, 'train2017.txt'), 'w') as f_train, \
        open(os.path.join(root, 'val2017.txt'), 'w') as f_val, \
        open(os.path.join(root, 'test2017.txt'), 'w') as f_test:
    for i in indices:
        name = os.path.basename(json_file_path[i]).split('.')[0] + '\n'
        if i in trainval:
            f_trainval.write(name)
            if i in train:
                f_train.write(name)
            else:
                f_val.write(name)
        else:
            f_test.write(name)

print('Create_txt Done')
得到以下几个文件
├── data # 数据集
│ ├── test2017.txt
│ ├── train2017.txt
│ ├── trainval2017.txt
│ ├── val2017.txt
拷贝到对应文件夹classify_file.py
"""Copy each split's images and labelme JSON files into its own folder.

For every split in ``sets`` this reads ``./<split>.txt`` (one sample stem
per line, produced by creat_txt.py) and copies the matching ``.bmp`` /
``.json`` pair from ``data_annotated/`` into a freshly created
``<split>/`` directory.
"""
import shutil
import os
import os.path as osp

sets = ['train2017', 'val2017', 'test2017']
for image_set in sets:
    # Start from a clean directory for this split.
    # (The original script repeated this delete/create a second time via
    # ``json_path = '%s' % image_set`` — the same path — which was a no-op
    # duplication and has been removed.)
    if osp.exists(image_set):
        shutil.rmtree(image_set)
        print('Deleted previous %s file and created a new one' % image_set)
    os.makedirs(image_set)
    with open('./%s.txt' % image_set) as f:
        image_ids = f.read().strip().split()
    for image_id in image_ids:
        img_path = 'data_annotated/%s.bmp' % image_id
        ann_path = 'data_annotated/%s.json' % image_id
        shutil.copy(img_path, image_set)
        shutil.copy(ann_path, image_set)
print("Done")
处理后得到以下文件
├── data # 数据集
│ ├── train2017
│ │ ├── 0000.bmp
│ │ ├── 0000.json
│ │ ├── ···
│ ├── val2017
│ │ ├── 0001.bmp
│ │ ├── 0001.json
│ │ ├── ···
│ ├── test2017
转换为coco格式labelme2coco.py
import collections
import datetime
import glob
import json
import uuid
import os
import os.path as osp
import sys
import numpy as np
import imgviz
import labelme
import shutil
try:
import pycocotools.mask
except ImportError:
print('Please install pycocotools:\n\n pip install pycocotools\n')
sys.exit(1)
def main():
    """Convert the labelme splits into a COCO-format dataset.

    For every split in ``train2017 / val2017 / test2017`` this reads the
    labelme ``*.json`` files from ``./<split>/``, writes each decoded image
    as JPEG under ``<dataset>/JPEGImages/<split>/``, accumulates a COCO
    ``instances_<split>.json`` annotation file, and (unless ``noviz``)
    saves an instance-overlay image under ``<dataset>/Visualization/<split>/``.
    """
    noviz = False  # set True to skip writing visualization images
    root = os.getcwd()
    dataset = 'hanfengseg'
    splits = ['train2017', 'val2017', 'test2017']  # renamed: ``set`` shadowed the builtin

    output_dir = osp.join(root, dataset)
    if osp.exists(output_dir):
        # Rebuild from scratch so stale files never linger.
        print('Output directory already exists:', output_dir)
        shutil.rmtree(output_dir)
    else:
        print('Creating dataset:', output_dir)
    os.makedirs(osp.join(output_dir, 'annotations'))
    # BUG FIX: images are saved below into JPEGImages/<split>/ and
    # Visualization/<split>/, but the original only created the parent
    # folders, so imsave() failed on the missing per-split directories.
    for split in splits:
        os.makedirs(osp.join(output_dir, 'JPEGImages', split))
        os.makedirs(osp.join(output_dir, 'Visualization', split))

    # Map class names to contiguous COCO category ids. labels.txt starts
    # with ``__ignore__`` (id -1, skipped); ``_background_`` gets id 0.
    # Hoisted out of the split loop — the mapping is loop-invariant.
    class_name_to_id = {}
    categories = []
    with open(osp.join(root, 'labels.txt')) as f:
        for i, line in enumerate(f):
            class_id = i - 1  # starts with -1
            class_name = line.strip()
            if class_id == -1:
                assert class_name == "__ignore__"
                continue
            class_name_to_id[class_name] = class_id
            categories.append(
                dict(supercategory=None, id=class_id, name=class_name, )
            )

    for split in splits:
        input_dir = './%s' % split           # e.g. ./train2017
        ann_stem = 'instances_%s' % split    # e.g. instances_train2017
        now = datetime.datetime.now()
        data = dict(
            info=dict(
                description='HanFeng',
                url=None,
                version="5.0.1",
                year="2022",
                contributor=None,
                date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
            ),
            licenses=[dict(url=None, id=0, name=None, )],
            images=[
                # license, url, file_name, height, width, date_captured, id
            ],
            type="instances",
            annotations=[
                # segmentation, area, iscrowd, image_id, bbox, category_id, id
            ],
            categories=list(categories),
        )

        out_ann_file = osp.join(output_dir, 'annotations', ann_stem + '.json')
        label_files = glob.glob(osp.join(input_dir, '*.json'))
        # Renamed loop variable: the original reused ``filename``, shadowing
        # the annotation-file stem computed above.
        for image_id, label_path in enumerate(label_files):
            print("Generating dataset from:", label_path)
            label_file = labelme.LabelFile(filename=label_path)
            base = osp.splitext(osp.basename(label_path))[0]
            out_img_file = osp.join(output_dir, "JPEGImages", split, base + ".jpg")
            img = labelme.utils.img_data_to_arr(label_file.imageData)
            imgviz.io.imsave(out_img_file, img)
            data["images"].append(
                dict(
                    license=0,
                    url=None,
                    file_name=base + ".jpg",
                    height=img.shape[0],
                    width=img.shape[1],
                    date_captured=None,
                    id=image_id,
                )
            )

            masks = {}  # (label, group_id) -> boolean mask, for area/bbox
            segmentations = collections.defaultdict(list)  # polygon point lists per instance
            for shape in label_file.shapes:
                points = shape["points"]
                label = shape["label"]
                group_id = shape.get("group_id")
                shape_type = shape.get("shape_type", "polygon")
                mask = labelme.utils.shape_to_mask(
                    img.shape[:2], points, shape_type
                )
                # Shapes sharing a group_id belong to one instance; ungrouped
                # shapes get a unique id so they stay separate instances.
                if group_id is None:
                    group_id = uuid.uuid1()
                instance = (label, group_id)
                if instance in masks:
                    masks[instance] = masks[instance] | mask
                else:
                    masks[instance] = mask

                if shape_type == "rectangle":
                    (x1, y1), (x2, y2) = points
                    x1, x2 = sorted([x1, x2])
                    y1, y2 = sorted([y1, y2])
                    points = [x1, y1, x2, y1, x2, y2, x1, y2]
                if shape_type == "circle":
                    (x1, y1), (x2, y2) = points
                    r = np.linalg.norm([x2 - x1, y2 - y1])
                    # r(1-cos(a/2))<x, a=2*pi/N => N>pi/arccos(1-x/r)
                    # x: tolerance of the gap between the arc and the line segment
                    n_points_circle = max(int(np.pi / np.arccos(1 - 1 / r)), 12)
                    i = np.arange(n_points_circle)
                    x = x1 + r * np.sin(2 * np.pi / n_points_circle * i)
                    y = y1 + r * np.cos(2 * np.pi / n_points_circle * i)
                    points = np.stack((x, y), axis=1).flatten().tolist()
                else:
                    points = np.asarray(points).flatten().tolist()
                segmentations[instance].append(points)
            segmentations = dict(segmentations)

            for instance, mask in masks.items():
                cls_name, group_id = instance
                if cls_name not in class_name_to_id:
                    continue  # labels absent from labels.txt are ignored
                cls_id = class_name_to_id[cls_name]
                # pycocotools expects a Fortran-ordered uint8 mask for RLE.
                mask = np.asfortranarray(mask.astype(np.uint8))
                mask = pycocotools.mask.encode(mask)
                area = float(pycocotools.mask.area(mask))
                bbox = pycocotools.mask.toBbox(mask).flatten().tolist()
                data["annotations"].append(
                    dict(
                        id=len(data["annotations"]),
                        image_id=image_id,
                        category_id=cls_id,
                        segmentation=segmentations[instance],
                        area=area,
                        bbox=bbox,
                        iscrowd=0,
                    )
                )

            if not noviz:
                viz = img
                if masks:
                    # ``viz_masks`` avoids clobbering ``masks`` (the original
                    # reused the name; harmless per-image, but confusing).
                    labels, captions, viz_masks = zip(
                        *[
                            (class_name_to_id[cnm], cnm, msk)
                            for (cnm, gid), msk in masks.items()
                            if cnm in class_name_to_id
                        ]
                    )
                    viz = imgviz.instances2rgb(
                        image=img,
                        labels=labels,
                        masks=viz_masks,
                        captions=captions,
                        font_size=15,
                        line_width=2,
                    )
                out_viz_file = osp.join(
                    output_dir, "Visualization", split, base + ".jpg"
                )
                imgviz.io.imsave(out_viz_file, viz)

        # One annotation file per split.
        with open(out_ann_file, "w") as f:
            json.dump(data, f)


if __name__ == '__main__':
    main()
生成的数据集
├── hanfengseg # 生成的数据集
│ ├── annotations # 标注文件
│ │ ├── instances_train2017.json
│ │ ├── instances_val2017.json
│ │ ├── instances_test2017.json
│ ├── JPEGImages # 图片
│ │ ├── train2017
│ │ │ ├── 0000.jpg
│ │ │ ├── 0001.jpg
│ │ │ ├── ···
│ │ ├── val2017
│ ├── Visualization # 可视化
│ │ ├── train2017
│ │ │ ├── 0000.jpg
│ │ │ ├── 0001.jpg
│ │ │ ├── ···
│ │ ├── val2017
边栏推荐
- odoo login layout adjustment
- golang for循环详解
- PX4-Things you need to know for secondary development of flight control-Cxm
- 分布式文件系统fastDFS
- Codeforces Round #725 (Div. 3)
- Redis Server启动过程
- Fortinet new cloud native protection products launched amazon cloud platform of science and technology
- hdu1495 非常可乐 (广搜)
- Goose Factory Robot Dog Fancy Crossing 10m Plum Blossom Pile: Front Flip, Single Pile Jump, Get Up and Bow... No stumble in the whole process
- Securities account is better to choose which brokerage platform, which is more safe
猜你喜欢
El - tree set radio, click finish after assemble
软考中级网络工程师全面学习笔记第2版(5万字)+配套视频及课件
Research on ORACLE subqueries that lead to inability to push predicates
Fortinet new cloud native protection products launched amazon cloud platform of science and technology
How to add F4 Value Help to the input parameters of the report in the ABAP report
卡通渲染的历史
“12306” 的架构到底有多牛逼?
我们为什么要远离Service Mesh
BP neural network
数据泵导出数据报39006是什么原因
随机推荐
软件测试主要是做什么的?
智驾科技完成C1轮融资,此前2轮已融4.5亿元
Fortinet new cloud native protection products launched amazon cloud platform of science and technology
01. Preface
ptorch
黑猫带你学Makefile第1篇:什么是Makefile
证券开户选哪个券商平台比较好,哪个更安全
数据库学习之表的操作
Salesforce开发之 apex操作批准过程(Approval Process)
ADB安装方法:
hdu2018 母牛的故事(模拟)
Oracle存储修改以前的历史记录,怎么查找?
企业进行知识共享的好处有哪些?
Redis Server启动过程
[BJDCTF2020]Easy MD5
The history of cartoon rendering
继承的详解
RADIUS服务器的演变过程
Wps文档云同步如何开启?Wps打开文档云同步的方法
Dry goods: design high concurrency architecture from scratch