Deep learning notes - semantic segmentation and data sets
2022-04-23 04:57:00 【Whisper_ yl】
Semantic segmentation only cares about which class each pixel belongs to; instance segmentation also cares about which instance of that class the pixel belongs to (it can be viewed as an evolved form of object detection).
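The difference is easiest to see in the label format. A minimal made-up sketch (tiny 2x3 tensors, not taken from VOC) of what the two kinds of labels look like:

import torch
# Semantic segmentation label: one class index per pixel
# (0 = background, 1 = aeroplane).
semantic_label = torch.tensor([[0, 1, 1],
                               [0, 1, 1]])
# Instance segmentation label: pixels of the same class are further split by
# object, e.g. an instance id per pixel (1 = first plane, 2 = second plane).
instance_label = torch.tensor([[0, 1, 2],
                               [0, 1, 2]])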
import os
import torch
import torchvision
from d2l import torch as d2l
import matplotlib.pyplot as plt
d2l.DATA_HUB['voc2012'] = (d2l.DATA_URL + 'VOCtrainval_11-May-2012.tar',
                           '4e443f8a2eca6b1dac8a6c57641b67dd40621a49')
voc_dir = d2l.download_extract('voc2012', 'VOCdevkit/VOC2012')
# Read all input images and labels into memory
def read_voc_images(voc_dir, is_train=True):
    """Read all VOC feature images and label images."""
    txt_fname = os.path.join(voc_dir, 'ImageSets', 'Segmentation',
                             'train.txt' if is_train else 'val.txt')
    mode = torchvision.io.image.ImageReadMode.RGB
    with open(txt_fname, 'r') as f:
        images = f.read().split()
    features, labels = [], []
    for i, fname in enumerate(images):
        features.append(torchvision.io.read_image(os.path.join(
            voc_dir, 'JPEGImages', f'{fname}.jpg')))
        labels.append(torchvision.io.read_image(os.path.join(
            voc_dir, 'SegmentationClass', f'{fname}.png'), mode))
    return features, labels
train_features, train_labels = read_voc_images(voc_dir, True)
# Show the first 5 input images and their labels
n = 5
imgs = train_features[0:n] + train_labels[0:n]
imgs = [img.permute(1, 2, 0) for img in imgs]
d2l.show_images(imgs, 2, n)
plt.show()
# List the RGB color values and class names (each class is marked by one RGB value in the label images)
VOC_COLORMAP = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0],
                [0, 0, 128], [128, 0, 128], [0, 128, 128], [128, 128, 128],
                [64, 0, 0], [192, 0, 0], [64, 128, 0], [192, 128, 0],
                [64, 0, 128], [192, 0, 128], [64, 128, 128], [192, 128, 128],
                [0, 64, 0], [128, 64, 0], [0, 192, 0], [128, 192, 0],
                [0, 64, 128]]
VOC_CLASSES = ['background', 'aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat', 'chair', 'cow',
               'diningtable', 'dog', 'horse', 'motorbike', 'person',
               'potted plant', 'sheep', 'sofa', 'train', 'tv/monitor']
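# The two lists above are parallel: 21 entries each, so VOC_CLASSES[i] is drawn
# with color VOC_COLORMAP[i] in the label images. A quick sanity check (added
# here as a sketch, not part of the original post):
assert len(VOC_COLORMAP) == len(VOC_CLASSES) == 21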
# Find the class index of each pixel in the label
def voc_colormap2label():
    """Build a mapping from RGB values to VOC class indices."""
    colormap2label = torch.zeros(256 ** 3, dtype=torch.long)
    for i, colormap in enumerate(VOC_COLORMAP):
        colormap2label[
            (colormap[0] * 256 + colormap[1]) * 256 + colormap[2]] = i
    return colormap2label
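# A quick check of the encoding (a sketch added here, not in the original post):
# voc_colormap2label flattens each RGB triple into the integer (R * 256 + G) * 256 + B.
# The 'aeroplane' color is [128, 0, 0], so its key is 128 * 65536 = 8388608,
# which should map back to class index 1.
assert voc_colormap2label()[(128 * 256 + 0) * 256 + 0] == VOC_CLASSES.index('aeroplane')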
def voc_label_indices(colormap, colormap2label):
    """Map the RGB values in a VOC label image to class indices."""
    colormap = colormap.permute(1, 2, 0).numpy().astype('int32')
    idx = ((colormap[:, :, 0] * 256 + colormap[:, :, 1]) * 256
           + colormap[:, :, 2])
    return colormap2label[idx]
y = voc_label_indices(train_labels[0], voc_colormap2label())
print(y[105:115, 130:140], VOC_CLASSES[1])
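# The slice above falls on the aeroplane in the first training image, so the
# printed block of indices should be (mostly) 1, which is why VOC_CLASSES[1]
# ('aeroplane') is printed alongside it; y itself is a 2D tensor of class
# indices with the same height and width as the label image.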
# Use random cropping for image augmentation; crop the same region from the input image and its label
def voc_rand_crop(feature, label, height, width):
    """Randomly crop the feature and label images with the same box."""
    rect = torchvision.transforms.RandomCrop.get_params(
        feature, (height, width))
    feature = torchvision.transforms.functional.crop(feature, *rect)
    label = torchvision.transforms.functional.crop(label, *rect)
    return feature, label
imgs = []
for _ in range(n):
    imgs += voc_rand_crop(train_features[0], train_labels[0], 200, 300)
imgs = [img.permute(1, 2, 0) for img in imgs]
d2l.show_images(imgs[::2] + imgs[1::2], 2, n)
plt.show()
# Custom semantic segmentation dataset class
# For minibatch training, all images in a batch must have the same size, so we crop to a fixed shape
# instead of resizing: interpolating a label image would create RGB values that no longer map to any class
class VOCSegDataset(torch.utils.data.Dataset):
    """A custom dataset for loading the VOC semantic segmentation dataset."""
    def __init__(self, is_train, crop_size, voc_dir):
        # Normalize with ImageNet statistics so that an ImageNet-pretrained model can be used later
        self.transform = torchvision.transforms.Normalize(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.crop_size = crop_size
        features, labels = read_voc_images(voc_dir, is_train=is_train)
        self.features = [
            # Normalize the RGB channels
            self.normalize_image(feature)
            for feature in self.filter(features)]
        self.labels = self.filter(labels)
        self.colormap2label = voc_colormap2label()
        print('read ' + str(len(self.features)) + ' examples')

    def normalize_image(self, img):
        return self.transform(img.float() / 255)

    # Discard images whose height or width is smaller than crop_size
    def filter(self, imgs):
        return [img for img in imgs if (
            img.shape[1] >= self.crop_size[0] and
            img.shape[2] >= self.crop_size[1])]

    def __getitem__(self, idx):
        feature, label = voc_rand_crop(self.features[idx], self.labels[idx],
                                       *self.crop_size)
        return (feature, voc_label_indices(label, self.colormap2label))

    def __len__(self):
        return len(self.features)
# Reading data sets
crop_size = (320, 480)
voc_train = VOCSegDataset(True, crop_size, voc_dir)
voc_test = VOCSegDataset(False, crop_size, voc_dir)
batch_size = 64
train_iter = torch.utils.data.DataLoader(voc_train, batch_size, shuffle=True,
                                         drop_last=True,
                                         num_workers=d2l.get_dataloader_workers())
for X, Y in train_iter:
    print(X.shape)
    print(Y.shape)
    break
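# With batch_size = 64 and crop_size = (320, 480), X should come out as
# torch.Size([64, 3, 320, 480]) and Y as torch.Size([64, 320, 480]): the label
# in a minibatch is a 2D map of class indices, not a 3-channel RGB image.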
# Integrate all components
def load_data_voc(batch_size, crop_size):
    """Load the VOC semantic segmentation dataset."""
    voc_dir = d2l.download_extract('voc2012', os.path.join(
        'VOCdevkit', 'VOC2012'))
    num_workers = d2l.get_dataloader_workers()
    train_iter = torch.utils.data.DataLoader(
        VOCSegDataset(True, crop_size, voc_dir), batch_size,
        shuffle=True, drop_last=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(
        VOCSegDataset(False, crop_size, voc_dir), batch_size,
        drop_last=True, num_workers=num_workers)
    return train_iter, test_iter
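# A minimal usage sketch of the helper above (the batch size here is
# illustrative, not taken from the original post):
#     train_iter, test_iter = load_data_voc(batch_size=32, crop_size=(320, 480))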
Copyright notice
This article was written by [Whisper_ yl]; please include a link to the original when reposting. Thanks.
https://yzsam.com/2022/04/202204230453143607.html