197 lines
7.0 KiB
Python
197 lines
7.0 KiB
Python
import os
|
|
import sys
|
|
import re
|
|
import six
|
|
import lmdb
|
|
import torch
|
|
|
|
from PIL import Image
|
|
import numpy as np
|
|
from torch.utils.data import Dataset, ConcatDataset, Subset
|
|
from torch._utils import _accumulate
|
|
import torchvision.transforms as transforms
|
|
|
|
|
|
class Batch_Balanced_Dataset(object):
    """Build mini-batches that mix several datasets at a fixed ratio.

    One ``DataLoader`` is created per entry of ``opt.select_data``. Each call
    to :meth:`get_batch` pulls one sub-batch from every loader and
    concatenates them into a single batch.
    """

    def __init__(self, opt):
        """
        Modulate the data ratio in the batch.
        For example, when select_data is "MJ-ST" and batch_ratio is "0.5-0.5",
        the 50% of the batch is filled with MJ and the other 50% of the batch is filled with ST.

        Args:
            opt: options object. This constructor reads ``train_data``,
                ``select_data``, ``batch_ratio``, ``batch_size``,
                ``total_data_usage_ratio``, ``workers``, ``imgH`` and
                ``imgW``; ``opt`` is also forwarded to
                ``hierarchical_dataset``.

        Raises:
            AssertionError: if ``select_data`` and ``batch_ratio`` differ in
                length.
        """
        print('-' * 80)
        print(f'dataset_root: {opt.train_data}\nopt.select_data: {opt.select_data}\nopt.batch_ratio: {opt.batch_ratio}')
        assert len(opt.select_data) == len(opt.batch_ratio)

        align_collate = AlignCollate(imgH=opt.imgH, imgW=opt.imgW)
        # Loaders are kept next to their live iterators so that an exhausted
        # iterator can be rebuilt from its loader in get_batch().
        self.data_loader_list = []
        self.dataloader_iter_list = []
        for selected_d, batch_ratio_d in zip(opt.select_data, opt.batch_ratio):
            # Every selected dataset contributes at least one sample per batch.
            _batch_size = max(round(opt.batch_size * float(batch_ratio_d)), 1)
            print('-' * 80)
            _dataset = hierarchical_dataset(root=opt.train_data, opt=opt, select_data=[selected_d])
            total_number_dataset = len(_dataset)

            # The total number of data can be modified with
            # opt.total_data_usage_ratio.
            # ex) opt.total_data_usage_ratio = 1 indicates 100% usage, and
            # 0.2 indicates 20% usage. See 4.2 section in our paper.
            number_dataset = int(total_number_dataset * float(opt.total_data_usage_ratio))
            # Keep only the leading `number_dataset` samples; the remainder
            # is never used, so a plain prefix slice is sufficient.
            _dataset = Subset(_dataset, range(total_number_dataset)[:number_dataset])
            print(f'num total samples of {selected_d}: {total_number_dataset} x {opt.total_data_usage_ratio} (total_data_usage_ratio) = {len(_dataset)}')
            print(f'num samples of {selected_d} per batch: {opt.batch_size} x {float(batch_ratio_d)} (batch_ratio) = {_batch_size}')

            _data_loader = torch.utils.data.DataLoader(
                _dataset, batch_size=_batch_size,
                shuffle=True,
                num_workers=int(opt.workers),
                collate_fn=align_collate, pin_memory=True)
            self.data_loader_list.append(_data_loader)
            self.dataloader_iter_list.append(iter(_data_loader))
        print('-' * 80)

    def _next_sub_batch(self, loader_index):
        """Return the next sub-batch of one loader, restarting it if exhausted."""
        try:
            return next(self.dataloader_iter_list[loader_index])
        except StopIteration:
            # The loader finished a full pass: start a new one and retry.
            fresh_iter = iter(self.data_loader_list[loader_index])
            self.dataloader_iter_list[loader_index] = fresh_iter
            return next(fresh_iter)

    def get_batch(self):
        """Fetch one sub-batch from every loader and merge them.

        Returns:
            tuple: ``(images, texts)`` where ``images`` is the ``torch.cat``
            of all sub-batch image tensors along dimension 0 and ``texts`` is
            a flat list of the corresponding labels, in loader order.
        """
        balanced_batch_images = []
        balanced_batch_texts = []

        for loader_index in range(len(self.dataloader_iter_list)):
            try:
                image, text = self._next_sub_batch(loader_index)
            except ValueError:
                # Best effort: a sub-batch that cannot be produced (e.g. the
                # collate function received no usable sample) is skipped.
                # This now also covers the retry after a loader restart.
                continue
            balanced_batch_images.append(image)
            balanced_batch_texts.extend(text)

        balanced_batch_images = torch.cat(balanced_batch_images, 0)

        return balanced_batch_images, balanced_batch_texts
|
|
|
|
|
|
def hierarchical_dataset(root, opt, select_data='/'):
    """Collect every selected leaf directory under ``root`` into one dataset.

    A directory is used when it has no sub-directories and at least one entry
    of ``select_data`` occurs as a substring of its path. The default
    select_data='/' contains all sub-directory of root directory.

    Args:
        root: directory that is walked recursively.
        opt: options object forwarded unchanged to ``LmdbDataset``.
        select_data: sequence of substrings used to select directories.

    Returns:
        ConcatDataset: concatenation of one ``LmdbDataset`` per selected
        directory, in ``os.walk`` order.
    """
    print(f'dataset_root: {root}\t dataset: {select_data[0]}')
    collected = []
    for dirpath, dirnames, _filenames in os.walk(root):
        # Only leaf directories (no sub-directories) are treated as datasets.
        if dirnames:
            continue
        if not any(name in dirpath for name in select_data):
            continue

        leaf_dataset = LmdbDataset(dirpath, opt)
        print(f'sub-directory:\t/{os.path.relpath(dirpath, root)}\t num samples: {len(leaf_dataset)}')
        collected.append(leaf_dataset)

    return ConcatDataset(collected)
|
|
|
|
|
|
class LmdbDataset(Dataset):
    """Dataset of ``(image, label)`` pairs read from an LMDB environment.

    The number of samples is read from the ``num-samples`` key; sample ``i``
    (0-based) is stored under the keys ``image-%09d`` / ``label-%09d``
    formatted with ``i + 1``.
    """

    def __init__(self, root, opt):
        """Open the LMDB environment at ``root`` and read the sample count.

        Args:
            root: path of the LMDB environment.
            opt: options object; ``__getitem__`` reads ``rgb``,
                ``batch_max_length``, ``sensitive`` and ``character`` from it.
        """
        self.root = root
        self.opt = opt
        self.env = lmdb.open(root, max_readers=32, readonly=True, lock=False, readahead=False, meminit=False)
        if not self.env:
            print('cannot create lmdb from %s' % (root))
            # This is a failure path, so report a non-zero exit status.
            sys.exit(1)

        with self.env.begin(write=False) as txn:
            self.nSamples = int(txn.get('num-samples'.encode()))

    def __len__(self):
        """Return the number of samples recorded in the environment."""
        return self.nSamples

    def __getitem__(self, index):
        """Load one sample.

        Args:
            index: 0-based sample index, ``0 <= index < len(self)``.

        Returns:
            ``(img, label)`` with ``img`` a PIL image ('RGB' when
            ``opt.rgb`` is set, otherwise 'L') and ``label`` the filtered
            text, or ``None`` when the image cannot be decoded or the raw
            label is longer than ``opt.batch_max_length``.

        Raises:
            IndexError: if ``index`` is out of range.
        """
        # A real exception instead of `assert`: asserts vanish under -O, and
        # the old `index <= len(self)` bound let index == len(self) through.
        if not 0 <= index < len(self):
            raise IndexError('index range error')
        index += 1  # stored keys are numbered from 1

        with self.env.begin(write=False) as txn:
            label_key = 'label-%09d'.encode() % index
            label = txn.get(label_key).decode('utf-8')
            img_key = 'image-%09d'.encode() % index
            imgbuf = txn.get(img_key)

        buf = six.BytesIO()
        buf.write(imgbuf)
        buf.seek(0)
        try:
            if self.opt.rgb:
                img = Image.open(buf).convert('RGB')  # for color image
            else:
                img = Image.open(buf).convert('L')
        except IOError:
            print(f'Corrupted image for {index}')
            return None

        # The length limit applies to the raw label, before any filtering.
        if len(label) > self.opt.batch_max_length:
            return None

        if not self.opt.sensitive:
            label = label.lower()

        # We only train and evaluate on alphanumerics (or pre-defined character set in train.py)
        # re.escape keeps characters such as '-', ']', '^' and '\' literal
        # inside the character class instead of altering the pattern.
        out_of_char = f'[^{re.escape(self.opt.character)}]'
        label = re.sub(out_of_char, '', label)

        return (img, label)
|
|
|
|
|
|
class ResizeNormalize(object):
    """Resize an image to a fixed size and convert it to a normalized tensor."""

    def __init__(self, size, interpolation=Image.BICUBIC):
        """Store the target ``size`` and the resampling filter to use.

        Args:
            size: size argument passed to ``img.resize``.
            interpolation: resampling filter passed to ``img.resize``.
        """
        self.size = size
        self.interpolation = interpolation
        self.toTensor = transforms.ToTensor()

    def __call__(self, img):
        """Resize ``img``, convert it with ``ToTensor`` and apply ``(x - 0.5) / 0.5``."""
        resized = img.resize(self.size, self.interpolation)
        tensor = self.toTensor(resized)
        # Both steps are in-place on the freshly created tensor.
        tensor.sub_(0.5)
        tensor.div_(0.5)
        return tensor
|
|
|
|
|
|
class AlignCollate(object):
    """Collate callable that resizes every image to ``imgW`` x ``imgH`` and batches them."""

    def __init__(self, imgH=32, imgW=100):
        """Remember the target image height and width."""
        self.imgH, self.imgW = imgH, imgW

    def __call__(self, batch):
        """Turn a list of samples into ``(image_tensors, labels)``.

        ``None`` entries in ``batch`` are dropped first. If no sample is
        left, the tuple unpacking below raises ``ValueError``.

        Args:
            batch: iterable of ``(image, label)`` pairs or ``None``.

        Returns:
            tuple: the stacked image tensor (new leading dimension) and a
            tuple of labels.
        """
        valid_samples = [sample for sample in batch if sample is not None]
        images, labels = zip(*valid_samples)

        resize = ResizeNormalize((self.imgW, self.imgH))
        # Stack the per-image tensors along a new leading dimension.
        image_tensors = torch.stack([resize(image) for image in images], 0)

        return image_tensors, labels
|
|
|
|
|
|
def tensor2im(image_tensor, imtype=np.uint8):
    """Convert a channel-first image tensor to a channel-last numpy array.

    A tensor whose first dimension is 1 is tiled to three channels. Values
    are mapped with ``(x + 1) / 2 * 255`` and then cast to ``imtype``.

    Args:
        image_tensor: tensor indexed as (channel, row, column).
        imtype: numpy dtype of the returned array.

    Returns:
        numpy.ndarray: array with the channel axis moved to the end.
    """
    array = image_tensor.cpu().float().numpy()
    is_single_channel = array.shape[0] == 1
    if is_single_channel:
        array = np.tile(array, (3, 1, 1))
    channels_last = np.transpose(array, (1, 2, 0))
    scaled = (channels_last + 1) / 2.0 * 255.0
    return scaled.astype(imtype)
|
|
|
|
|
|
def save_image(image_numpy, image_path):
    """Write ``image_numpy`` to ``image_path`` as an image file.

    Args:
        image_numpy: array converted with ``Image.fromarray``.
        image_path: destination passed to the image's ``save`` method.
    """
    Image.fromarray(image_numpy).save(image_path)
|