diff --git a/dataset.py b/dataset.py index e98f874..4abf746 100755 --- a/dataset.py +++ b/dataset.py @@ -124,10 +124,13 @@ class LmdbDataset(Dataset): self.nSamples = nSamples if self.opt.data_filtering_off: - # for fast check with no filtering + # for fast check or benchmark evaluation with no filtering self.filtered_index_list = [index + 1 for index in range(self.nSamples)] else: - # Filtering + """ Filtering part + If you want to evaluate the IC15-2077 & CUTE80 datasets, which have special character labels, use --data_filtering_off and evaluate with this snippet (which evaluates only on letters and digits). + https://github.com/clovaai/deep-text-recognition-benchmark/blob/master/dataset.py#L186-L188 + """ self.filtered_index_list = [] for index in range(self.nSamples): index += 1 # lmdb starts with 1 @@ -276,7 +279,7 @@ class AlignCollate(object): resized_max_w = self.imgW input_channel = 3 if images[0].mode == 'RGB' else 1 transform = NormalizePAD((input_channel, self.imgH, resized_max_w)) - + resized_images = [] for image in images: w, h = image.size