CTC token update

This commit is contained in:
Baek JeongHun 2019-04-18 01:58:35 +00:00
parent 58494a8503
commit f48694a2af
1 changed file with 7 additions and 5 deletions

View File

@@ -6,10 +6,12 @@ class CTCLabelConverter(object):
def __init__(self, character):
# character (str): set of the possible characters.
self.character = ' ' + character # ' ' for blank token (index 0)
list_token = ['[BLANK]'] # '[BLANK] ' for blank token (index 0)
list_character = list(character)
self.character = list_token + list_character
self.dict = {}
for i, char in enumerate(character):
for i, char in enumerate(self.character):
# NOTE: 0 is reserved for 'blank' token required by CTCLoss
self.dict[char] = i + 1
@@ -58,9 +60,9 @@ class AttnLabelConverter(object):
self.character = list_token + list_character
self.dict = {}
for i, item in enumerate(self.character):
# print(i, item)
self.dict[item] = i
for i, char in enumerate(self.character):
# print(i, char)
self.dict[char] = i
def encode(self, text, batch_max_length=25):
""" convert text-label into text-index.