-
Notifications
You must be signed in to change notification settings - Fork 240
Open
Description
Can anyone explain why the code below works? It seems to just extract the first letter of each token. Thanks.
class CharFeaturizer(TextFeaturizer):
    """Text featurizer whose vocabulary entries are single characters."""

    def __init_vocabulary(self):
        """Build the character vocabulary and its lookup structures.

        Vocabulary lines come from ``self.decoder_config.vocabulary`` when
        it is set, otherwise from ``ENGLISH_CHARACTERS``. Each line is
        passed through ``self.preprocess_text``; empty lines and lines
        starting with ``#`` are skipped.

        Only the first character of each remaining line (``line[0]``) is
        stored as a token. NOTE(review): this presumably relies on the
        vocabulary listing one character per line, so ``line[0]`` is the
        whole entry -- confirm against the vocabulary file format.

        Sets ``blank``, ``tokens2indices``, ``non_blank_tokens``,
        ``num_classes``, ``tokens`` (a ``tf.string`` tensor) and
        ``upoints`` on the instance.
        """
        if self.decoder_config.vocabulary is not None:
            # Decode explicitly as UTF-8 rather than with the platform's
            # locale encoding: the tokens are decoded as UTF-8 further down.
            with open(self.decoder_config.vocabulary, "r", encoding="utf-8") as fin:
                lines = fin.readlines()
        else:
            lines = ENGLISH_CHARACTERS

        self.blank = 0 if self.decoder_config.blank_at_zero else None
        self.tokens2indices = {}
        self.tokens = []

        # When the blank occupies index 0, real tokens are numbered from 1
        # so that they line up with their final position in self.tokens.
        index = 1 if self.blank == 0 else 0
        for line in lines:
            line = self.preprocess_text(line)
            if line.startswith("#") or not line:
                continue
            char = line[0]  # the token is the first character of the line
            self.tokens2indices[char] = index
            self.tokens.append(char)
            index += 1

        if self.blank is None:
            self.blank = len(self.tokens)  # blank not at zero: append it last
        self.non_blank_tokens = self.tokens.copy()
        self.tokens.insert(self.blank, "")  # add blank token to tokens
        self.num_classes = len(self.tokens)

        self.tokens = tf.convert_to_tensor(self.tokens, dtype=tf.string)
        # Unicode code points of the tokens, densified to one column per token.
        self.upoints = tf.strings.unicode_decode(self.tokens, "UTF-8").to_tensor(shape=[None, 1])
Metadata
Metadata
Assignees
Labels
No labels