Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 24 additions & 10 deletions tn/english/normalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,21 @@ def build_tagger(self):
punct = add_weight(Punctuation().tagger, 2.00)
rang = add_weight(Range().tagger, 1.01)
# TODO(xcsong): add roman
- tagger = punct.star + \
- (cardinal | ordinal | word
- | date | decimal | fraction
- | time | measure | money
+ tagger = \
+ (cardinal
+ | ordinal
+ | word
+ | date
+ | decimal
+ | fraction
+ | time
+ | measure
+ | money
| telephone | electronic
| whitelist
- | rang).optimize() + (punct.plus | self.DELETE_SPACE)
+ | rang
+ | punct
+ ).optimize() + (punct.plus | self.DELETE_SPACE)
# delete the last space
self.tagger = tagger.star @ self.build_rule(delete(' '), r='[EOS]')

Expand All @@ -83,14 +91,20 @@ def build_verbalizer(self):
punct = Punctuation().verbalizer
rang = Range().verbalizer
verbalizer = \
- (cardinal | ordinal | word
- | date | decimal
- | fraction | time
- | measure | money
+ (cardinal
+ | ordinal
+ | word
+ | date
+ | decimal
+ | fraction
+ | time
+ | measure
+ | money
| telephone
| electronic
| whitelist
| punct
- | rang).optimize() + (punct.plus | self.INSERT_SPACE)
+ | rang
+ ).optimize() + (punct.plus | self.INSERT_SPACE)
self.verbalizer = verbalizer.star @ self.build_rule(delete(' '),
r='[EOS]')
1 change: 1 addition & 0 deletions tn/english/test/data/normalizer.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ The National Map, accessed April 1, 2011" Site Description of Koppers Co. From t
.345" and ".456" "9.456" or 6.7890" => point three four five" and ".four hundred and fifty six" "nine point four five six" or six point seven eight nine oh"
The museum is open Mon.-Sun. children of 3-4 years 123 The plan will help you lose 3-4 pounds the first week, and 1-2 pounds the weeks thereafter. => The museum is open Monday to Sunday children of three to four years one hundred and twenty three The plan will help you lose three to four pounds the first week, and one to two pounds the weeks thereafter.
Try searching for 'Toyota' or 'Investment' => Try searching for 'Toyota' or 'Investment'
"" => ""
5 changes: 2 additions & 3 deletions tn/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,8 @@ def main():
full_to_half=str2bool(args.full_to_half),
tag_oov=str2bool(args.tag_oov))
elif args.language == "en":
- normalizer = EnNormalizer(
- cache_dir=args.cache_dir,
- overwrite_cache=args.overwrite_cache)
+ normalizer = EnNormalizer(cache_dir=args.cache_dir,
+ overwrite_cache=args.overwrite_cache)

if args.text:
print(normalizer.tag(args.text))
Expand Down