Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
31 changes: 31 additions & 0 deletions tn/english/data/date/day.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
one
two
three
four
five
six
seven
eight
nine
ten
eleven
twelve
thirteen
fourteen
fifteen
sixteen
seventeen
eighteen
nineteen
twenty
twenty one
twenty two
twenty three
twenty four
twenty five
twenty six
twenty seven
twenty eight
twenty nine
thirty
thirty one
12 changes: 12 additions & 0 deletions tn/english/data/date/month_abbr.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
jan january
feb february
mar march
apr april
jun june
jul july
aug august
sep september
sept september
oct october
nov november
dec december
12 changes: 12 additions & 0 deletions tn/english/data/date/month_name.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
january
february
march
april
may
june
july
august
september
october
november
december
24 changes: 24 additions & 0 deletions tn/english/data/date/month_number.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
1 january
2 february
3 march
4 april
5 may
6 june
7 july
8 august
9 september
10 october
11 november
12 december
01 january
02 february
03 march
04 april
05 may
06 june
07 july
08 august
09 september
10 october
11 november
12 december
16 changes: 16 additions & 0 deletions tn/english/data/date/year_suffix.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
A. D AD
A.D AD
a. d AD
a.d AD
a. d. AD
a.d. AD
B. C BC
B.C BC
b. c BC
b.c BC
A. D. AD
A.D. AD
B. C. BC
B.C. BC
b. c. BC
b.c. BC
18 changes: 18 additions & 0 deletions tn/english/data/number/fraction.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
¼ 1/4
½ 1/2
¾ 3/4
⅐ 1/7
⅑ 1/9
⅒ 1/10
⅓ 1/3
⅔ 2/3
⅕ 1/5
⅖ 2/5
⅗ 3/5
⅘ 4/5
⅙ 1/6
⅚ 5/6
⅛ 1/8
⅜ 3/8
⅝ 5/8
⅞ 7/8
1 change: 1 addition & 0 deletions tn/english/data/number/hundred.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hundred
10 changes: 10 additions & 0 deletions tn/english/data/number/quantity_abbr.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
M million
MLN million
m million
mln million
B billion
b billion
BN billion
bn billion
K thousand
k thousand
10 changes: 10 additions & 0 deletions tn/english/data/number/teen.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
ten 10
eleven 11
twelve 12
thirteen 13
fourteen 14
fifteen 15
sixteen 16
seventeen 17
eighteen 18
nineteen 19
22 changes: 22 additions & 0 deletions tn/english/data/number/thousand.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
thousand
million
billion
trillion
quadrillion
quintillion
sextillion
septillion
octillion
nonillion
decillion
undecillion
duodecillion
tredecillion
quattuordecillion
quindecillion
sexdecillion
septendecillion
octodecillion
novemdecillion
vigintillion
centillion
8 changes: 8 additions & 0 deletions tn/english/data/number/ty.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
twenty 2
thirty 3
forty 4
fifty 5
sixty 6
seventy 7
eighty 8
ninety 9
4 changes: 2 additions & 2 deletions tn/english/data/ordinal/digit.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ second two
third three
fourth four
fifth five
sixth sixth
sixth six
seventh seven
eighth eight
ninth nine
ninth nine
11 changes: 8 additions & 3 deletions tn/english/normalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from tn.english.rules.cardinal import Cardinal
from tn.english.rules.ordinal import Ordinal
from tn.english.rules.word import Word
from tn.english.rules.date import Date

from pynini.lib.pynutil import add_weight, delete
from importlib_resources import files
Expand All @@ -25,22 +26,26 @@
class Normalizer(Processor):
    """English normalizer combining the cardinal, ordinal, date and word rules.

    Each rule class exposes a ``tagger`` and a ``verbalizer``; this class
    unions them into ``self.tagger`` and ``self.verbalizer``.
    """

    def __init__(self, cache_dir=None, overwrite_cache=False):
        """Initialise the processor and build its FSTs.

        Args:
            cache_dir: Passed through to ``build_fst``. When ``None`` it
                defaults to ``files("tn")``, i.e. the ``tn`` package
                resources.
            overwrite_cache: Passed through to ``build_fst`` unchanged.
                NOTE(review): presumably forces a rebuild of any cached
                FST -- confirm against ``Processor.build_fst``.
        """
        # Single initialisation carrying the English token ordering; the
        # earlier call without ``ordertype`` was redundant.
        super().__init__(name='en_normalizer', ordertype="en_tn")
        if cache_dir is None:
            cache_dir = files("tn")
        self.build_fst('en_tn', cache_dir, overwrite_cache)

    def build_tagger(self):
        """Build ``self.tagger`` from the weighted union of all rule taggers."""
        cardinal = add_weight(Cardinal().tagger, 1.0)
        ordinal = add_weight(Ordinal().tagger, 1.0)
        # Date is weighted just below cardinal/ordinal (0.99 vs 1.0).
        # NOTE(review): presumably so a date reading is preferred when both
        # match -- confirm how the Processor selects among paths.
        date = add_weight(Date().tagger, 0.99)
        # Word carries a much larger weight than every other rule.
        word = add_weight(Word().tagger, 100)
        tagger = (cardinal | ordinal | word
                  | date).optimize() + self.DELETE_SPACE
        # delete the last space
        self.tagger = tagger.star @ self.build_rule(delete(' '), r='[EOS]')

    def build_verbalizer(self):
        """Build ``self.verbalizer`` from the union of all rule verbalizers."""
        cardinal = Cardinal().verbalizer
        ordinal = Ordinal().verbalizer
        word = Word().verbalizer
        date = Date().verbalizer
        verbalizer = (cardinal | ordinal | word
                      | date).optimize() + self.INSERT_SPACE
        self.verbalizer = verbalizer.star
Loading