Source code for en_us_normalization.production.verbalize.decimal

"""
Copyright 2022 Balacoon
Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
Copyright 2015 and onwards Google, Inc.

Verbalizes decimal numbers
"""

import pynini
from en_us_normalization.production.verbalize.cardinal import CardinalFst
from pynini.lib import pynutil

from learn_to_normalize.grammar_utils.base_fst import BaseFst
from learn_to_normalize.grammar_utils.shortcuts import insert_space, LOWER


class DecimalFst(BaseFst):
    """
    Verbalizer for the decimal semiotic class, i.e. numbers that carry an
    integer part, a fractional part, or both.

    The integer part is read as a cardinal number. The fractional part is
    read one digit at a time and is preceded by the word "point".
    An optional ``negative:1|`` field is read as "minus" and an optional
    ``quantity:`` field is emitted with only its tag and terminator removed.
    Most of the grammar is borrowed from the Cardinal verbalizer.

    Example of input/output string:

    - decimal|negative:1|integer_part:12|fractional_part:5006| ->
      minus twelve point five o o six
    """

    def __init__(self, cardinal: CardinalFst = None):
        """
        Assemble the decimal verbalization grammar.

        Parameters
        ----------
        cardinal: CardinalFst
            cardinal verbalizer whose sub-grammars are reused;
            a fresh one is created when the argument is omitted
        """
        super().__init__(name="decimal")
        cardinal_fst = CardinalFst() if cardinal is None else cardinal

        # every serialized field ends with "|", which is dropped from the output
        field_end = pynutil.delete("|")

        # fractional part: digits are spelled out one by one after "point".
        # The digit-by-digit reading carries an extra weight of 1.1, so the
        # unweighted "0" -> "zero" alternative is the cheaper path when the
        # fractional part is exactly "0".
        spelled_digits = pynutil.add_weight(cardinal_fst.get_digit_by_digit_fst(), 1.1)
        lone_zero = pynini.cross("0", "zero")
        fractional_part = (
            pynutil.insert("point")
            + pynutil.delete("fractional_part:")
            + insert_space
            + (spelled_digits | lone_zero)
            + field_end
        )

        # integer part: strip the tag and let the cardinal grammar expand it
        integer_part = (
            pynutil.delete("integer_part:")
            + cardinal_fst.get_cardinal_expanding_fst()
            + field_end
        )

        # quantity: nothing to expand, only the tag and terminator are removed.
        # At least one LOWER symbol is required (presumably lowercase letters,
        # see grammar_utils.shortcuts).
        quantity_word = pynutil.delete("quantity:") + pynini.closure(LOWER, 1) + field_end
        quantity_opt = pynini.closure(insert_space + quantity_word, 0, 1)

        # a decimal is either integer only, fraction only, or integer + fraction
        integer_and_fraction = integer_part + insert_space + fractional_part
        number = integer_part | fractional_part | integer_and_fraction

        sign_opt = pynini.closure(pynini.cross("negative:1|", "minus "), 0, 1)

        # keep the raw graph so it can be embedded into other semiotic classes
        self.graph = sign_opt + number + quantity_opt
        self._single_fst = self.delete_tokens(self.graph).optimize()

    def get_graph(self):
        """
        Return the complete decimal verbalization graph as stored in
        ``self.graph``, i.e. without token name deletion applied.
        Useful when the whole decimal graph has to be reused inside
        another semiotic class (for ex. measure).
        """
        return self.graph