summaryrefslogtreecommitdiff
path: root/lib/voices/spanish/el_diphone/festvox/sptoken.scm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/voices/spanish/el_diphone/festvox/sptoken.scm')
-rw-r--r--lib/voices/spanish/el_diphone/festvox/sptoken.scm227
1 files changed, 227 insertions, 0 deletions
diff --git a/lib/voices/spanish/el_diphone/festvox/sptoken.scm b/lib/voices/spanish/el_diphone/festvox/sptoken.scm
new file mode 100644
index 0000000..cc2dbac
--- /dev/null
+++ b/lib/voices/spanish/el_diphone/festvox/sptoken.scm
@@ -0,0 +1,227 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;; ;;
+;;; Centre for Speech Technology Research ;;
+;;; University of Edinburgh, UK ;;
+;;; Copyright (c) 1996,1997 ;;
+;;; All Rights Reserved. ;;
+;;; ;;
+;;; Permission is hereby granted, free of charge, to use and distribute ;;
+;;; this software and its documentation without restriction, including ;;
+;;; without limitation the rights to use, copy, modify, merge, publish, ;;
+;;; distribute, sublicense, and/or sell copies of this work, and to ;;
+;;; permit persons to whom this work is furnished to do so, subject to ;;
+;;; the following conditions: ;;
+;;; 1. The code must retain the above copyright notice, this list of ;;
+;;; conditions and the following disclaimer. ;;
+;;; 2. Any modifications must be clearly marked as such. ;;
+;;; 3. Original authors' names are not deleted. ;;
+;;; 4. The authors' names are not used to endorse or promote products ;;
+;;; derived from this software without specific prior written ;;
+;;; permission. ;;
+;;; ;;
+;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
+;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
+;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
+;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
+;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
+;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
+;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
+;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
+;;; THIS SOFTWARE. ;;
+;;; ;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;
+;;; Tokenization rules for spanish
+;;;
+;;; Particularly numbers and symbols.
+;;;
+;;; As the "el" database has no dipthongs, numbers sound much
+;;; better removing the weak vowel ("ventiuno" instead of "veintiuno")
+;;; Many speakers do this, so no problem with it.
+
+(define (spanish_number name)
+"(spanish_number name)
+Convert a string of digits into a list of words saying the number."
+ (if (string-matches name "0")
+ (list "cero")
+ (spanish_number_from_digits (symbolexplode name))))
+
+(define (just_zeros digits)
+"(just_zeros digits)
+If this only contains 0s then we just do something different."
+ (cond
+ ((not digits) t)
+ ((string-equal "0" (car digits))
+ (just_zeros (cdr digits)))
+ (t nil)))
+
+(define (spanish_number_from_digits digits)
+ "(spanish_number_from_digits digits)
+Takes a list of digits and converts it to a list of words
+saying the number."
+ (let ((l (length digits)))
+ (cond
+ ((equal? l 0)
+ nil)
+ ((string-equal (car digits) "0")
+ (spanish_number_from_digits (cdr digits)))
+ ((equal? l 1);; single digit
+ (cond
+ ((string-equal (car digits) "0") (list "cero"))
+ ((string-equal (car digits) "1") (list "un"))
+ ((string-equal (car digits) "2") (list "dos"))
+ ((string-equal (car digits) "3") (list "tres"))
+ ((string-equal (car digits) "4") (list "cuatro"))
+ ((string-equal (car digits) "5") (list "cinco"))
+ ((string-equal (car digits) "6") (list "seis"))
+ ((string-equal (car digits) "7") (list "siete"))
+ ((string-equal (car digits) "8") (list "ocho"))
+ ((string-equal (car digits) "9") (list "nueve"))
+ ;; fill in the rest
+ (t (list "equis"))));; $$$ what should say?
+ ((equal? l 2);; less than 100
+ (cond
+ ((string-equal (car digits) "0");; 0x
+ (spanish_number_from_digits (cdr digits)))
+
+ ((string-equal (car digits) "1");; 1x
+ (cond
+ ((string-equal (car (cdr digits)) "0") (list "diez"))
+ ((string-equal (car (cdr digits)) "1") (list "once"))
+ ((string-equal (car (cdr digits)) "2") (list "doce"))
+ ((string-equal (car (cdr digits)) "3") (list "trece"))
+ ((string-equal (car (cdr digits)) "4") (list "catorce"))
+ ((string-equal (car (cdr digits)) "5") (list "quince"))
+ (t
+ (cons "dieci" (spanish_number_from_digits (cdr digits))))))
+
+ ((string-equal (car digits) "2");; 2x
+ (if (string-equal (car (cdr digits)) "0")
+ (list "veinte")
+ (cons "venti" (spanish_number_from_digits (cdr digits)))))
+
+ ((string-equal (car digits) "3");; 3x
+ (if (string-equal (car (cdr digits)) "0")
+ (list "treinta")
+ (cons "trentai" (spanish_number_from_digits (cdr digits)))))
+
+ ((string-equal (car digits) "4");; 4x
+ (if (string-equal (car (cdr digits)) "0")
+ (list "cuarenta")
+ (cons "cuarentai" (spanish_number_from_digits (cdr digits)))))
+
+ ((string-equal (car digits) "5");; 5x
+ (if (string-equal (car (cdr digits)) "0")
+ (list "cincuenta")
+ (cons "cincuentai" (spanish_number_from_digits (cdr digits)))))
+
+ ((string-equal (car digits) "6");; 6x
+ (if (string-equal (car (cdr digits)) "0")
+ (list "sesenta")
+ (cons "sesentai" (spanish_number_from_digits (cdr digits)))))
+
+ ((string-equal (car digits) "7");; 7x
+ (if (string-equal (car (cdr digits)) "0")
+ (list "setenta")
+ (cons "setentai" (spanish_number_from_digits (cdr digits)))))
+
+ ((string-equal (car digits) "8");; 8x
+ (if (string-equal (car (cdr digits)) "0")
+ (list "ochenta")
+ (cons "ochentai" (spanish_number_from_digits (cdr digits)))))
+
+ ((string-equal (car digits) "9");; 9x
+ (if (string-equal (car (cdr digits)) "0")
+ (list "noventa")
+ (cons "noventai" (spanish_number_from_digits (cdr digits)))))
+
+ ))
+
+ ((equal? l 3);; in the hundreds
+ (cond
+
+ ((string-equal (car digits) "1");; 1xx
+ (if (just_zeros (cdr digits)) (list "cien")
+ (cons "ciento" (spanish_number_from_digits (cdr digits)))))
+
+ ((string-equal (car digits) "5");; 5xx
+ (cons "quinientos" (spanish_number_from_digits (cdr digits))))
+
+ ((string-equal (car digits) "7");; 7xx
+ (cons "setecientos" (spanish_number_from_digits (cdr digits))))
+
+ ((string-equal (car digits) "9");; 9xx
+ (cons "novecientos" (spanish_number_from_digits (cdr digits))))
+
+ (t;; ?xx
+ (append (spanish_number_from_digits (list (car digits)))
+ (list "cientos")
+ (spanish_number_from_digits (cdr digits))))
+ ))
+
+ ((< l 7)
+ (let ((sub_thousands
+ (list
+ (car (cdr (cdr (reverse digits))))
+ (car (cdr (reverse digits)))
+ (car (reverse digits))))
+ (thousands (reverse (cdr (cdr (cdr (reverse digits)))))))
+ (set! x (spanish_number_from_digits thousands))
+ (append
+ (if (string-equal (car x) "un") nil x)
+ (list "mil")
+ (spanish_number_from_digits sub_thousands))))
+
+ ((< l 13)
+ (let ((sub_million
+ (list
+ (car (cdr (cdr (cdr (cdr (cdr(reverse digits)))))))
+ (car (cdr (cdr (cdr (cdr (reverse digits))))))
+ (car (cdr (cdr (cdr (reverse digits)))))
+ (car (cdr (cdr (reverse digits))))
+ (car (cdr (reverse digits)))
+ (car (reverse digits))
+ ))
+ (millions (reverse (cdr (cdr (cdr (cdr (cdr (cdr (reverse digits))))))))))
+ (set! x (spanish_number_from_digits millions))
+ (append
+ (if (string-equal (car x) "un")
+ (list "un" "millon")
+ (append x (list "millones")))
+ (spanish_number_from_digits sub_million))))
+
+ (t
+ (list "un" "numero" "muy" "gr'aaaaaandee")))))
+
+
+(define (spanish_token_to_words token name)
+ "(spanish_token_to_words TOKEN NAME)
+Returns a list of words for the NAME from TOKEN. This primarily
+allows the treatment of numbers, money etc."
+ (cond
+ ((string-matches name "[1-9][0-9]+")
+ (spanish_number name))
+ ((not (lts.in.alphabet name 'spanish_downcase))
+ ;; It contains some other than the lts can deal with
+ (let ((subwords))
+ (item.set_feat token "pos" "nn")
+ (mapcar
+ (lambda (letter)
+ ;; might be symbols or digits
+ (set! subwords
+ (append
+ subwords
+ (cond
+ ((string-matches letter "[0-9]")
+ (spanish_number letter))
+ ((string-matches letter "[A-ZÁÉÍÓÚÜÑ]")
+ (spanish_downcase letter))
+ (t
+ (list letter))))))
+ (symbolexplode name))
+ subwords))
+ (t
+ (list name))))
+
+(provide 'sptoken)
+