diff options
author | Matthias Urlichs <smurf@smurf.noris.de> | 2003-06-28 14:30:56 +0200 |
---|---|---|
committer | Matthias Urlichs <smurf@smurf.noris.de> | 2003-06-28 14:30:56 +0200 |
commit | 3e7895ec5ae81f7f1985650b47ae9c2c80159e9a (patch) | |
tree | 0341e6820ad1a2ec67b33e8e451839e7556eb1dd /lib/voices/spanish/el_diphone/festvox |
festvox-ellpc11k (1.4.0-3) unstable; urgency=low
* depend on festival >= 1.4.3-9 (Migration to /usr/share).
# imported from the archive
Diffstat (limited to 'lib/voices/spanish/el_diphone/festvox')
-rw-r--r-- | lib/voices/spanish/el_diphone/festvox/el_diphone.scm | 350 | ||||
-rw-r--r-- | lib/voices/spanish/el_diphone/festvox/spanint.scm | 69 | ||||
-rw-r--r-- | lib/voices/spanish/el_diphone/festvox/spanlex.scm | 757 | ||||
-rw-r--r-- | lib/voices/spanish/el_diphone/festvox/sptoken.scm | 227 |
4 files changed, 1403 insertions, 0 deletions
diff --git a/lib/voices/spanish/el_diphone/festvox/el_diphone.scm b/lib/voices/spanish/el_diphone/festvox/el_diphone.scm new file mode 100644 index 0000000..01ba7e5 --- /dev/null +++ b/lib/voices/spanish/el_diphone/festvox/el_diphone.scm @@ -0,0 +1,350 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; ;; +;;; Centre for Speech Technology Research ;; +;;; University of Edinburgh, UK ;; +;;; Copyright (c) 1996,1997 ;; +;;; All Rights Reserved. ;; +;;; ;; +;;; Permission is hereby granted, free of charge, to use and distribute ;; +;;; this software and its documentation without restriction, including ;; +;;; without limitation the rights to use, copy, modify, merge, publish, ;; +;;; distribute, sublicense, and/or sell copies of this work, and to ;; +;;; permit persons to whom this work is furnished to do so, subject to ;; +;;; the following conditions: ;; +;;; 1. The code must retain the above copyright notice, this list of ;; +;;; conditions and the following disclaimer. ;; +;;; 2. Any modifications must be clearly marked as such. ;; +;;; 3. Original authors' names are not deleted. ;; +;;; 4. The authors' names are not used to endorse or promote products ;; +;;; derived from this software without specific prior written ;; +;;; permission. ;; +;;; ;; +;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;; +;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;; +;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;; +;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;; +;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;; +;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;; +;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;; +;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;; +;;; THIS SOFTWARE. ;; +;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; A Castilian Spanish male voice +;;; +;;; Authors: Alistair Conkie (initially), Borja Etxebarria, Alan W Black +;;; Eduardo Lopez (who did the diphones) +;;; +;;; Eduardo Lopez, MSc student DAI 1992 made (and spoke) the diphones. +;;; +;;; Note although the front end is free the diphones are restricted to +;;; non-commercial use only. +;;; +;;; This is by no means complete: +;;; Intonation is pretty weak and without good phrase breaks +;;; the continous downward slope with small accents is not good +;;; Numbers are not dealt with properly when they are supposed to +;;; inflect for gender. +;;; There's no part of speech tagging which woudl allow number matching +;;; phrasing etc. +;;; The diphone database is missing accented vowels and diphthongs +;;; making small but not as good as it could be. +;;; It hasn't really been tested thoroughly +;;; + +;;; Add the directory contains general spanish stuff to load-path +(defvar spanish_el_dir (cdr (assoc 'el_diphone voice-locations))) +(set! load-path (cons (path-append spanish_el_dir "festvox/") load-path)) + +;;; other files we need +(require 'spanlex) +(require 'spanint) +(require 'sptoken) +(require_module 'UniSyn) + +(defPhoneSet + spanish + ;;; Phone Features + (;; vowel or consonant + (vc + -) + ;; vowel length: short long dipthong schwa + (vlng s l d a 0) + ;; vowel height: high mid low + (vheight 1 2 3 -) + ;; vowel frontness: front mid back + (vfront 1 2 3 -) + ;; lip rounding + (vrnd + -) + ;; consonant type: stop fricative affricative nasal liquid + (ctype s f a n l 0) + ;; place of articulation: labial alveolar palatal labio-dental + ;; dental velar + (cplace l a p b d v 0) + ;; consonant voicing + (cvox + -) + ) + ;; borja: all the features are almost ok, only some problems: + ;; r is a tap, rr is a trill. We would need "vibrant". Now, coded as liquid. + ;; l and ll are lateral. Now, coded as liquid (probably it's the samething) + ;; The bdg/BDG distinction (stop/aproximant) is not done. + ;; The i and u aproximants (sampa j and w, labio, agua) are not considered, + ;; normal i and u used instead. + ;; The ficative 'y' (sampa jj, ayer) is not considered, ll used instead. + ( + (# - 0 - - - 0 0 -) + (a + l 3 2 - 0 0 -) + (e + l 2 1 - 0 0 -) + (i + l 1 1 - 0 0 -) + (o + l 2 3 + 0 0 -) + (u + l 1 3 + 0 0 -) + (i0 + s 1 1 - 0 0 -) ;; weak vowels in dipthongs + (u0 + s 1 3 + 0 0 -) ;; weak vowels in dipthongs + + (a1 + l 3 2 - 0 0 -) + (e1 + l 2 1 - 0 0 -) + (i1 + l 1 1 - 0 0 -) + (o1 + l 2 3 + 0 0 -) + (u1 + l 1 3 + 0 0 -) + + (p - 0 - - - s l -) + (t - 0 - - - s d -) + (k - 0 - - - s v -) + (b - 0 - - - s l +) + (d - 0 - - - s d +) + (g - 0 - - - s v +) + + (f - 0 - - - f b -) + (th - 0 - - - f d -) + (s - 0 - - - f a -) + (x - 0 - - - f v -) + + (ch - 0 - - - a p -) + + (m - 0 - - - n l +) + (n - 0 - - - n a +) + (ny - 0 - - - n p +) + + (l - 0 - - - l a +) + (ll - 0 - - - l p +) + + (r - 0 - - - l a +) + (rr - 0 - - - l a +) + ) +) +(PhoneSet.silences '(#)) + +;;; Part of speech down by crude lookup using gpos +(set! spanish_guess_pos +'((fn + el la lo los las + un una unos unas +;; + mi tu su mis tus sus + nuestra vuestra nuestras vuestras nuestro vuestro nuestros vuestros + me te le nos os les se + al del +;; + a ante bajo cabe con contra de desde en entre + hacia hasta para por sin sobre tras mediante +;; + y e ni mas o "\'o" u pero aunque si + porque que quien cuando como donde cual cuan + aun pues tan mientras sino ) + (partnums + dieci venti trentai cuarentai cincuentai sesentai ochentai noventai) + ) +) + +;;; Phrase breaks +;;; use punctuation +(set! spanish_phrase_cart_tree +' +((lisp_token_end_punc in ("'" "\"" "?" "." "," ":" ";")) + ((B)) + ((n.name is 0) + ((B)) + ((NB))))) + +;;; Intonation +(set! spanish_accent_cart_tree + ' + ( + (R:SylStructure.parent.gpos is content) + ( (stress is 1) + ((Accented)) + ((NONE)) + ) + ) +) + + +;;; Duration +(set! spanish_dur_tree + ' + ((R:SylStructure.parent.R:Syllable.p.syl_break > 1 ) ;; clause initial + ((R:SylStructure.parent.stress is 1) + ((1.5)) + ((1.2))) + ((R:SylStructure.parent.syl_break > 1) ;; clause final + ((R:SylStructure.parent.stress is 1) + ((1.5)) + ((1.2))) + ((R:SylStructure.parent.stress is 1) + ((ph_vc is +) + ((1.2)) + ((1.0))) + ((1.0)))))) + +(set! spanish_el_phone_data +'( + (# 0.0 0.250) + (a 0.0 0.080) + (e 0.0 0.080) + (i 0.0 0.070) + (o 0.0 0.080) + (u 0.0 0.070) + (i0 0.0 0.040) + (u0 0.0 0.040) + (a1 0.0 0.090) + (e1 0.0 0.090) + (i1 0.0 0.080) + (o1 0.0 0.090) + (u1 0.0 0.080) + (b 0.0 0.065) + (ch 0.0 0.135) + (d 0.0 0.060) + (f 0.0 0.100) + (g 0.0 0.080) + (j 0.0 0.100) + (k 0.0 0.100) + (l 0.0 0.080) + (ll 0.0 0.105) + (m 0.0 0.070) + (n 0.0 0.080) + (ny 0.0 0.110) + (p 0.0 0.100) + (r 0.0 0.030) + (rr 0.0 0.080) + (s 0.0 0.110) + (t 0.0 0.085) + (th 0.0 0.100) + (x 0.0 0.130) +)) + +(set! el_lpc_sep + (list + '(name "el_lpc_sep") + (list 'index_file (path-append spanish_el_dir "dic/eldiph.est")) + '(grouped "false") + (list 'coef_dir (path-append spanish_el_dir "lpc")) + (list 'sig_dir (path-append spanish_el_dir "lpc")) + '(coef_ext ".lpc") + '(sig_ext ".res") + '(default_diphone "#-#"))) + +(set! el_lpc_group + (list + '(name "el_lpc_group") + (list 'index_file + (path-append spanish_el_dir "group/ellpc11k.group")) + '(grouped "true") + '(default_diphone "#-#"))) + +;; Go ahead and set up the diphone db +(us_diphone_init el_lpc_group) + +(define (el_diphone_fix utt) +"(el_diphone_fix UTT) +Map accents vowels to unaccented ones because the db doesn't +have them." + (mapcar + (lambda (s) + (let ((name (item.name s))) + (cond + ((string-matches name ".1") + (item.set_feat s "us_diphone" (string-before name "1"))) + ((string-matches name ".0") + (item.set_feat s "us_diphone" (string-before name "0")))))) + (utt.relation.items utt 'Segment)) + utt) + +(define (spanish_voice_reset) + "(spanish_voice_reset) +Reset global variables back to previous voice." + (set! token.prepunctuation spanish_previous_tok_prepunc) +) + +;;; Full voice definition +(define (voice_el_diphone) +"(voice_spanish_el) +Set up synthesis for Male Spanish speaker: Eduardo Lopez" + (voice_reset) + (Parameter.set 'Language 'spanish) + ;; Phone set + (Parameter.set 'PhoneSet 'spanish) + (PhoneSet.select 'spanish) + + ;; numeric expansion + (Parameter.set 'Token_Method 'Token_Any) + (set! token_to_words spanish_token_to_words) + + ;; Because of use of ' for accents remove it from prepunctuation + (set! spanish_previous_tok_prepunc token.prepunctuation) + (set! token.prepunctuation "\"`({[") + + ;; No pos prediction (get it from lexicon) + (set! pos_lex_name nil) + ;; Phrase break prediction by punctuation + (set! pos_supported nil) ;; well not real pos anyhow + ;; Phrasing + (set! phrase_cart_tree spanish_phrase_cart_tree) + (Parameter.set 'Phrase_Method 'cart_tree) + ;; Lexicon selection + (lex.select "spanish") + + ;; Accent and tone prediction + (set! int_accent_cart_tree spanish_accent_cart_tree) + + (Parameter.set 'Int_Target_Method 'Simple) + + (Parameter.set 'Int_Method 'General) + (set! int_general_params (list (list 'targ_func targ_func1))) + (set! guess_pos spanish_guess_pos) + + ;; Duration prediction + (set! duration_cart_tree spanish_dur_tree) + (set! duration_ph_info spanish_el_phone_data) + (Parameter.set 'Duration_Method 'Tree_ZScores) + + ;; Waveform synthesizer: diphones + (set! UniSyn_module_hooks (list el_diphone_fix)) + (set! us_abs_offset 0.0) + (set! window_factor 1.0) + (set! us_rel_offset 0.0) + (set! us_gain 0.9) + + (Parameter.set 'Synth_Method 'UniSyn) + (Parameter.set 'us_sigpr 'lpc) + (us_db_select 'el_lpc_group) + + ;; set callback to restore some original values changed by the spanish voice + (set! current_voice_reset spanish_voice_reset) + + (set! current-voice 'el_diphone) +) + +(proclaim_voice + 'el_diphone + '((language spanish) + (gender male) + (dialect castilian) + (description + "This voice provides a Castilian Spanish male voice using a + residual excited LPC diphone synthesis method. The lexicon + is provived by a set of letter to sound rules producing pronunciation + accents and syllabification. The durations, intonation and + prosodic phrasing are minimal but are acceptable for simple + examples."))) + +(provide 'el_diphone) diff --git a/lib/voices/spanish/el_diphone/festvox/spanint.scm b/lib/voices/spanish/el_diphone/festvox/spanint.scm new file mode 100644 index 0000000..6a1d44d --- /dev/null +++ b/lib/voices/spanish/el_diphone/festvox/spanint.scm @@ -0,0 +1,69 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; ;; +;;; Centre for Speech Technology Research ;; +;;; University of Edinburgh, UK ;; +;;; Copyright (c) 1996,1997 ;; +;;; All Rights Reserved. ;; +;;; ;; +;;; Permission is hereby granted, free of charge, to use and distribute ;; +;;; this software and its documentation without restriction, including ;; +;;; without limitation the rights to use, copy, modify, merge, publish, ;; +;;; distribute, sublicense, and/or sell copies of this work, and to ;; +;;; permit persons to whom this work is furnished to do so, subject to ;; +;;; the following conditions: ;; +;;; 1. The code must retain the above copyright notice, this list of ;; +;;; conditions and the following disclaimer. ;; +;;; 2. Any modifications must be clearly marked as such. ;; +;;; 3. Original authors' names are not deleted. ;; +;;; 4. The authors' names are not used to endorse or promote products ;; +;;; derived from this software without specific prior written ;; +;;; permission. ;; +;;; ;; +;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;; +;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;; +;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;; +;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;; +;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;; +;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;; +;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;; +;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;; +;;; THIS SOFTWARE. ;; +;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; Using the general intonation module add flattened hat accents +;;; of Accented syllables +;;; + +(define (targ_func1 utt syl ) + "funzioi bat" + (let ((start (item.feat syl 'syllable_start)) + (end (item.feat syl 'syllable_end)) + (ulen (item.feat (utt.relation.last utt 'Segment ) 'segment_end)) + nstart nend fustart fuend fuend fstart fend) + (set! nstart (/ start ulen)) + (set! nend (/ end ulen)) + (set! fustart '130) + (set! fuend '110) + (set! fstart (+ (* (- fuend fustart) nstart) fustart)) + (set! fend (+ (* (- fuend fustart) nend) fustart)) + + (cond + ((equal? (item.feat syl "R:Intonation.daughter1.name") "Accented") + (list + (list start fstart) + (list (+ start 0.010) (+ fstart 10 )) + (list (- end 0.010) (+ fstart 8 )) + (list end fend) + )) + ((not (item.next syl)) + (list + (list end fuend))) + ((not (item.prev syl)) + (list + (list start fustart))) + (t + nil)))) + + +(provide 'spanint) diff --git a/lib/voices/spanish/el_diphone/festvox/spanlex.scm b/lib/voices/spanish/el_diphone/festvox/spanlex.scm new file mode 100644 index 0000000..2a653ee --- /dev/null +++ b/lib/voices/spanish/el_diphone/festvox/spanlex.scm @@ -0,0 +1,757 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; ;; +;;; Centre for Speech Technology Research ;; +;;; University of Edinburgh, UK ;; +;;; Copyright (c) 1996,1997 ;; +;;; All Rights Reserved. ;; +;;; ;; +;;; Permission is hereby granted, free of charge, to use and distribute ;; +;;; this software and its documentation without restriction, including ;; +;;; without limitation the rights to use, copy, modify, merge, publish, ;; +;;; distribute, sublicense, and/or sell copies of this work, and to ;; +;;; permit persons to whom this work is furnished to do so, subject to ;; +;;; the following conditions: ;; +;;; 1. The code must retain the above copyright notice, this list of ;; +;;; conditions and the following disclaimer. ;; +;;; 2. Any modifications must be clearly marked as such. ;; +;;; 3. Original authors' names are not deleted. ;; +;;; 4. The authors' names are not used to endorse or promote products ;; +;;; derived from this software without specific prior written ;; +;;; permission. ;; +;;; ;; +;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;; +;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;; +;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;; +;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;; +;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;; +;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;; +;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;; +;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;; +;;; THIS SOFTWARE. ;; +;;; ;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; Authors: Alistair Conkie, Borja Etxebarria and Alan W Black +;;; +;;; letter to sounds rules and functions to produce stressed syllabified +;;; pronunciations for Spanish words +;;; There is some history in one set of the LTS rules back to +;;; Rob van Gerwen, University of Nijmegen. +;;; + +;;; Lexicon +(lex.create "spanish") +(lex.set.phoneset "spanish") +(lex.set.lts.method 'spanish_lts) +(lex.set.lts.ruleset 'spanish) + +;;; This which just have to be in the lexicon +;(lex.add.entry '("a" nn (((a) 0)))) +(lex.add.entry '("b" nn (((b e) 0)))) +(lex.add.entry '("c" nn (((th e) 0)))) +(lex.add.entry '("d" nn (((d e) 0)))) +;(lex.add.entry '("e" nn (((e) 0)))) +(lex.add.entry '("f" nn (((e1) 1)((f e) 0)))) +(lex.add.entry '("g" nn (((g e) 0)))) +(lex.add.entry '("h" nn (((a1) 1)((ch e) 0)))) +;(lex.add.entry '("i" nn (((i) 0)))) +(lex.add.entry '("j" nn (((x o1) 1)((t a) 0)))) +(lex.add.entry '("k" nn (((k a) 0)))) +(lex.add.entry '("l" nn (((e1) 1)((l e) 0)))) +(lex.add.entry '("m" nn (((e1) 1)((m e) 0)))) +(lex.add.entry '("n" nn (((e1) 1)((n e) 0)))) +(lex.add.entry '("~n" nn (((e1) 1)((ny e) 0)))) +(lex.add.entry '("ñ" nn (((e1) 1)((ny e) 0)))) +;(lex.add.entry '("o" nn (((o) 0)))) +(lex.add.entry '("p" nn (((p e) 0)))) +(lex.add.entry '("q" nn (((k u) 0)))) +(lex.add.entry '("r" nn (((e1) 1)((rr e) 0)))) +(lex.add.entry '("s" nn (((e1) 1) ((s e) 0)))) +(lex.add.entry '("t" nn (((t e) 0)))) +;(lex.add.entry '("u" nn (((u) 0)))) +(lex.add.entry '("v" nn (((u1) 1)((b e) 0)))) +(lex.add.entry '("w" nn (((u) 0) ((b e) 0) ((d o1) 1) ((b l e) 0)))) +(lex.add.entry '("x" nn (((e1) 1)((k i s) 0)))) +;(lex.add.entry '("y" nn (((i) 0)((g r i e1) 1))((g a) 0))) ;; doubt: stres +(lex.add.entry '("z" nn (((th e1) 1)((t a) 0)))) +;(lex.add.entry '("á" nn (((a) 0)))) +;(lex.add.entry '("é" nn (((e) 0)))) +;(lex.add.entry '("í" nn (((i) 0)))) +;(lex.add.entry '("ó" nn (((o) 0)))) +;(lex.add.entry '("ú" nn (((u) 0)))) +;(lex.add.entry '("ü" nn (((u) 0)))) +(lex.add.entry + '("*" n (((a s) 0) ((t e) 0) ((r i1 s) 1) ((k o) 0)))) +(lex.add.entry + '("%" n (((p o r) 0) ((th i e1 n) 1) ((t o) 0)))) +(lex.add.entry + '("&" n (((a1 m) 1) ((p e r) 0) ((s a n) 0)))) +(lex.add.entry + '("$" n (((d o1) 1) ((l a r) 0)))) +(lex.add.entry + '("#" n (((a l) 0) ((m u a) 0) ((d i1) 1) ((ll a) 0)))) +(lex.add.entry + '("@" n (((a) 0) ((rr o1) 1) ((b a) 0)))) +(lex.add.entry + '("+" n (((m a s) 0)) ((pos "K7%" "OA%" "T-%")))) +(lex.add.entry + '("^" n (((k a1) 1) ((r e t) 0)) ((pos "K6$")))) +(lex.add.entry + '("~" n (((t i1 l) 1) ((d e) 0)) ((pos "K6$")))) +(lex.add.entry + '("=" n (((i) 0) ((g u a1 l) 1)))) +(lex.add.entry + '("/" n (((e1 n ) 1) ((t r e) 0)))) ;; $$$division, etc. +(lex.add.entry + '("\\" n (((b a1) 1) ((rr a) 1)))) +(lex.add.entry + '("_" n (((s u b) 0) ((rr a) 0) ((ll a1) 1) ((d o) 0)) )) +(lex.add.entry + '("|" n (((b a1) 1) ((rr a) 0)))) +(lex.add.entry + '(">" n ((( m a ) 0) ((ll o1 r) 1) ((k e) 0)))) +(lex.add.entry + '("<" n ((( m e ) 0) ((n o1 r) 1) ((k e) 0)))) +(lex.add.entry + '("[" n ((( a) 0) ((b r i1 r) 1) ((k o r) 0)((ch e1) 1)((t e) 0)))) +(lex.add.entry + '("]" n (((th e) 0) ((rr a1 r) 1) ((k o r) 0)((ch e1) 1)((t e) 0)))) +(lex.add.entry + '(" " n (((e s) 0)((p a1) 1)((th i o) 0)))) +(lex.add.entry + '("\t" n (((t a1 b) 1)))) +(lex.add.entry + '("\n" n (((n u e1) 1) ((b a) 0)((l i) 1) ((n e a) 0)))) + +(lex.add.entry '("." punc nil)) +(lex.add.entry '("." nn (((p u1 n) 1) ((t o) 0)))) +(lex.add.entry '("'" punc nil)) +(lex.add.entry '(":" punc nil)) +(lex.add.entry '(";" punc nil)) +(lex.add.entry '("," punc nil)) +(lex.add.entry '("," nn (((k o1) 1) ((m a) 0)))) +(lex.add.entry '("-" punc nil)) +(lex.add.entry '("\"" punc nil)) +(lex.add.entry '("`" punc nil)) +(lex.add.entry '("?" punc nil)) +(lex.add.entry '("!" punc nil)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Down cases with accents +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(lts.ruleset + spanish_downcase + ( ) + ( + ( [ a ] = a ) + ( [ e ] = e ) + ( [ i ] = i ) + ( [ o ] = o ) + ( [ u ] = u ) + ( [ á ] = á ) + ( [ é ] = é ) + ( [ í ] = í ) + ( [ ó ] = ó ) + ( [ ú ] = ú ) + ( [ ü ] = ü ) + ( [ b ] = b ) + ( [ c ] = c ) + ( [ "ç" ] = s ) + ( [ d ] = d ) + ( [ f ] = f ) + ( [ g ] = g ) + ( [ h ] = h ) + ( [ j ] = j ) + ( [ k ] = k ) + ( [ l ] = l ) + ( [ m ] = m ) + ( [ n ] = n ) + ( [ ñ ] = ñ ) + ( [ p ] = p ) + ( [ q ] = q ) + ( [ r ] = r ) + ( [ s ] = s ) + ( [ t ] = t ) + ( [ v ] = v ) + ( [ w ] = w ) + ( [ x ] = x ) + ( [ y ] = y ) + ( [ z ] = z ) + ( [ "\'" ] = "\'" ) + ( [ : ] = : ) + ( [ ~ ] = ~ ) + ( [ "\"" ] = "\"" ) + ( [ A ] = a ) + ( [ E ] = e ) + ( [ I ] = i ) + ( [ O ] = o ) + ( [ U ] = u ) + ( [ Á ] = á ) + ( [ É ] = é ) + ( [ Í ] = í ) + ( [ Ó ] = ó ) + ( [ Ú ] = ú ) + ( [ Ü ] = ü ) + ( [ B ] = b ) + ( [ C ] = c ) + ( [ "Ç" ] = s ) + ( [ D ] = d ) + ( [ F ] = f ) + ( [ G ] = g ) + ( [ H ] = h ) + ( [ J ] = j ) + ( [ K ] = k ) + ( [ L ] = l ) + ( [ M ] = m ) + ( [ N ] = n ) + ( [ Ñ ] = ñ ) + ( [ P ] = p ) + ( [ Q ] = q ) + ( [ R ] = r ) + ( [ S ] = s ) + ( [ T ] = t ) + ( [ V ] = v ) + ( [ W ] = w ) + ( [ X ] = x ) + ( [ Y ] = y ) + ( [ Z ] = z ) +)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Main letter to sound rules +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; borja: some rules updated or deleted. +; Rules for directly accented vowels, are typed using +; the sun character set and codepage ISO 8859/1 Latin 1. This +; matches the one on Linux and Windows for our purposes, so +; almost everybody happy. +; Umlaut (dieresis) management. I have considered +; three diferent ways to include the umlaut for spanish in +; festival, using <:> or <">. example: ping:uino ping"uino, +; and of course, directly typing the weird thing (ü). +; Accented vowels can be typed both directly (á) or as a +; quote preceding the plain vowel ('a). example: cami'on camión + +(lts.ruleset +; Name of rule set + spanish +; Sets used in the rules +( + (LNS l n s ) + (DNSR d n s r ) + (EI e i é í) ; note that accented vowels are included in this set + (AEIOUt á é í ó ú ) + (V a e i o u ) + (C b c d f g h j k l m n ñ ~ p q r s t v w x y z ) +) +; Rules +( + + ; these weird rule, to break dipthongs at end of words like atribuid atribuido,... + ( "'" V* C* u [ i ] DNSR # = i ) + ( AEIOUt V* C* u [ i ] DNSR # = i ) ;; $$$ ~n and so, what will do? + ( u [ i ] DNSR # = i1 ) + ( "'" V* C* u [ i ] d V # = i ) + ( AEIOUt V* C* u [ i ] d V # = i ) + ( u [ i ] d AEIOUt # = i ) ;; not sure about these two + ( u [ i ] d V # = i1 ) ;; + + + ( [ a ] = a ) + ( [ e ] = e ) + ( [ i ] = i ) + ( [ o ] = o ) + ( [ u ] = u ) + ( [ "'" a ] = a1 ) + ( [ "'" e ] = e1 ) + ( [ "'" i ] = i1 ) + ( [ "'" o ] = o1 ) + ( [ "'" u ] = u1 ) + ( [ á ] = a1 ) + ( [ é ] = e1 ) + ( [ í ] = i1 ) + ( [ ó ] = o1 ) + ( [ ú ] = u1 ) + ( [ ":" u ] = u ) ; umlaut (u dieresis) (should not happen, only with g, and already removed) + ( [ "\"" u ] = u ) + ( [ ü ] = u ) + + ( [ b ] = b ) + ( [ v ] = b ) + ( [ c ] "'" EI = th ) + ( [ c ] EI = th ) + ( [ c h ] = ch ) + ( [ c ] = k ) + ( [ d ] = d ) + ( [ f ] = f ) + ( [ g ] "'" EI = x ) + ( [ g ] EI = x ) + ( [ g u ] "'" EI = g ) + ( [ g u ] EI = g ) + + ( [ g ":" u ] EI = g u ) ; umlaut (u dieresis) + ( [ g ":" u ] "'" EI = g u ) + ( [ g "\"" u ] EI = g u ) + ( [ g "\"" u ] "'" EI = g u ) + ( [ g ü ] EI = g u ) + ( [ g ü ] "'" EI = g u ) + + ( [ g ] = g ) + ( [ h ] = ) + ( [ j ] = x ) + ( [ k ] = k ) + ( [ l l ] # = l ) + ( [ l l ] = ll ) + ( [ l ] = l ) + ( [ m ] = m ) + ( [ "~" n ] = ny ) + ( [ ñ ] = ny ) + ( [ n ] = n ) + ( [ p ] = p ) + ( [ p h ] = f ) ;; to speak a bit of greek. + ( [ q u ] a = k u ) ;; no castillian word uses this, but it would be pronounced this way in greek and foreign words (aquarium, quo, etc) + ( [ q u ] = k ) + ( [ q ] = k ) ;; should't happend, but if you type it... + ( [ r r ] = rr ) + ( # [ r ] = rr ) + ( LNS [ r ] = rr ) + ( [ r ] = r ) + ( [ s ] = s ) + ( # [ s ] C = e s ) + ( # [ s ] "'" C = e s ) + ( # [ s ] ":" C = e s ) + ( # [ s ] "\"" C = e s ) + ( [ t ] = t ) + ( [ w ] = u ) + ( [ x ] = k s ) + + ( [ y ] # = i ) + ( [ y ] C = i ) + ( [ y ] "'" C = i ) + ( [ y ] ":" C = i ) + ( [ y ] "\"" C = i ) + ( [ y ] = ll ) + + ( [ z ] = th ) + + ; quotes are used for vowel accents in foreign keyboards (i.e. cami'on). + ; remove those that were not before a vowel. same with other signs. + ( [ "'" ] = ) + ( [ ":" ] = ) + ( [ "\"" ] = ) + ( [ "~" ] = ) +)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Spanish sylabification by rewrite rules +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(lts.ruleset + spanish_syl + ( (V a1 i1 u1 e1 o1 a i u e o ) + (IUT i1 u1 ) + (C b ch d f g x k l ll m n ny p r rr s t th ) + ) + ;; Rules will add - at syllable boundary + ( + ;; valid CC groups + ( V C * [ b l ] V = - b l ) + ( V C * [ b r ] V = - b r ) + ( V C * [ k l ] V = - k l ) + ( V C * [ k r ] V = - k r ) + ( V C * [ k s ] V = - k s ) ; for words with "x" + ( V C * [ d r ] V = - d r ) + ( V C * [ f l ] V = - f l ) + ( V C * [ f r ] V = - f r ) + ( V C * [ g l ] V = - g l ) + ( V C * [ g r ] V = - g r ) + ( V C * [ p l ] V = - p l ) + ( V C * [ p r ] V = - p r ) + ( V C * [ t l ] V = - t l ) + ( V C * [ t r ] V = - t r ) + + ;; triptongs + ( [ i a i ] = i a i ) + ( [ i a u ] = i a u ) + ( [ u a i ] = u a i ) + ( [ u a u ] = u a u ) + ( [ i e i ] = i e i ) + ( [ i e u ] = i e u ) + ( [ u e i ] = u e i ) + ( [ u e u ] = u e u ) + ( [ i o i ] = i o i ) + ( [ i o u ] = i o u ) + ( [ u o i ] = u o i ) + ( [ u o u ] = u o u ) + ( [ i a1 i ] = i a1 i ) + ( [ i a1 u ] = i a1 u ) + ( [ u a1 i ] = u a1 i ) + ( [ u a1 u ] = u a1 u ) + ( [ i e1 i ] = i e1 i ) + ( [ i e1 u ] = i e1 u ) + ( [ u e1 i ] = u e1 i ) + ( [ u e1 u ] = u e1 u ) + ( [ i o1 i ] = i o1 i ) + ( [ i o1 u ] = i o1 u ) + ( [ u o1 i ] = u o1 i ) + ( [ u o1 u ] = u o1 u ) + + ;; break invalid triptongs + ( IUT [ i a ] = - i a ) + ( IUT [ i e ] = - i e ) + ( IUT [ i o ] = - i o ) + ( IUT [ u a ] = - u a ) + ( IUT [ u e ] = - u e ) + ( IUT [ u o ] = - u o ) + ( IUT [ a i ] = - a i ) + ( IUT [ e i ] = - e i ) + ( IUT [ o i ] = - o i ) + ( IUT [ a u ] = - a u ) + ( IUT [ e u ] = - e u ) + ( IUT [ o u ] = - o u ) + ( IUT [ i u ] = - i u ) + ( IUT [ u i ] = - u i ) + ( IUT [ i a1 ] = - i a1 ) + ( IUT [ i e1 ] = - i e1 ) + ( IUT [ i o1 ] = - i o1 ) + ( IUT [ u a1 ] = - u a1 ) + ( IUT [ u e1 ] = - u e1 ) + ( IUT [ u o1 ] = - u o1 ) + ( IUT [ a1 i ] = - a1 i ) + ( IUT [ e1 i ] = - e1 i ) + ( IUT [ o1 i ] = - o1 i ) + ( IUT [ a1 u ] = - a1 u ) + ( IUT [ e1 u ] = - e1 u ) + ( IUT [ o1 u ] = - o1 u ) + ( IUT [ i u1 ] = - i u1 ) + ( IUT [ u i1 ] = - u i1 ) + + ;; diptongs + ( [ i a ] = i a ) + ( [ i e ] = i e ) + ( [ i o ] = i o ) + ( [ u a ] = u a ) + ( [ u e ] = u e ) + ( [ u o ] = u o ) + ( [ a i ] = a i ) + ( [ e i ] = e i ) + ( [ o i ] = o i ) + ( [ a u ] = a u ) + ( [ e u ] = e u ) + ( [ o u ] = o u ) + ( [ i u ] = i u ) + ( [ u i ] = u i ) + ( [ i a1 ] = i a1 ) + ( [ i e1 ] = i e1 ) + ( [ i o1 ] = i o1 ) + ( [ u a1 ] = u a1 ) + ( [ u e1 ] = u e1 ) + ( [ u o1 ] = u o1 ) + ( [ a1 i ] = a1 i ) + ( [ e1 i ] = e1 i ) + ( [ o1 i ] = o1 i ) + ( [ a1 u ] = a1 u ) + ( [ e1 u ] = e1 u ) + ( [ o1 u ] = o1 u ) + ( [ u1 i ] = u1 i ) + ( [ i1 u ] = i1 u ) + + ;; Vowels preceeded by vowels are syllable breaks + ;; triptongs and diptongs are dealt with above + ( V [ a ] = - a ) + ( V [ i ] = - i ) + ( V [ u ] = - u ) + ( V [ e ] = - e ) + ( V [ o ] = - o ) + ( V [ a1 ] = - a1 ) + ( V [ e1 ] = - e1 ) + ( V [ i1 ] = - i1 ) + ( V [ o1 ] = - o1 ) + ( V [ u1 ] = - u1 ) + + ;; If any consonant is followed by a vowel and there is a vowel + ;; before it, its a syl break + ;; the consonant cluster are dealt with above + ( V C * [ b ] V = - b ) + ( V C * [ ch ] V = - ch ) + ( V C * [ d ] V = - d ) + ( V C * [ f ] V = - f ) + ( V C * [ g ] V = - g ) + ( V C * [ x ] V = - x ) + ( V C * [ k ] V = - k ) + ( V C * [ l ] V = - l ) + ( V C * [ ll ] V = - ll ) + ( V C * [ m ] V = - m ) + ( V C * [ n ] V = - n ) + ( V C * [ ny ] V = - ny ) + ( V C * [ p ] V = - p ) + ( V C * [ r ] V = - r ) + ( V C * [ rr ] V = - rr ) + ( V C * [ s ] V = - s ) + ( V C * [ t ] V = - t ) + ( V C * [ th ] V = - th ) + + ;; Catch all consonants on their own (at end of word) + ;; and vowels not preceded by vowels are just written as it + ( [ b ] = b ) + ( [ ch ] = ch ) + ( [ d ] = d ) + ( [ f ] = f ) + ( [ g ] = g ) + ( [ x ] = x ) + ( [ k ] = k ) + ( [ l ] = l ) + ( [ ll ] = ll ) + ( [ m ] = m ) + ( [ n ] = n ) + ( [ ny ] = ny ) + ( [ p ] = p ) + ( [ r ] = r ) + ( [ rr ] = rr ) + ( [ s ] = s ) + ( [ t ] = t ) + ( [ th ] = th ) + ( [ a ] = a ) + ( [ i ] = i ) + ( [ u ] = u ) + ( [ e ] = e ) + ( [ o ] = o ) + ( [ a1 ] = a1 ) + ( [ i1 ] = i1 ) + ( [ u1 ] = u1 ) + ( [ e1 ] = e1 ) + ( [ o1 ] = o1 ) + ) +) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Stress assignment in unstress words by rewrite rules +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(lts.ruleset + ;; Assign stress to a vowel when non-exists + spanish.stress + ( + (UV a i u e o) + (V a1 i1 u1 e1 o1 a i u e o) + (V1 a1 i1 u1 e1 o1) + (VNS n s a i u e o) + (C b ch d f g j k l ll m n ny p r rr s t th x ) + (VC b ch d f g j k l ll m n ny p r rr s t th x a1 i1 u1 e1 o1 a i u e o) + (ANY b ch d f g j k l ll m n ny p r rr s t th x - a1 i1 u1 e1 o1 a i u e o) + (notNS b ch d f g j k l ll m ny p r rr t th x ) + (iu i u ) + (aeo a e o) + ) + ( + ;; consonants to themselves + ( [ b ] = b ) + ( [ d ] = d ) + ( [ ch ] = ch ) + ( [ f ] = f ) + ( [ g ] = g ) + ( [ j ] = j ) + ( [ k ] = k ) + ( [ l ] = l ) + ( [ ll ] = ll ) + ( [ m ] = m ) + ( [ n ] = n ) + ( [ ny ] = ny ) + ( [ p ] = p ) + ( [ r ] = r ) + ( [ rr ] = rr ) + ( [ s ] = s ) + ( [ t ] = t ) + ( [ th ] = th ) + ( [ x ] = x ) + ( [ - ] = - ) + ;; stressed vowels to themselves + ( [ a1 ] = a1 ) + ( [ i1 ] = i1 ) + ( [ u1 ] = u1 ) + ( [ e1 ] = e1 ) + ( [ o1 ] = o1 ) + + ( V1 ANY * [ a ] = a ) + ( V1 ANY * [ e ] = e ) + ( V1 ANY * [ i ] = i ) + ( V1 ANY * [ o ] = o ) + ( V1 ANY * [ u ] = u ) + ( [ a ] ANY * V1 = a ) + ( [ e ] ANY * V1 = e ) + ( [ i ] ANY * V1 = i ) + ( [ o ] ANY * V1 = o ) + ( [ u ] ANY * V1 = u ) + + ;; We'll only get here when the vowel is in an unstressed word + ;; two more syllables so don't worry about it yet + ( [ a ] VC * - VC * - = a ) + ( [ e ] VC * - VC * - = e ) + ( [ i ] VC * - VC * - = i ) + ( [ o ] VC * - VC * - = o ) + ( [ u ] VC * - VC * - = u ) + + ( [ a ] ANY * - VC * aeo i # = a ) + ( [ e ] ANY * - VC * aeo i # = e ) + ( [ i ] ANY * - VC * aeo i # = i ) + ( [ o ] ANY * - VC * aeo i # = o ) + ( [ u ] ANY * - VC * aeo i # = u ) + + ( [ a ] VC * - VC * VNS # = a1 ) + ( [ e ] VC * - VC * VNS # = e1 ) + ( [ o ] VC * - VC * VNS # = o1 ) + ( [ i ] aeo C * - VC * VNS # = i ) + ( [ u ] aeo C * - VC * VNS # = u ) + ( aeo [ i ] C * - VC * VNS # = i ) + ( aeo [ u ] C * - VC * VNS # = u ) + ( [ u ] C * - VC * VNS # = u1 ) + ( [ i ] C * - VC * VNS # = i1 ) + + ( [ a ] i # = a1 ) + ( [ e ] i # = e1 ) + ( [ o ] i # = o1 ) + + ;; stress on previous syllable + ( - VC * [ a ] VC * VNS # = a ) + ( - VC * [ e ] VC * VNS # = e ) + ( - VC * [ i ] VC * VNS # = i ) + ( - VC * [ o ] VC * VNS # = o ) + ( - VC * [ u ] VC * VNS # = u ) + ( - VC * [ a ] # = a ) + ( - VC * [ e ] # = e ) + ( - VC * [ i ] # = i ) + ( - VC * [ o ] # = o ) + ( - VC * [ u ] # = u ) + + ;; stress on final syllable + ( [ a ] VC * # = a1 ) + ( [ e ] VC * # = e1 ) + ( [ o ] VC * # = o1 ) + ( aeo [ i ] VC * # = i ) + ( aeo [ u ] VC * # = u ) + ( [ i ] aeo VC * # = i ) + ( [ u ] aeo VC * # = u ) + ( [ i ] VC * # = i1 ) + ( [ u ] VC * # = u1 ) + + ( [ a ] = a ) + ( [ e ] = e ) + ( [ i ] = i ) + ( [ o ] = o ) + ( [ u ] = u ) + +)) + +(lts.ruleset + ;; reduce i and u in diphthongs to u0 i0 + spanish_weak_vowels + ( + (aeo a e o a1 e1 o1 i1 u1 ) + ) + ( + ;; consonants to themselves + ( [ b ] = b ) + ( [ d ] = d ) + ( [ ch ] = ch ) + ( [ f ] = f ) + ( [ g ] = g ) + ( [ j ] = j ) + ( [ k ] = k ) + ( [ l ] = l ) + ( [ ll ] = ll ) + ( [ m ] = m ) + ( [ n ] = n ) + ( [ ny ] = ny ) + ( [ p ] = p ) + ( [ r ] = r ) + ( [ rr ] = rr ) + ( [ s ] = s ) + ( [ t ] = t ) + ( [ th ] = th ) + ( [ x ] = x ) + ( [ - ] = - ) + ;; stressed vowels to themselves + ( [ a1 ] = a1 ) + ( [ i1 ] = i1 ) + ( [ u1 ] = u1 ) + ( [ e1 ] = e1 ) + ( [ o1 ] = o1 ) + + ( aeo [ i ] = i0 ) + ( [ i ] aeo = i0 ) + ( aeo [ u ] = u0 ) + ( [ u ] aeo = u0 ) + + ( [ a ] = a ) + ( [ i ] = i ) + ( [ u ] = u ) + ( [ e ] = e ) + ( [ o ] = o ) +)) + +;;; +;;; Function to turn word into lexical entry for Spanish +;;; +;;; First uses lts to get phoneme string then assigns stress if +;;; there is no stress and then uses a third set of rules to +;;; mark syllable boundaries, finally converting that list +;;; to the bracket structure festival requires +;;; + +(define (spanish_lts word features) + "(spanish_lts WORD FEATURES) +Using various letter to sound rules build a Spanish pronunciation of +WORD." + (let (phones syl stresssyl dword weakened) + (if (lts.in.alphabet word 'spanish_downcase) + (set! dword (spanish_downcase word)) + (set! dword (spanish_downcase "equis"))) + (set! phones (lts.apply dword 'spanish)) + (set! syl (lts.apply phones 'spanish_syl)) + (if (spanish_is_a_content_word + (apply string-append dword) + spanish_guess_pos) + (set! stresssyl (lts.apply syl 'spanish.stress)) + (set! stresssyl syl)) ;; function words leave as is + (set! weakened (lts.apply stresssyl 'spanish_weak_vowels)) + (list word + nil + (spanish_tosyl_brackets weakened)))) + +(define (spanish_is_a_content_word word poslist) + "(spanish_is_a_content_word WORD POSLIST) +Check explicit list of function words and return t if this is not +listed." + (cond + ((null poslist) + t) + ((member_string word (cdr (car poslist))) + nil) + (t + (spanish_is_a_content_word word (cdr poslist))))) + +(define (spanish_downcase word) + "(spanish_downcase WORD) +Downs case word by letter to sound rules becuase or accented form +this can't use the builtin downcase function." + (lts.apply word 'spanish_downcase)) + +(define (spanish_tosyl_brackets phones) + "(spanish_tosyl_brackets phones) +Takes a list of phones containing - as syllable boundary. Construct the +Festival bracket structure." + (let ((syl nil) (syls nil) (p phones) (stress 0)) + (while p + (set! syl nil) + (set! stress 0) + (while (and p (not (eq? '- (car p)))) + (set! syl (cons (car p) syl)) + (if (string-matches (car p) ".*1") + (set! stress 1)) + (set! p (cdr p))) + (set! p (cdr p)) ;; skip the syllable separator + (set! syls (cons (list (reverse syl) stress) syls))) + (reverse syls))) + +(provide 'spanlex) + diff --git a/lib/voices/spanish/el_diphone/festvox/sptoken.scm b/lib/voices/spanish/el_diphone/festvox/sptoken.scm new file mode 100644 index 0000000..cc2dbac --- /dev/null +++ b/lib/voices/spanish/el_diphone/festvox/sptoken.scm @@ -0,0 +1,227 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; ;; +;;; Centre for Speech Technology Research ;; +;;; University of Edinburgh, UK ;; +;;; Copyright (c) 1996,1997 ;; +;;; All Rights Reserved. ;; +;;; ;; +;;; Permission is hereby granted, free of charge, to use and distribute ;; +;;; this software and its documentation without restriction, including ;; +;;; without limitation the rights to use, copy, modify, merge, publish, ;; +;;; distribute, sublicense, and/or sell copies of this work, and to ;; +;;; permit persons to whom this work is furnished to do so, subject to ;; +;;; the following conditions: ;; +;;; 1. The code must retain the above copyright notice, this list of ;; +;;; conditions and the following disclaimer. ;; +;;; 2. Any modifications must be clearly marked as such. ;; +;;; 3. Original authors' names are not deleted. ;; +;;; 4. The authors' names are not used to endorse or promote products ;; +;;; derived from this software without specific prior written ;; +;;; permission. ;; +;;; ;; +;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;; +;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;; +;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;; +;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;; +;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;; +;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;; +;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;; +;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;; +;;; THIS SOFTWARE. ;; +;;; ;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; Tokenization rules for spanish +;;; +;;; Particularly numbers and symbols. +;;; +;;; As the "el" database has no dipthongs, numbers sound much +;;; better removing the weak vowel ("ventiuno" instead of "veintiuno") +;;; Many speakers do this, so no problem with it. + +(define (spanish_number name) +"(spanish_number name) +Convert a string of digits into a list of words saying the number." + (if (string-matches name "0") + (list "cero") + (spanish_number_from_digits (symbolexplode name)))) + +(define (just_zeros digits) +"(just_zeros digits) +If this only contains 0s then we just do something different." + (cond + ((not digits) t) + ((string-equal "0" (car digits)) + (just_zeros (cdr digits))) + (t nil))) + +(define (spanish_number_from_digits digits) + "(spanish_number_from_digits digits) +Takes a list of digits and converts it to a list of words +saying the number." + (let ((l (length digits))) + (cond + ((equal? l 0) + nil) + ((string-equal (car digits) "0") + (spanish_number_from_digits (cdr digits))) + ((equal? l 1);; single digit + (cond + ((string-equal (car digits) "0") (list "cero")) + ((string-equal (car digits) "1") (list "un")) + ((string-equal (car digits) "2") (list "dos")) + ((string-equal (car digits) "3") (list "tres")) + ((string-equal (car digits) "4") (list "cuatro")) + ((string-equal (car digits) "5") (list "cinco")) + ((string-equal (car digits) "6") (list "seis")) + ((string-equal (car digits) "7") (list "siete")) + ((string-equal (car digits) "8") (list "ocho")) + ((string-equal (car digits) "9") (list "nueve")) + ;; fill in the rest + (t (list "equis"))));; $$$ what should say? + ((equal? l 2);; less than 100 + (cond + ((string-equal (car digits) "0");; 0x + (spanish_number_from_digits (cdr digits))) + + ((string-equal (car digits) "1");; 1x + (cond + ((string-equal (car (cdr digits)) "0") (list "diez")) + ((string-equal (car (cdr digits)) "1") (list "once")) + ((string-equal (car (cdr digits)) "2") (list "doce")) + ((string-equal (car (cdr digits)) "3") (list "trece")) + ((string-equal (car (cdr digits)) "4") (list "catorce")) + ((string-equal (car (cdr digits)) "5") (list "quince")) + (t + (cons "dieci" (spanish_number_from_digits (cdr digits)))))) + + ((string-equal (car digits) "2");; 2x + (if (string-equal (car (cdr digits)) "0") + (list "veinte") + (cons "venti" (spanish_number_from_digits (cdr digits))))) + + ((string-equal (car digits) "3");; 3x + (if (string-equal (car (cdr digits)) "0") + (list "treinta") + (cons "trentai" (spanish_number_from_digits (cdr digits))))) + + ((string-equal (car digits) "4");; 4x + (if (string-equal (car (cdr digits)) "0") + (list "cuarenta") + (cons "cuarentai" (spanish_number_from_digits (cdr digits))))) + + ((string-equal (car digits) "5");; 5x + (if (string-equal (car (cdr digits)) "0") + (list "cincuenta") + (cons "cincuentai" (spanish_number_from_digits (cdr digits))))) + + ((string-equal (car digits) "6");; 6x + (if (string-equal (car (cdr digits)) "0") + (list "sesenta") + (cons "sesentai" (spanish_number_from_digits (cdr digits))))) + + ((string-equal (car digits) "7");; 7x + (if (string-equal (car (cdr digits)) "0") + (list "setenta") + (cons "setentai" (spanish_number_from_digits (cdr digits))))) + + ((string-equal (car digits) "8");; 8x + (if (string-equal (car (cdr digits)) "0") + (list "ochenta") + (cons "ochentai" (spanish_number_from_digits (cdr digits))))) + + ((string-equal (car digits) "9");; 9x + (if (string-equal (car (cdr digits)) "0") + (list "noventa") + (cons "noventai" (spanish_number_from_digits (cdr digits))))) + + )) + + ((equal? l 3);; in the hundreds + (cond + + ((string-equal (car digits) "1");; 1xx + (if (just_zeros (cdr digits)) (list "cien") + (cons "ciento" (spanish_number_from_digits (cdr digits))))) + + ((string-equal (car digits) "5");; 5xx + (cons "quinientos" (spanish_number_from_digits (cdr digits)))) + + ((string-equal (car digits) "7");; 7xx + (cons "setecientos" (spanish_number_from_digits (cdr digits)))) + + ((string-equal (car digits) "9");; 9xx + (cons "novecientos" (spanish_number_from_digits (cdr digits)))) + + (t;; ?xx + (append (spanish_number_from_digits (list (car digits))) + (list "cientos") + (spanish_number_from_digits (cdr digits)))) + )) + + ((< l 7) + (let ((sub_thousands + (list + (car (cdr (cdr (reverse digits)))) + (car (cdr (reverse digits))) + (car (reverse digits)))) + (thousands (reverse (cdr (cdr (cdr (reverse digits))))))) + (set! x (spanish_number_from_digits thousands)) + (append + (if (string-equal (car x) "un") nil x) + (list "mil") + (spanish_number_from_digits sub_thousands)))) + + ((< l 13) + (let ((sub_million + (list + (car (cdr (cdr (cdr (cdr (cdr(reverse digits))))))) + (car (cdr (cdr (cdr (cdr (reverse digits)))))) + (car (cdr (cdr (cdr (reverse digits))))) + (car (cdr (cdr (reverse digits)))) + (car (cdr (reverse digits))) + (car (reverse digits)) + )) + (millions (reverse (cdr (cdr (cdr (cdr (cdr (cdr (reverse digits)))))))))) + (set! x (spanish_number_from_digits millions)) + (append + (if (string-equal (car x) "un") + (list "un" "millon") + (append x (list "millones"))) + (spanish_number_from_digits sub_million)))) + + (t + (list "un" "numero" "muy" "gr'aaaaaandee"))))) + + +(define (spanish_token_to_words token name) + "(spanish_token_to_words TOKEN NAME) +Returns a list of words for the NAME from TOKEN. This primarily +allows the treatment of numbers, money etc." + (cond + ((string-matches name "[1-9][0-9]+") + (spanish_number name)) + ((not (lts.in.alphabet name 'spanish_downcase)) + ;; It contains some other than the lts can deal with + (let ((subwords)) + (item.set_feat token "pos" "nn") + (mapcar + (lambda (letter) + ;; might be symbols or digits + (set! subwords + (append + subwords + (cond + ((string-matches letter "[0-9]") + (spanish_number letter)) + ((string-matches letter "[A-ZÁÉÍÓÚÜÑ]") + (spanish_downcase letter)) + (t + (list letter)))))) + (symbolexplode name)) + subwords)) + (t + (list name)))) + +(provide 'sptoken) + |