diff options
Diffstat (limited to 'lib/voices/english/rab_diphone/festvox/rab_diphone.scm')
-rw-r--r-- | lib/voices/english/rab_diphone/festvox/rab_diphone.scm | 263 |
1 files changed, 263 insertions, 0 deletions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;                                                                       ;;
;;;                 Centre for Speech Technology Research                 ;;
;;;                      University of Edinburgh, UK                      ;;
;;;                        Copyright (c) 1996,1997                        ;;
;;;                         All Rights Reserved.                          ;;
;;;                                                                       ;;
;;;  Permission is hereby granted, free of charge, to use and distribute  ;;
;;;  this software and its documentation without restriction, including   ;;
;;;  without limitation the rights to use, copy, modify, merge, publish,  ;;
;;;  distribute, sublicense, and/or sell copies of this work, and to      ;;
;;;  permit persons to whom this work is furnished to do so, subject to   ;;
;;;  the following conditions:                                            ;;
;;;   1. The code must retain the above copyright notice, this list of    ;;
;;;      conditions and the following disclaimer.                         ;;
;;;   2. Any modifications must be clearly marked as such.                ;;
;;;   3. Original authors' names are not deleted.                         ;;
;;;   4. The authors' names are not used to endorse or promote products   ;;
;;;      derived from this software without specific prior written        ;;
;;;      permission.                                                      ;;
;;;                                                                       ;;
;;;  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        ;;
;;;  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      ;;
;;;  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   ;;
;;;  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     ;;
;;;  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    ;;
;;;  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   ;;
;;;  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          ;;
;;;  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       ;;
;;;  THIS SOFTWARE.                                                       ;;
;;;                                                                       ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;  Set up rab_diphones using the standard UniSyn diphone synthesizer
;;;
;;;  Roger diphones: male RP English collected October 1996
;;;
;;;  MODIFIED from the original distribution:
;;;    - rab_postlex_syllabics now returns its utterance argument instead
;;;      of the list produced by mapcar, matching
;;;      rab_diphone_const_clusters.
;;;    - Docstrings/comments corrected (feature and hook variable names).
;;;

;; Directory of this voice, looked up in the voice-locations alist.
(defvar rab_diphone_dir (cdr (assoc 'rab_diphone voice-locations))
  "rab_diphone_dir
  The default directory for the rab diphone database.")

(require 'mrpa_phones)
(require 'pos)
(require 'phrase)
(require 'tobi)
(require 'f2bf0lr)
(require 'mrpa_durs)
(require 'gswdurtreeZ)
(require_module 'UniSyn)

(setup_oald_lex)

;; set this to lpc or psola
(defvar rab_sigpr 'lpc)
;; set this to ungroup for ungrouped version
(defvar rab_groupungroup 'group)

;; Use group/rablpc16k.group when that file exists, otherwise fall back
;; to group/rablpc8k.group.  Being a defvar, a value already assigned to
;; rab_index_file before this file is loaded is kept.
(if (probe_file (path-append rab_diphone_dir "group/rablpc16k.group"))
    (defvar rab_index_file
      (path-append rab_diphone_dir "group/rablpc16k.group"))
    (defvar rab_index_file
      (path-append rab_diphone_dir "group/rablpc8k.group")))

;; Parameter lists handed to us_diphone_init below.  Only the one that
;; matches rab_sigpr / rab_groupungroup is actually initialised.

;; PSOLA, ungrouped: index in dic/, pitchmarks in pm/, waveforms in wav/.
(set! rab_psola_sep
      (list
       '(name "rab_psola_sep")
       (list 'index_file (path-append rab_diphone_dir "dic/diphdic_full.est"))
       '(grouped "false")
       (list 'coef_dir (path-append rab_diphone_dir "pm"))
       (list 'sig_dir  (path-append rab_diphone_dir "wav"))
       '(coef_ext ".pm")
       '(sig_ext ".wav")))

;; LPC, ungrouped: coefficients (.lpc) and residuals (.res) both in lpc/.
(set! rab_lpc_sep
      (list
       '(name "rab_lpc_sep")
       (list 'index_file (path-append rab_diphone_dir "dic/diphdic_full.est"))
       '(grouped "false")
       (list 'coef_dir (path-append rab_diphone_dir "lpc"))
       (list 'sig_dir  (path-append rab_diphone_dir "lpc"))
       '(coef_ext ".lpc")
       '(sig_ext ".res")))

;; PSOLA, grouped: single group file.
(set! rab_psola_group
      (list
       '(name "rab_psola_group")
       (list 'index_file
             (path-append rab_diphone_dir "group/rab.group"))
       '(grouped "true")))

;; LPC, grouped (the default given the defvars above): uses the group
;; file chosen in rab_index_file, plus tables of alternate phone names
;; and a default diphone.
(set! rab_lpc_group
      (list
       '(name "rab_lpc_group")
       (list 'index_file rab_index_file)
       '(alternates_left ((i ii) (ll l) (u uu) (i@ ii) (uh @) (a aa)
                          (u@ uu) (w @) (o oo) (e@ ei) (e ei)
                          (r @)))
       '(alternates_right ((i ii) (ll l) (u uu) (i@ ii)
                           (y i) (uh @) (r @) (w @)))
       '(default_diphone @-@@)
       '(grouped "true")))

;;; Setup the desired DB
;;; NOTE: there is no fallback clause; for any other combination of
;;; rab_sigpr and rab_groupungroup no clause matches and rab_db_name is
;;; not assigned here.
(cond
 ((and (eq rab_sigpr 'psola)
       (eq rab_groupungroup 'group))
  (set! rab_db_name (us_diphone_init rab_psola_group)))
 ((and (eq rab_sigpr 'psola)
       (eq rab_groupungroup 'ungroup))
  (set! rab_db_name (us_diphone_init rab_psola_sep)))
 ((and (eq rab_sigpr 'lpc)
       (eq rab_groupungroup 'group))
  (set! rab_db_name (us_diphone_init rab_lpc_group)))
 ((and (eq rab_sigpr 'lpc)
       (eq rab_groupungroup 'ungroup))
  (set! rab_db_name (us_diphone_init rab_lpc_sep))))

(define (rab_postlex_syllabics utt)
"(rab_postlex_syllabics UTT)
Because the lexicon is somewhat random in its use of syllabic l n and
m this is designed to post process the output inserting schwa before
them.  Ideally the lexicon should be fixed.  Returns UTT."
  (mapcar
   (lambda (s)
     ;; Candidate: an l/n/m in coda position whose previous segment is
     ;; not l or r and is not a vowel (p.ph_vc is "-").
     (if (and (member_string (item.name s) '("l" "n" "m"))
              (string-equal "coda" (item.feat s "seg_onsetcoda"))
              (not (member_string (item.feat s "p.name") '(l r)))
              (string-equal "-" (item.feat s "p.ph_vc")))
         ;; Insert a new "@" item before S (inner item.insert), then
         ;; link that new item into SylStructure, also before S.
         (item.relation.insert
          s 'SylStructure
          (item.insert s (list "@") 'before)
          'before)))
   (utt.relation.items utt 'Segment))
  ;; Return the utterance (not the mapcar result) so this function can
  ;; sit anywhere in a list of utterance hooks, as
  ;; rab_diphone_const_clusters already does.
  utt)

(define (rab_diphone_const_clusters utt)
"(rab_diphone_const_clusters UTT)
Identify consonant clusters, dark ls etc in the segment item
ready for diphone resynthesis.  This may be called as a post lexical
rule through postlex_rules_hooks; voice_rab_diphone installs it through
UniSyn_module_hooks.  Returns UTT."
  (mapcar
   (lambda (s) (rab_diphone_fix_phone_name utt s))
   (utt.relation.items utt 'Segment))
  utt)

(define (rab_diphone_fix_phone_name utt seg)
"(rab_diphone_fix_phone_name UTT SEG)
Set the features us_diphone_left and/or us_diphone_right on the given
segment to the appropriate names for constructing a diphone.
Basically adds _ if either side is part of the same consonant cluster,
adds $ either side if in different syllable for preceding/succeeding
vowel syllable, and converts l to ll in coda part of syllables."
  (let ((name (item.name seg)))
    (cond
     ;; Segments named "#" are left untouched.
     ((string-equal name "#") t)
     ;; Everything below applies to non-vowel segments only.  The tests
     ;; are independent ifs, not a cond: several may fire for one
     ;; segment, and a later one overwrites a feature set by an earlier.
     ((string-equal "-" (item.feat seg 'ph_vc))
      ;; r/w/y/l preceded by a stop, with a previous item in
      ;; SylStructure: right name becomes "_<name>".
      (if (and (member_string name '(r w y l))
               (member_string (item.feat seg "p.name") '(p t k b d g))
               (item.relation.prev seg "SylStructure"))
          (item.set_feat seg "us_diphone_right" (format nil "_%s" name)))
      ;; w/y/l/m/n/p/t/k preceded by s, with a previous item in
      ;; SylStructure: right name becomes "_<name>".
      (if (and (member_string name '(w y l m n p t k))
               (string-equal (item.feat seg "p.name") 's)
               (item.relation.prev seg "SylStructure"))
          (item.set_feat seg "us_diphone_right" (format nil "_%s" name)))
      ;; s followed by w/y/l/m/n/p/t/k, with a next item in
      ;; SylStructure: left name becomes "s_".
      (if (and (string-equal name 's)
               (member_string (item.feat seg "n.name") '(w y l m n p t k))
               (item.relation.next seg "SylStructure"))
          (item.set_feat seg "us_diphone_left" (format nil "%s_" name)))
      ;; Stop followed by r/w/y/l, with a next item in SylStructure:
      ;; left name becomes "<name>_".
      (if (and (member_string name '(p t k b d g))
               (member_string (item.feat seg "n.name") '(r w y l))
               (item.relation.next seg "SylStructure"))
          (item.set_feat seg "us_diphone_left" (format nil "%s_" name)))
      ;; p/k/b/d/g preceded by a vowel other than @/aa/o, with no
      ;; previous item in SylStructure: right name becomes "$<name>".
      ;; NOTE(review): t is absent from this list, unlike the other
      ;; stop lists here -- kept as in the original; confirm against
      ;; the diphone inventory.
      (if (and (member_string name '(p k b d g))
               (string-equal "+" (item.feat seg 'p.ph_vc))
               (not (member_string (item.feat seg "p.name") '(@ aa o)))
               (not (item.relation.prev seg "SylStructure")))
          (item.set_feat seg "us_diphone_right" (format nil "$%s" name)))
      ;; Stop followed by a vowel other than @/aa, with no next item in
      ;; SylStructure: left name becomes "<name>$".
      (if (and (member_string name '(p t k b d g))
               (string-equal "+" (item.feat seg 'n.ph_vc))
               (not (member_string (item.feat seg "n.name") '(@ aa)))
               (not (item.relation.next seg "SylStructure")))
          (item.set_feat seg "us_diphone_left" (format nil "%s$" name)))
      ;; l preceded by a vowel whose ph_vlng is not "a", with a previous
      ;; item in SylStructure: right name becomes "ll".
      (if (and (string-equal "l" name)
               (string-equal "+" (item.feat seg "p.ph_vc"))
               (not (string-equal "a" (item.feat seg "p.ph_vlng")))
               (item.relation.prev seg 'SylStructure))
          (item.set_feat seg "us_diphone_right" "ll"))
      ;; ch/jh preceded by a vowel: right name becomes "t".
      (if (and (member_string name '(ch jh))
               (string-equal "+" (item.feat seg 'p.ph_vc)))
          (item.set_feat seg "us_diphone_right" "t"))
      )
     )))

(define (voice_rab_diphone)
"(voice_rab_diphone)
 Set up the current voice to be a British male RP (Roger) speaker using
 the rab diphone set."
  (voice_reset)
  (Parameter.set 'Language 'britishenglish)
  ;; Phone set
  (Parameter.set 'PhoneSet 'mrpa)
  (PhoneSet.select 'mrpa)
  ;; Tokenization rules
  (set! token_to_words english_token_to_words)
  ;; POS tagger
  (set! pos_lex_name "english_poslex")
  (set! pos_ngram_name 'english_pos_ngram)
  (set! pos_supported t)
  (set! guess_pos english_guess_pos)   ;; need this for accents
  ;; Lexicon selection
  (lex.select "oald")
  (set! postlex_rules_hooks (list postlex_apos_s_check
                                  rab_postlex_syllabics))
  ;; Phrase prediction
  (Parameter.set 'Phrase_Method 'prob_models)
  (set! phr_break_params english_phr_break_params)
  ;; Accent and tone prediction
  (set! int_tone_cart_tree f2b_int_tone_cart_tree)
  (set! int_accent_cart_tree f2b_int_accent_cart_tree)
  ;; F0 prediction
  (set! f0_lr_start f2b_f0_lr_start)
  (set! f0_lr_mid f2b_f0_lr_mid)
  (set! f0_lr_end f2b_f0_lr_end)
  (Parameter.set 'Int_Method Intonation_Tree)
  (set! int_lr_params
        '((target_f0_mean 105) (target_f0_std 14)
          (model_f0_mean 170) (model_f0_std 34)))
  (Parameter.set 'Int_Target_Method Int_Targets_LR)
  ;; Duration prediction -- use gsw durations
  (set! duration_cart_tree gsw_duration_cart_tree)
  (set! duration_ph_info gsw_durs)
  (Parameter.set 'Duration_Method Duration_Tree_ZScores)
  (Parameter.set 'Duration_Stretch 1.05)
  ;; Waveform synthesizer: Roger diphones
  ;; This assigns the diphone names from their context (_ $ etc)
  (set! UniSyn_module_hooks (list rab_diphone_const_clusters))
  (set! us_abs_offset 0.0)
  (set! window_factor 1.0)
  (set! us_rel_offset 0.0)
  (set! us_gain 0.9)

  (Parameter.set 'Synth_Method 'UniSyn)
  (Parameter.set 'us_sigpr rab_sigpr)
  ;; Select the database initialised at load time by the cond above.
  (us_db_select rab_db_name)

  (set! current-voice 'rab_diphone)
)

(proclaim_voice
 'rab_diphone
 '((language english)
   (gender male)
   (dialect british)
   (description
    "This voice provides a British RP English male voice using a
     residual excited LPC diphone synthesis method.  It uses a
     modified Oxford Advanced Learners' Dictionary for pronunciations.
     Prosodic phrasing is provided by a statistically trained model
     using part of speech and local distribution of breaks.  Intonation
     is provided by a CART tree predicting ToBI accents and an F0
     contour generated from a model trained from natural speech.  The
     duration model is also trained from data using a CART tree.")))

(provide 'rab_diphone)