summaryrefslogtreecommitdiff
path: root/lib/voices/finnish/suo_fi_lj_diphone/festvox/finnish_mv_phrase.scm
diff options
context:
space:
mode:
authorNiko Tyni <ntyni@iki.fi>2005-12-04 14:34:26 +0100
committerNiko Tyni <ntyni@iki.fi>2005-12-04 14:34:26 +0100
commitefc580d40dc9f67c6edb5f7c5852f0fbef22578d (patch)
tree2b361b354cf44fefe9c4fa6f644ed6ebff870de1 /lib/voices/finnish/suo_fi_lj_diphone/festvox/finnish_mv_phrase.scm
Import festvox-suopuhe-lj_1.0g-20051204.orig.tar.gz
[dgit import orig festvox-suopuhe-lj_1.0g-20051204.orig.tar.gz]
Diffstat (limited to 'lib/voices/finnish/suo_fi_lj_diphone/festvox/finnish_mv_phrase.scm')
-rw-r--r--lib/voices/finnish/suo_fi_lj_diphone/festvox/finnish_mv_phrase.scm410
1 files changed, 410 insertions, 0 deletions
diff --git a/lib/voices/finnish/suo_fi_lj_diphone/festvox/finnish_mv_phrase.scm b/lib/voices/finnish/suo_fi_lj_diphone/festvox/finnish_mv_phrase.scm
new file mode 100644
index 0000000..28c540a
--- /dev/null
+++ b/lib/voices/finnish/suo_fi_lj_diphone/festvox/finnish_mv_phrase.scm
@@ -0,0 +1,410 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;; ;;
+;;; Department of General Linguistics / Suopuhe project ;;
+;;; University of Helsinki, FI ;;
+;;; Copyright (c) 2000,2001,2002,2003 ;;
+;;; All Rights Reserved. ;;
+;;; ;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;; ;;
+;;; Authors: ;;
+;;; ;;
+;;; Martti Vainio ;;
+;;; e-mail: martti.vainio@helsinki.fi ;;
+;;; address: Department of General Linguistics ;;
+;;; PL 9 (Siltavuorenpenger 20A) ;;
+;;; 00014 University of Helsinki ;;
+;;; FINLAND ;;
+;;; ;;
+;;; Nicholas Volk ;;
+;;; e-mail: nvolk@ling.helsinki.fi ;;
+;;; address: Department of General Linguistics ;;
+;;; PL 9 (Siltavuorenpenger 20A) ;;
+;;; 00014 University of Helsinki ;;
+;;; FINLAND ;;
+;;; ;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;
+;;; Phrase break prediction
+;;;
+
+; This program is distributed under Gnu Lesser General Public License (cf. the
+; file LICENSE in distribution).
+
+; This program is free software; you can redistribute it and/or modify
+; it under the terms of the GNU Lesser General Public License as published by
+; the Free Software Foundation; either version 2 of the License, or
+; (at your option) any later version.
+
+; This program is distributed in the hope that it will be useful,
+; but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; GNU Lesser General Public License for more details.
+
+(require 'finnish_lex) ;; causers of initial doubling are in a word list
+
+;; tällekin pitäisi opettaa listaintonaatio
+(set! finnish_phrase_cart_tree
+'
+;; phrasal break
+((R:Token.parent.pbreak is PB)
+ ((PB))
+ ;; list break
+ ((R:Token.parent.pbreak is LB)
+ ((LB))
+ ((R:Token.parent.pbreak is BB)
+ ((BB))
+ ((lisp_token_end_punc in ("?" "." ":"))
+ ((BB))
+ ((R:Token.parent.pbreak is B)
+ ((B))
+ ((lisp_token_end_punc in (";" ","))
+ ((B))
+ ((n.name is 0) ;; end of utterance
+ ((BB))
+ ((NB))))))))))
+
+
+(define (word_has_break? word)
+ "(word_has_break? WORD)
+True, if the WORD is followed by # or ## \"phone\"."
+ (let ((final
+ (item.relation
+ (item.last_leaf
+ (item.relation word 'SylStructure))
+ 'Segment)))
+ (and (item.next final)
+ (string-matches (item.name (item.next final)) "^#+$"))))
+
+(define (distance_to_next_pause word)
+ "(distance_to_next_pause WORD)
+Counts the distance from the end of the word to the next pause in syllables."
+ (cond
+ ((not (item.next word))
+ 0)
+ ((word_has_break? word)
+ 0)
+ (t
+ (+ (syllables_in_word (item.daughter1 (item.relation word 'SylStructure)))
+ (distance_to_next_pause (item.next word))))))
+
+(define (syllables_in_word SYL)
+ "(syllables_in_word SYL)
+Counts the number of syllables in a given word. Actually counts the numebr
+of syllables from a given syllable to the end of word."
+ (if (item.next SYL)
+ (+ 1 (syllables_in_word (item.next SYL)))
+ 1))
+
+(define (distance_from_prev_pause word)
+ "(distance_from_prev_pause WORD)
+Distance (in syllables) from the previous word."
+ (cond
+ ((not (item.prev word))
+ 0)
+ ((word_has_break? (item.prev word))
+ 0)
+ (t
+ (+
+ (syllables_in_word
+ (item.daughter1 (item.relation (item.prev word) 'SylStructure)))
+ (distance_from_prev_pause (item.prev word))))))
+
+
+(define(insert_pause2 word pausetype)
+ "(insert_pause2 WORDITEM)
+ Insert a little break (pause segment) after the last segment in WORDITEM in UTT."
+
+ (if hy_debug (print (string-append "Lisätään tauko (" pausetype ") " (item.name word) "-sanan perään.")))
+
+ ;; sets the value pbreak feature, not too useful...
+ (if (not (string-equal (item.feat word 'pbreak) "BB"))
+ (item.set_feat word 'pbreak pausetype))
+ ; ... since the trick is done by adding a #
+ (let ((lastseg (find_last_seg word))
+ (silence (if (or (string-equal (item.feat word 'pbreak) "PB")
+ (string-equal (item.feat word 'pbreak) "LB"))
+ "##" ;; pikkutauko
+ "#")))
+ (if lastseg
+ (item.relation.insert
+ lastseg 'Segment (list silence) 'after))))
+
+;(define (lingvistinen_tauon_esto sana)
+; "(lingvistinen_tauon_esto WORD)
+;Estää tauon tulemisen tähän kohtaan...
+;Ei juuri implementoitu vielä...
+;Pitää vähän miettiä...
+;Mitä tehdään sanatasolla ja mikä muualla?"
+; (let ((thispos (item.feat sana 'pos))
+; (nextpos (if (item.next sana)
+; (item.feat (item.next sana) 'pos)
+; nil)))
+;
+; (cond
+; ((and (string-equal thispos "adjective" nextpos "noun"))
+; ;;(print "ei taukoa a+n väliin!")
+; t)
+; ((and (string-equal thispos "noun" nextpos "noun"))
+; (print "ei taukoa n+n väliin!")
+; t)
+; (t
+; ;;(print (string-append thispos " + " nextpos))
+; nil))))
+
+
+(define (find_phrase_break word)
+ "(find_phrase_break WORD)
+If the distance from previous break to the next break is
+long enough (now 25 syllables), try to add more pauses
+in between. New pauses are added between certain word classes."
+ (cond
+ ;; something went wrong... return
+ ((not word)
+ nil)
+ ;; less than N syllables (current 25 is an arbitrary choice): no break
+ ((< (+ (distance_to_next_pause word) (distance_from_prev_pause word))
+ 25)
+ (find_phrase_break (skip_pause word)))
+ ;; break needed
+ (t
+ (if hy_debug (format t
+ "Trying to add a break after \"%s\"\n"
+ (upcase (item.name word))))
+
+ (find_phrase_break (skip_pause (add_linguistic_break word))))))
+
+
+(define (add_linguistic_break word)
+ "(add_linguistic_break WORD)
+Adds a phrase break after the word, if the context is appropriate.
+If the word is final return nil. If there already is a pause
+return the word itself. Also is pause is added, return the word itself.
+If pause couldn't be added, try the same with the next word. Simple!"
+(if (not (item.next word))
+ nil
+ (let ((thispos (item.feat word 'pos))
+ (nextpos (item.feat (item.next word) 'pos)))
+ ;; (print (string-append thispos "#" nextpos " paired"))
+ (cond
+ ;; we already have a pause, so let's not try to add one...
+ ((word_has_break? word)
+ word)
+ ;; not far enough from the phrase beginning
+ ((or (not (item.prev word))
+ (word_has_break? (item.prev word)))
+ (add_linguistic_break (item.next word)))
+ ;; put a break after a PoStPosition
+ ((string-equal thispos "psp")
+ (print (string-append " Pause after PSP \""(item.name word)"\"."))
+ (insert_pause2 word "B")
+ word)
+ ;; (NOUN|NUM|PROP) # V
+ ((and (or (string-equal thispos "adjective")
+ (string-equal thispos "noun")
+ (string-equal thispos "num")
+ (string-equal thispos "prop"))
+ (string-equal nextpos "verb"))
+ (format t "\nInserted pause after in N#V postition after %s."
+ (upcase (item.name word)))
+ (insert_pause2 word "PB")
+ word)
+ ;; NUM # PROP
+ ((and (or (string-equal thispos "num")
+ (string-equal thispos "noun"))
+ (string-equal nextpos "prop"))
+ (format t "\nInserted pause after in NUM#PROP postition after %s."
+ (upcase (item.name word)))
+ (insert_pause2 word "PB")
+ word)
+
+ ;; NOUN # CC NOUN
+ ((and (string-equal thispos "noun")
+ (string-equal nextpos "conj")
+ (item.next (item.next word))
+ (string-equal (item.feat (item.next (item.next word)) 'pos) "noun"))
+ (print (string-append "Inserted pause between NOUN \""
+ (item.name word)
+ "\" CONJ and \""
+ (item.name (item.next word)) "\"."))
+ (insert_pause2 word "PB")
+ word)
+ ;; ADJ # CC ADJ
+ ((and (string-equal thispos "adjective")
+ (string-equal nextpos "conj")
+ (item.next (item.next word))
+ (string-equal (item.feat
+ (item.next (item.next word)) 'pos) "adjective"))
+ (print (string-append "Inserted pause between ADJ \""
+ (item.name word)
+ "\" CONJ and \""
+ (item.name (item.next word)) "\"."))
+ (insert_pause2 word "PB")
+ word)
+
+ ;; ADJ # ADJ
+ ((and (string-equal thispos "adjective")
+ (string-equal nextpos "adjective"))
+
+ (print (string-append "Inserted pause between ADJ \""
+ (item.name word) "\" and ADJ \""
+ (item.name (item.next word)) "\"."))
+ (insert_pause2 word "PB")
+ word)
+ ;; V # NUM
+ ((and (string-equal thispos "verb")
+ (string-equal nextpos "num"))
+
+ (insert_pause2 word "PB")
+ word)
+ (t
+ (add_linguistic_break (item.next word)))))))
+
+
+(define (skip_pause WORD)
+ "(skip_pause WORD)
+Returns the word after the next pause or nil if there are no intersentence
+pauses left."
+ (if (item.next WORD)
+ (if (word_has_break? WORD)
+ (item.next WORD)
+ (skip_pause (item.next WORD)))
+ nil))
+
+(define (suopuhe_add_break utterance)
+ "(suopuhe_add_break UTT)
+Adds pauses if necessary to a (overlong) utterance.
+Applies some linguistic intelligence in the process..."
+ (if hy_debug (format stderr "------- PAUSE-ADDING BEGINS ----------\n"))
+
+
+ ;; converts punctions in breaks
+ (suopuhe_Pauses utterance)
+
+ ;; suopuhe-mode can provide linguistic information.
+ ;; we may be able to place additional breaks based on that information
+ (if suopuhe
+ (find_phrase_break (utt.relation.first utterance 'Word))
+ ;; on the other hand initial doubling is only done (here)
+ ;; when not using the suopuhe-mode.
+ ;; we only hope that the xml input's creator took care of them...
+ ;; A bit optimistic assumption, but doing things twice can cause
+ ;; mayhem...
+ (begin
+ (if hy_debug ( format stderr "------ ALKUKAHDENNUS ALKAA -----------\n"))
+ (initial_doubling (utt.relation.first utterance 'Word))))
+
+ ;; these global variables contains the accent and intonation
+ ;; commands for the fujisaki model
+ ;; we want to calculate them only once
+ ;; so these variables tell, whether they have already been counted
+ ;; for the given utterance
+ (set! suopuhe_accent nil)
+ (set! suopuhe_phrase nil)
+
+ utterance)
+
+(define (suopuhe_Pauses utt)
+ "(suopuhe_Pauses utt)
+Converts the punctuation marks into appropriate pauses.
+Does the list intonation too."
+ (let ((words (utt.relation.items utt 'Word))
+ lastword)
+ (if words
+ (begin
+ (insert_initial_pause utt) ;; always have a start pause
+ (set! lastword (car (last words)))
+ (mapcar
+ (lambda (w)
+ (let ((pbreak (item.feat w "pbreak"))
+ (emph (item.feat w "R:Token.parent.EMPH")))
+
+ (cond
+ ((equal? w lastword)
+ ;; last word is followed by a double pause
+ ;; this is to reduce the effect of an annoying syntesis
+ ;; feature...
+ (insert_pause utt w)
+ (insert_pause utt w))
+ ((or (string-equal "BB" pbreak)
+ (string-equal "LB" pbreak)
+ (string-equal "PB" pbreak)
+ (string-equal "B" pbreak))
+ ;; list intonation
+ (let ((a (equal? (item.feat w "p.p.lisp_token_end_punc") ","))
+ (b (equal? (item.feat w "p.lisp_token_end_punc") ","))
+ (c (equal? (item.feat w "lisp_token_end_punc") ","))
+ (d (equal? (item.feat w "n.lisp_token_end_punc") ","))
+ (e (equal? (item.feat w "n.n.lisp_token_end_punc") ",")))
+ (cond
+ ((or (and b c)
+ (and c d)
+ (and c (equal? (item.feat w "n.n.pos") "COORD")))
+ (set! pbreak "LB"))))
+
+ (insert_pause2 w pbreak)))))
+ words)))
+ utt))
+
+(define (initial_doubling WORD)
+ "(initial_doubling WORD)
+Checks wheter the current WORD can triggers initial doubling on the
+next word or not."
+ (let ((name (downcase (item.name WORD))))
+ (if (and (item.next WORD)
+ (not (word_has_break? WORD))
+ ;; (not suopuhe) ;; suopuhe-mode deal this personally ???
+ (word_list_entry? name doubler_words))
+ (begin
+ (if hy_debug
+ (format stderr
+ "%s triggers initial doubling (word list)\n"
+ name))
+ (set! next_initial (item.next (item.relation (item.last_leaf (item.relation WORD 'SylStructure)) 'Segment)))
+
+ (let ((char (item.name next_initial)))
+ (if hy_debug (print (string-append char " => " char ":")))
+ (cond ;; the following consonants are doubled:
+ ((string-equal "b" char)
+ (item.set_name next_initial "b:"))
+ ((string-equal "d" char)
+ (item.set_name next_initial "d:"))
+ ((string-equal "g" char)
+ (item.set_name next_initial "g:"))
+ ((string-equal "h" char)
+ (item.set_name next_initial "h:"))
+ ((string-equal "j" char)
+ (item.set_name next_initial "j:"))
+ ((string-equal "k" char)
+ (item.set_name next_initial "k:"))
+ ((string-equal "l" char)
+ (item.set_name next_initial "l:"))
+ ((string-equal "m" char)
+ (item.set_name next_initial "m:"))
+ ((string-equal "n" char)
+ (item.set_name next_initial "n:"))
+ ((string-equal "p" char)
+ (item.set_name next_initial "p:"))
+ ((string-equal "r" char)
+ (item.set_name next_initial "r:"))
+ ((string-equal "s" char)
+ (item.set_name next_initial "s:"))
+ ((string-equal "S" char)
+ (item.set_name next_initial "S:"))
+ ((string-equal "t" char)
+ (item.set_name next_initial "t:"))
+ ((string-equal "v" char)
+ (item.set_name next_initial "v:"))))))
+ (if (item.next WORD)
+ (initial_doubling (item.next WORD)))))
+
+
+
+
+
+
+(provide 'finnish_mv_phrase)
+
+
+