summaryrefslogtreecommitdiff
path: root/lib/voices/finnish/hy_fi_mv_diphone/festvox/finnish_mv_int.scm
blob: f137959be30213437711ab3485f38ceb5c34ec21 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;                                                                       ;;
;;;          Department of General Linguistics / Suopuhe project          ;;
;;;                      University of Helsinki, FI                       ;;
;;;                   Copyright (c) 2000,2001,2002,2003                   ;;
;;;                        All Rights Reserved.                           ;;
;;;                                                                       ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;                                                                       ;
;;; Authors:                                                              ;
;;;                                                                       ;
;;;          Martti Vainio                                                ;
;;;  e-mail: martti.vainio@helsinki.fi                                    ;
;;; address: Department of General Linguistics                            ;
;;;          PL 9 (Siltavuorenpenger 20A)                                 ;
;;;          00014 University of Helsinki                                 ;
;;;          FINLAND                                                      ;
;;;                                                                       ;
;;;          Nicholas Volk                                                ;
;;;  e-mail: nvolk@ling.helsinki.fi                                       ;
;;; address: Department of General Linguistics                            ;
;;;          PL 9 (Siltavuorenpenger 20A)                                 ;
;;;          00014 University of Helsinki                                 ;
;;;          FINLAND                                                      ;
;;;                                                                       ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;;  Accent and F0 prediction
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Intonation


; This program is distributed under Gnu Lesser General Public License (cf. the
; file LICENSE in distribution).
 
; This program is free software; you can redistribute it and/or modify
; it under the terms of the GNU Lesser General Public License as published by
; the Free Software Foundation; either version 2 of the License, or
; (at your option) any later version.
 
; This program is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; GNU Lesser General Public License for more details.

;;; (SayText "sanoisin ett� en sano" )
(define (monotone utt syl)
  "(monotone UTT SYL)
Monotonous intonation model."
  (let ((middle (/ (+ (item.feat syl 'syllable_start) (item.feat syl 'syllable_end)) 2)))
    (list (list middle 100))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Intonation
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(set! finnish_accent_cart_tree
      '((stress is 2)
	((R:SylStructure.parent.pos is function)
	 ((NONE))
	 ((R:SylStructure.parent.pos is COP)
	  ((NONE))
	  ((R:SylStructure.parent.pos is PRON)
	   ((NONE))
	   ((R:SylStructure.parent.pos is COORD)
	    ((NONE))

	    ((Accented))))))
	((NONE))))




;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Fujisaki model
;;

; The model and algorith is as follows:
;-----------------------------------------------------------------------
; Variables:
;
; Accent levels: 0 == no accent
;                1 == slightly accented (pre- and post-focal accent)
;                2 == normal accent
;                3 == focus
;                4 == emphasis

; Accent types:  0 == normal (
;                1 == spread
;                2 == delayed
;                ...
; points = (0.2 0.4 0.6 0.8 1.0)
; alpha = 2 (/s)
; beta = 15 (/s) 
; f_min = 60 (Hz)
;-----------------------------------------------------------------------       
; Algorithm:
;   for each utterance
;    accent_levels = predict accent levels
;    accent_types  = predict accent type
;    accent_list   = calculate_accent_commands(accent_levels, accent_types)
;    phrase_list   = calculate_phrase_commands(utterance, pharases)
;    syllables     = syllables(utterance)
;    calculate fujisaki_contour(segments, phrase_list, accent_list)
;
; fujisaki_contour:
;   for syllable in  syllables:
;     for point in points:
;       ph_level = calculate_phrase_level(point, phrase_list, alpha)
;       ac_level = calculate_accent_level(point, accent_list, beta)
;       f0 = exp(ph_level + ac_level + f_min) 

;; global parameters for the Fujisaki model
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; we should use the Parameter.set etc. methods:


;; Some auxiliary functions, which should be somewhere else
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; this is not called anywhere?
;;(define (incf var val)
;;  (+ var val))

(define (sqr number)
  "(sqr NUM)
NUM ** 2."
  (* number number))

(define (neg number)
  "(neg number)
Negates a number -- Festival SIOD doesn't understand (- number), but
requires TWO arguments to the '-' operator"
  (* -1 number))


(define (min num1 num2)
  "(min num1 num2)
Returns the smaller of the two."
  (cond ((<= num1 num2)
        num1)
        (t num2)))

(define (max num1 num2)
  "(max num1 num2)
Returns the greater of the two."
  (cond ((<= num1 num2)
        num2)
        (t num1)))


(define (accented_p syl)
  "(accented_p SYL)
Sees if the syllable is accented..." 
  (cond
   ((not (equal? (item.feat syl "R:Intonation.daughter1.name") "Accented"))
    nil)
   ((string-equal (item.feat syl 'R:SylStructure.parent.pos) "function")
    nil)   
   ((string-equal (item.feat syl 'R:SylStructure.parent.pos) "COP")
    nil)
   ((string-equal (item.feat syl 'R:SylStructure.parent.pos) "PRON")
    nil)
   (t
    t)))
;   
;
;  (and (string-equal (item.feat syl 'R:SylStructure.parent.pos) "content")
;       ;; eka tavu
;       (equal? (item.feat syl "R:Intonation.daughter1.name") "Accented")))


;; Accent and Phrase parameter prediction
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(define (get_accent_list utt)
  "(get_accent_list UTT)"
  (let ((syllables (utt.relation.items utt 'Syllable))
	(ennakko 0.00)
	(no_accent nil) ;; debug variable: removes accent commands
        (ac_list nil))
    (mapcar
     (lambda (syl) ;; for each syllable
       
       (let ((onset  (- (item.feat syl 'syllable_start) ennakko))
	     (offset (item.feat syl 'syllable_end)))

	 (cond
	  (no_accent
	   nil)
	  ;; only syllable
	  ((and (not (item.next syl))
		(not (item.prev syl)))
	   (set! ac_list (append ac_list (list (list onset offset 0.1)))))
	  ;; only word, first syllable
	  ((and (not (item.prev syl))
		(not (item.next (item.parent (item.relation syl 'SylStructure)))))
	   (set! ac_list (append ac_list (list (list onset offset 0.25)))))
	  ;; first syllable on an accented word
	  ((and (accented_p syl)
		(or (not (item.prev syl))
		    (not (string-equal
			  (item.feat syl 'R:SylStructure.parent.name)
			  (item.feat syl "p.R:SylStructure.parent.name")))))
	   
	   ;; minimum of (120ms * stretch)
	   (if (< (- offset onset) (* 0.120 (Param.get 'Duration_Stretch)))
	       (set! offset (+ onset (* 0.120 (Param.get 'Duration_Stretch)))))
	   
	   (set! ac_list (append ac_list (list (list onset offset 0.4)))))
	  ;; unstressed last syllable: negative accent.
	  ((and (not (accented_p syl))
		(not (item.next syl)))
	   (set! onset (- onset (/ (- onset (item.feat (item.prev syl) 'syllable_start)) 2)))
	   
	   (set! ac_list
		 (append ac_list 
			 (list (list 
				onset 
				(+ offset (* 0.2 (Param.get 'Duration_Stretch )))
				-0.3)))))
	  ;; default: do nothing
	  (t 
	   nil))))
     syllables)
    ac_list))

;; find all phrases within an utterance:
(define (find_phrases utt)
  "(define (find_phrases utt)
Returns a list of phrases from an utterance in the form
of lists whose car is the break level followed by start and end times"
  (let ((wrds (utt.relation.items utt 'Word))
	;; end is problematic if (SayText UTT) contains more
	;; than one sentence. SEP..
	(end (item.feat (utt.relation.last utt 'Word) 'word_end))
        (result nil)
        (start -0.2))
 
    (mapcar
     (lambda (wrd)
;;       (set! end (item.feat wrd 'word_end))
       (let ((break (item.feat wrd 'pbreak))
;	     (end (item.feat wrd 'word_end))
	     (next (item.next wrd)))
	 (cond 
	  ;; ordinary break
	  ((or (equal? break "B")
	       (equal? break "BB")
	       (equal? break "PB")) ;; Phrasal Break
	   (set! result (append result (list (list break start end))))
	   (set! start (item.feat wrd 'word_end)))
	   ;; break within list
	  ((equal? break "LB")
	   (set! result (append result (list (list break start end))))
	   (set! start
		 (if next 
		     (max
		      (- (item.feat wrd 'word_end) 0.1)
		      (/ (+ (item.feat wrd 'word_start)
			    (item.feat wrd 'word_end))
			 2))
		     "no_start")))
	  (t nil))))
     wrds)
    
    result))

(define (utt.length utt)
  "(utt.length utt)
Returns the length of an utterance in syllables"
  (length (utt.relation.items utt 'Syllable)))

;;; produce a list of phrase commands:
(define (get_phrase_list utt)
  "(get_phrase_list utt)
Returns a list of phrase commands for an utterance. The first command
is higher than the following ones."
  (let ((phrases (find_phrases utt))
        (phrase_list nil)
        (len nil))
    (set! len (length phrases))
    (set! phrase_list
          (mapcar
           (lambda (phrase)
             (let ((b_type (car phrase))
                   (start (nth 1 phrase))
                   (end (nth 2 phrase))
                   (level 0))
               (cond ((or (and (= len 1) (> (utt.length utt) 5))
                          (and (= (position phrase phrases) 0)
                               (not (string-equal b_type "BB")))) ;; first phrase of more than one
		      (if (string-equal current-voice "suo_fi_lj_diphone")
			  (list start end 0.4)
			  ;; male voice
			  (list start end 0.6))) ;; was 0.7
		     ((or (string-equal b_type "LB")
			  (string-equal b_type "PB"))
		      ;; list intonation 
		      (list start end 0.05))
                     (t
                      (list start end 0.3)))))
           phrases))
    phrase_list))


;;; this is the actual Fujisaki equation:
;;;
;;; calculate accent amplitude for a given point:
(define (get_amplitude time beta)
  "(get_amplitude point beta)
Calculate the amplitude from time and beta information:"
  (if (< time 0) 
      0 ;; <- THEN
      (min 0.9 ;; <- ELSE
	   (- 1 (* (+ 1 (* beta time))
		   (exp (* (neg beta) time)))))))

;;; calculate phrase amplitude for a given point:
(define (get_phrase_amplitude time alpha)
  "(get_phrase_amplitude time alpha)
calculate phrase amplitude for a given point in time."
  (cond ((>= time 0)
	 (begin
	   (* (sqr alpha) 
	      time
	      (exp (* (neg alpha) time)))))
	(t 0.0)))


;;; calculate the actual f0 for a given time (point):
(define (calculate_fujisaki accent_list phrase_list point alpha beta f_min)
  " (calculate_fujisaki acc_list phrase list point alpha beta f_min)
Calculates the Fujisaki parameter values for a given point. Returns an absolute
Hertz value"
;;  (print point)
  (let (ph_level
        ac_level
	speed_level
        result)
    (set! ph_level
          (let ((Sum_Pa 0.0))
            (mapcar
             (lambda (p)
               (let ((onset (+ (car p) (/ -1 beta))) ;;
                     (offset (nth 1 p))
                     (amp (nth 2 p)))
                 (set! Sum_Pa
                       (+ Sum_Pa
                          (* amp
                             (get_phrase_amplitude
			      (if (> point offset)
				  0.0
				  (- point onset))
			      alpha ))))))
             phrase_list)
	    Sum_Pa))
    (set! ac_level
          (let ((Sum_Aa 0.0))
            (mapcar
             (lambda (ac)
               (let ((onset (car ac))    ;; onset of the accent command
                     (offset (nth 1 ac)) ;; offset of the acc. com.
                     (amp (nth 2 ac)))    ;; amplitude of the  acc. com.
                 (set! Ga_T1
                       (get_amplitude (- point onset) beta))
                 (set! Ga_T2
                       (get_amplitude (- point offset) beta))
                 (set! Sum_Aa
                       (+ Sum_Aa (* amp (- Ga_T1 Ga_T2))))))
             accent_list)
            Sum_Aa))
    ;;; speed level raises f0 in fast speech
    (set! speed_level (* 100 (- 1 (min (Parameter.get 'Duration_Stretch) 1))))
    ;; maximum raise of 40dB
    (set! speed_level (min 40 speed_level))


    ;; fast speech is also more monotonous...
    (set! result (+ speed_level
		    (exp 
		     (+ (* ph_level (min (Parameter.get 'Duration_Stretch) 1)) 
			(* ac_level (min (Parameter.get 'Duration_Stretch) 1))

			(log f_min)))))
    result))

;;; calculate the local f0 contour for a syllable
;;; for a given number of points, determined by
;;; the parameter *points*
(define (fujisaki_targ_func utt syl)
  "(fujisaki_targ_func UTT STREAMITEM)
Returns a list of targets for the given syllable."
  ;; (if hy_debug (print "Fujisaki_targ_func"))

  
  (baptize syl) ;; give names to syllables

  (begin
    (let ((start (item.feat syl 'syllable_start))
	  (end (item.feat syl 'syllable_end))
	  (accent_list (or suopuhe_accent
			   ;; wierd way to print status reports (:
			   (if hy_debug (format stderr "------ COUNTING THE F0 CONTOUR ------\n"))
			   (get_accent_list utt)))
	  (phrase_list (or suopuhe_phrase
			   (get_phrase_list utt)))
	  
	  
	  (*points* nil)
	  result dur)
      ;; nint was the easiest way to create an integer, where's abs?
      ;; calculate f0 after every 50 mseconds
      (let ((i (if (< start (nint start))
		   (- (nint start) 1)
		   (nint start))))
	(while (< i end)
	       (begin
		 (if (>= i start)
		     (set! *points* (flatten (list *points* i))))
		 (set! i (+ i 0.050))))) ;; + 50 ms
      ;; if too short a syllable, use start and end times instead.
      (if (not *points*)
	  (set! *points* (list start end))
	  (begin
	    (if (not (item.next syl))
		(set! *points* (flatten (list *points* end (+ end 0.1)))))
	    (if (not (item.prev syl))
		(set! *points* (flatten (list 0.0 *points*))))))

            

      (set! dur (- end start))
      (set! result (mapcar 
		    (lambda (point)
		      ;;			(if (> (+ start (* dur point))
		      ;;			       end)
		      ;;(print (string-append "Dur: " dur " point " (+ start (* dur point)) " end " end))
;;		      (list (+ start (* dur point))
		      (list point
			    (min *f_max*
				 (calculate_fujisaki
				  accent_list
				  phrase_list
				  ;; IN ABSOLUT TIME
				  point
				  ;; THIS WAS IN THE RELATIVE APPROACH:
				  ;;(+ start (* point dur))
				  *alpha* *beta* *f_min*))))
		    *points*))

      (set! suopuhe_accent accent_list)
      (set! suopuhe_phrase phrase_list)
      (if hy_debug
	  (begin

	    (format t "   Time      F0 in syllable \"%s\".\n" (item.name syl))
	    
	    (mapcar (lambda (x)
		    (mapcar (lambda (y)
				(format t "%7.3f " y))
			      x)
		      (format t "\n"))
		    result)))  
      (if (and hy_debug
	       (not (item.next syl))) 
	  (begin
	    (format t "Phrase commands\n   Start    End    Size\n")
	    (mapcar (lambda (x)
		      (mapcar (lambda (y)
				(format t "%7.3f " y))
			      x)
		      (format t "\n"))
		    phrase_list)
	    (format t "Accent commands\n   Start    End    Size\n")
	    (mapcar (lambda (x)
		      (mapcar (lambda (y)
				(format t "%7.3f " y))
			      x)
		      (format t "\n"))
		    
		    accent_list)))
		      
      ;;; (print phrase_list)
      ;;;(print accent_list)

;      (if hy_debug 
;	  (begin
;	    (format t "f0 at the first point of %l = %l\n" (item.name syl) (car result))
;	    (print result)))
      result)))


(define (baptize syl)
  "(baptize SYL)
Baptizes the given syllable by concatenating together the names
of it's daughters (phones)"
  (item.set_name syl 
		 (let ((str ""))
		   (mapcar 
		    (lambda (x) 
		      (set! str (string-append str (item.feat x "name"))))
		    (item.daughters (item.relation syl 'SylStructure)))
		   str)))

(provide 'finnish_mv_int)