summaryrefslogtreecommitdiff
path: root/lib/voices/finnish/suo_fi_lj_diphone/festvox/suopuhe_filter.perl
diff options
context:
space:
mode:
authorNiko Tyni <ntyni@iki.fi>2005-12-04 14:34:26 +0100
committerNiko Tyni <ntyni@iki.fi>2005-12-04 14:34:26 +0100
commitefc580d40dc9f67c6edb5f7c5852f0fbef22578d (patch)
tree2b361b354cf44fefe9c4fa6f644ed6ebff870de1 /lib/voices/finnish/suo_fi_lj_diphone/festvox/suopuhe_filter.perl
Import festvox-suopuhe-lj_1.0g-20051204.orig.tar.gz
[dgit import orig festvox-suopuhe-lj_1.0g-20051204.orig.tar.gz]
Diffstat (limited to 'lib/voices/finnish/suo_fi_lj_diphone/festvox/suopuhe_filter.perl')
-rwxr-xr-xlib/voices/finnish/suo_fi_lj_diphone/festvox/suopuhe_filter.perl75
1 files changed, 75 insertions, 0 deletions
diff --git a/lib/voices/finnish/suo_fi_lj_diphone/festvox/suopuhe_filter.perl b/lib/voices/finnish/suo_fi_lj_diphone/festvox/suopuhe_filter.perl
new file mode 100755
index 0000000..db0912a
--- /dev/null
+++ b/lib/voices/finnish/suo_fi_lj_diphone/festvox/suopuhe_filter.perl
@@ -0,0 +1,75 @@
+#!/usr/bin/perl -w
+use strict;
+#;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+#;; ;;
+#;; Department of General Linguistics / Suopuhe project ;;
+#;; University of Helsinki, FI ;;
+#;; Copyright (c) 2000,2001,2002,2003 ;;
+#;; All Rights Reserved. ;;
+#;; ;;
+#;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+#;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+#; This program is distributed under Gnu Lesser General Public License (cf. the
+#; file LICENSE in distribution).
+
+#; This program is free software; you can redistribute it and/or modify
+#; it under the terms of the GNU Lesser General Public License as published by
+#; the Free Software Foundation; either version 2 of the License, or
+#; (at your option) any later version.
+
+#; This program is distributed in the hope that it will be useful,
+#; but WITHOUT ANY WARRANTY; without even the implied warranty of
+#; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+#; GNU Lesser General Public License for more details.
+
+# Suopuhe BASH filter
+# the input should be in one-sentence-per-line format
+
+# Tue Sep 9 11:23:05 EEST 2003
+# -Added the forgotten 'g' flag (s/// => s///g); its absence caused list
+# intonation to remove the wrong words
+
+# Fri Oct 26 12:04:40 EEST 2001
+# -Added partial sayas-replacement already here, because of Festival
+# (rxp-parser?) bug.
+
+# read the whole input (STDIN or the files named in @ARGV) into $_
+undef $/;  # no input record separator, so the next read returns everything at once
+$_ = <>;
+
+# strip XML comments (non-greedy; /s lets a comment span several lines)
+s/<!\-\-.*?\-\->//gs;
+s/\s+/ /gs;  # collapse every whitespace run, newlines included, into one space
+
+# work around XML parser bugs:
+s/ original=\"<\"//g;  # drop every ` original="<"` attribute
+s/ original=\"\&\"//g;  # drop every ` original="&"` attribute
+
+# "> yksi kaksi <" is rewritten as "> yksi-kaksi <"
+# (a crude fix for Festival's token blunder):
+while ( s/> ([A-Za-zåäö\-]+) ([A-Za-zåäö])/> $1-$2/ ) {}  # one join per pass; repeat until nothing matches
+# remove empty tokens (an opening tag, only spaces, then the closing tag):
+s/<token( +[a-z]+=\"[^\"]*\")*> +<\/token>\s*//g;
+
+
+# allow phrase and break only in mid position of token
+while ( s/(<break\/>)\s*(<\/token>)/$2 $1/ ||  # a <break/> right before </token> is moved after it
+ s/(<phrase\/>)\s*(<\/token>)/$2 $1/ ) {}  # same for <phrase/>; loop until neither matches
+
+while ( s/(<token( +[a-z]+=\"[^\"]*\")*>)\s+(<(break|phrase)\/>)/$3 $1/g ) {}  # a break/phrase right after an opening <token ...> is moved before it
+
+# collapse runs of space-separated <break/> / <phrase/> tags: the first one wins
+
+s/(<(break|phrase)\/>)( <(break|phrase)\/>)*/$1/g;
+
+
+
+
+
+
+s/> />\n/g;  # turn the space after each '>' into a newline
+s/ </\n</g;  # turn the space before each '<' into a newline
+
+print $_;  # write the filtered text to STDOUT