summaryrefslogtreecommitdiff
path: root/lib/voices/finnish/hy_fi_mv_diphone/festvox/suopuhe_filter.perl
diff options
context:
space:
mode:
Diffstat (limited to 'lib/voices/finnish/hy_fi_mv_diphone/festvox/suopuhe_filter.perl')
-rwxr-xr-xlib/voices/finnish/hy_fi_mv_diphone/festvox/suopuhe_filter.perl79
1 files changed, 79 insertions, 0 deletions
diff --git a/lib/voices/finnish/hy_fi_mv_diphone/festvox/suopuhe_filter.perl b/lib/voices/finnish/hy_fi_mv_diphone/festvox/suopuhe_filter.perl
new file mode 100755
index 0000000..1e12d68
--- /dev/null
+++ b/lib/voices/finnish/hy_fi_mv_diphone/festvox/suopuhe_filter.perl
@@ -0,0 +1,79 @@
+#!/usr/bin/perl -w
+use strict;
+#;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+#;; ;;
+#;; Department of General Linguistics / Suopuhe project ;;
+#;; University of Helsinki, FI ;;
+#;; Copyright (c) 2000,2001,2002,2003 ;;
+#;; All Rights Reserved. ;;
+#;; ;;
+#;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+#;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+#; This program is distributed under Gnu Lesser General Public License (cf. the
+#; file LICENSE in distribution).
+
+#; This program is free software; you can redistribute it and/or modify
+#; it under the terms of the GNU Lesser General Public License as published by
+#; the Free Software Foundation; either version 2 of the License, or
+#; (at your option) any later version.
+
+#; This program is distributed in the hope that it will be useful,
+#; but WITHOUT ANY WARRANTY; without even the implied warranty of
+#; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+#; GNU Lesser General Public License for more details.
+
+# Suopuhe BASH filter
+# the input should be in one-sentence-per-line format
+
+# Fri Oct 26 12:04:40 EEST 2001
+# -Added partial sayas-replacement already here, because of Festival
+# (rxp-parser?) bug.
+
+undef $/;    # slurp mode: the next read returns the whole input as one string
+# Read all input (files named on the command line, else STDIN) into $_.
+$_ = <>;
+$_ = '' unless defined $_;    # nothing readable: avoid undef warnings under -w
+# Strip XML comments, then collapse every whitespace run into a single space.
+s/<!\-\-.*?\-\->//gs;
+s/\s+/ /gs;
+# Drop original="<" and original="&" attributes (the XML parser chokes on them).
+s/ original=\"<\"//g;
+s/ original=\"\&\"//g;
+# Join words that follow a tag: "> yksi kaksi <" becomes "> yksi-kaksi <".
+# Each pass hyphenates one space between ASCII words, so loop until no match
+# (crude workaround for what the original note calls Festival's token bug).
+while ( s/> ([A-Za-z\-]+) ([A-Za-z])/> $1-$2/ ) {}
+# Remove empty tokens; /g is required because the whole input is one string.
+s/<token( +[a-z]+=\"[^\"]*\")*> +<\/token>\s*//g;
+s/> />\n/g;     # turn the space after each '>' into a newline
+s/ </\n</g;     # turn the space before each '<' into a newline
+# Write the filtered document to standard output.
+print $_;
+
+#my $status = 1;
+#while ( <> ) {
+# if ( /<utterance>/ ) { $status = 1; }
+# # skip leading punctuation tokens
+# # (it might be better to read them instead...)
+# elsif ( /<token pos=\"punc\">/ ) { next; }
+# else { $status = 0; }
+# # delete comments...
+# s/<!\-\-.*?\-\->//g;
+#
+# s/ original=\"<\"//g; # the XML parser chokes on this
+# s/ original=\"\&\"//g;
+#
+# # #> yksi kaksi <# muuttuu muotoon #> yksikaksi <#
+# while ( s/> ([A-Za-z]+) ([A-Za-z])/> $1$2/ ) {}
+#
+# # remove empty tokens... (does this work?)
+# s/<token( +[a-z]+=\"[^\"]*\")*> +<\/token>//;
+#
+#
+#
+# print;
+#}
+
+