diff options
author | Niko Tyni <ntyni@iki.fi> | 2005-10-18 14:32:20 +0200 |
---|---|---|
committer | Niko Tyni <ntyni@iki.fi> | 2005-10-18 14:32:20 +0200 |
commit | 8e03a58d62f4b8c00978634f116819ff246c09d3 (patch) | |
tree | 29284d785fe91b3473a19dcc257dfaa6066f4b12 /lib/voices/finnish/hy_fi_mv_diphone/festvox/suopuhe_filter.perl |
Import festvox-suopuhe-mv_20041119.orig.tar.gz
[dgit import orig festvox-suopuhe-mv_20041119.orig.tar.gz]
Diffstat (limited to 'lib/voices/finnish/hy_fi_mv_diphone/festvox/suopuhe_filter.perl')
-rwxr-xr-x | lib/voices/finnish/hy_fi_mv_diphone/festvox/suopuhe_filter.perl | 79 |
1 files changed, 79 insertions, 0 deletions
diff --git a/lib/voices/finnish/hy_fi_mv_diphone/festvox/suopuhe_filter.perl b/lib/voices/finnish/hy_fi_mv_diphone/festvox/suopuhe_filter.perl new file mode 100755 index 0000000..1e12d68 --- /dev/null +++ b/lib/voices/finnish/hy_fi_mv_diphone/festvox/suopuhe_filter.perl @@ -0,0 +1,79 @@ +#!/usr/bin/perl -w +use strict; +#;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +#;; ;; +#;; Department of General Linguistics / Suopuhe project ;; +#;; University of Helsinki, FI ;; +#;; Copyright (c) 2000,2001,2002,2003 ;; +#;; All Rights Reserved. ;; +#;; ;; +#;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +#;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +#; This program is distributed under Gnu Lesser General Public License (cf. the +#; file LICENSE in distribution). + +#; This program is free software; you can redistribute it and/or modify +#; it under the terms of the GNU Lesser General Public License as published by +#; the Free Software Foundation; either version 2 of the License, or +#; (at your option) any later version. + +#; This program is distributed in the hope that it will be useful, +#; but WITHOUT ANY WARRANTY; without even the implied warranty of +#; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +#; GNU Lesser General Public License for more details. + +# suopuheen BASH-filtteri +# syötteen tulisi olla lause/rivi -muotoista + +# Fri Oct 26 12:04:40 EEST 2001 +# -Added partial sayas-replacement already here, because of Festival +# (rxp-parser?) bug. 
# Suopuhe markup filter.
#
# Reads the whole input (the files named on the command line, or STDIN when
# there are none), cleans up the token markup and prints the result to
# STDOUT with one tag / one text run per line.

# _filter_markup($text)
#
# Applies every clean-up step to $text and returns the resulting string.
# An undefined $text is treated as the empty string.
sub _filter_markup {
    my ($text) = @_;
    return '' unless defined $text;

    # Remove comments (they may span several lines, hence /s).
    $text =~ s/<!\-\-.*?\-\->//gs;

    # Collapse every run of whitespace, newlines included, to one space.
    $text =~ s/\s+/ /gs;

    # The XML parser chokes on these attributes, so drop them.
    $text =~ s/ original=\"<\"//g;
    $text =~ s/ original=\"\&\"//g;

    # "> yksi kaksi <" becomes "> yksi-kaksi <"
    # (a crude workaround for Festival's tokenisation blunder).
    # Each pass joins one more word, so repeat until nothing changes.
    1 while $text =~ s/> ([A-Za-zåäö\-]+) ([A-Za-zåäö])/> $1-$2/;

    # Remove empty tokens.  The input is processed as one string, so /g is
    # required; without it only the first empty token would be removed.
    $text =~ s/<token(?: +[a-z]+=\"[^\"]*\")*> +<\/token>\s*//g;

    # Put a line break after every tag end and before every tag start.
    $text =~ s/> />\n/g;
    $text =~ s/ </\n</g;

    return $text;
}

# _main()
#
# Slurps all input, filters it and prints it.
sub _main {
    # Localise $/ so slurp mode does not leak into other code.  List context
    # makes <> return every input file rather than just the first one, and
    # join() yields '' instead of undef when there is nothing to read.
    my $input = do { local $/; join '', <> };

    print _filter_markup($input);
    return;
}

# Run only when executed as a script, so the file can also be loaded
# (e.g. by a test) without reading STDIN.
_main() unless caller;

# Earlier line-by-line implementation, kept for reference:
#
#my $status = 1;
#while ( <> ) {
#    if ( /<utterance>/ ) { $status = 1; }
#    # skip leading punctuation tokens
#    # (it might be better to read them...)
#    elsif ( /<token pos=\"punc\">/ ) { next; }
#    else { $status = 0; }
#    # delete comments...
#    s/<!\-\-.*?\-\->//g;
#
#    s/ original=\"<\"//g; # the XML parser chokes on this
#    s/ original=\"\&\"//g;
#
#    # "> yksi kaksi <" becomes "> yksikaksi <"
#    while ( s/> ([A-Za-zåäö]+) ([A-Za-zåäö])/> $1$2/ ) {}
#
#    # remove empties... (does this even work?)
#    s/<token( +[a-z]+=\"[^\"]*\")*> +<\/token>//;
#
#    print;
#}