blob: 1e12d68bb96e42b5abde8cd45f58f5a2dfa4372d (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
|
#!/usr/bin/perl -w
use strict;
#;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
#;; ;;
#;; Department of General Linguistics / Suopuhe project ;;
#;; University of Helsinki, FI ;;
#;; Copyright (c) 2000,2001,2002,2003 ;;
#;; All Rights Reserved. ;;
#;; ;;
#;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
#;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
#; This program is distributed under Gnu Lesser General Public License (cf. the
#; file LICENSE in distribution).
#; This program is free software; you can redistribute it and/or modify
#; it under the terms of the GNU Lesser General Public License as published by
#; the Free Software Foundation; either version 2 of the License, or
#; (at your option) any later version.
#; This program is distributed in the hope that it will be useful,
#; but WITHOUT ANY WARRANTY; without even the implied warranty of
#; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#; GNU Lesser General Public License for more details.
# Suopuhe BASH filter.
# The input should be in one-sentence-per-line format.
# Fri Oct 26 12:04:40 EEST 2001
# -Added partial sayas-replacement already here, because of Festival
# (rxp-parser?) bug.
# filter_markup($text)
#
# Normalises a chunk of token markup and returns the cleaned-up string.
# The steps run in a fixed order because later ones rely on the single-space
# normalisation done by the earlier ones:
#   1. strip <!-- ... --> comments (may span several lines),
#   2. collapse every run of whitespace to one space,
#   3. drop original="<" and original="&" attributes,
#   4. join space-separated words that directly follow a '>' with hyphens,
#   5. remove every token element whose content is only spaces,
#   6. put each tag and each text chunk on a line of its own.
#
# Parameter: $text - the complete input as one string.
# Returns:   the filtered string.
sub filter_markup {
    my ($text) = @_;

    # Remove comments.
    $text =~ s/<!\-\-.*?\-\->//gs;

    # Collapse whitespace (including newlines) into single spaces.
    $text =~ s/\s+/ /gs;

    # The XML parser chokes on these attribute values.
    $text =~ s/ original=\"<\"//g;
    $text =~ s/ original=\"\&\"//g;

    # "> yksi kaksi <" becomes "> yksi-kaksi <" (a crude workaround for a
    # Festival tokenisation bug).  A plain /g is not enough here: each pass
    # must re-scan from the same '>' so that the freshly hyphenated word can
    # absorb the next one, hence the loop.
    # NOTE(review): the non-ASCII characters in the classes below are
    # presumably Finnish accented letters in the file's legacy 8-bit
    # encoding - confirm before re-saving the file in another encoding.
    1 while $text =~ s/> ([A-Za-z���\-]+) ([A-Za-z���])/> $1-$2/;

    # Remove empty tokens.  The whole document is one string, so /g is
    # required; without it only the first empty token would be dropped.
    $text =~ s/<token( +[a-z]+=\"[^\"]*\")*> +<\/token>\s*//g;

    # One tag / one text chunk per line.
    $text =~ s/> />\n/g;
    $text =~ s/ </\n</g;

    return $text;
}

# Slurp the input (STDIN or the file named on the command line) in one read;
# 'local' keeps the change to $/ confined to this expression.
my $input = do { local $/; <> };

# A failed read yields undef; treat it as empty input instead of triggering
# "uninitialized value" warnings under -w.
$input = '' unless defined $input;

print filter_markup($input);
#my $status = 1;
#while ( <> ) {
# if ( /<utterance>/ ) { $status = 1; }
# # skipataan aloittavat puncit
# # voisi olla parempi ehk� lukea ne...
# elsif ( /<token pos=\"punc\">/ ) { next; }
# else { $status = 0; }
# # delete comments...
# s/<!\-\-.*?\-\->//g;
#
# s/ original=\"<\"//g; # XML-parseri bugaa t�h�n
# s/ original=\"\&\"//g;
#
# # #> yksi kaksi <# muuttuu muotoon #> yksikaksi <#
# while ( s/> ([A-Za-z���]+) ([A-Za-z���])/> $1$2/ ) {}
#
# # tyhj�t pois... (toimiikohan)
# s/<token( +[a-z]+=\"[^\"]*\")*> +<\/token>//;
#
#
#
# print;
#}
|