blob: b3230ac3764a17a7589fbcf3a3455baf3e229548 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
|
package morfologik.stemming;
import static morfologik.stemming.DictionaryLookupTest.assertNoStemFor;
import static morfologik.stemming.DictionaryLookupTest.stem;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import java.io.IOException;
import java.util.HashSet;
import java.util.TreeSet;
import org.junit.Ignore;
import org.junit.Test;
/**
 * Tests for {@link PolishStemmer}.
 */
public class PolishStemmerTest {
    /**
     * Looks up a handful of inflected forms and verifies the values returned
     * by the {@code stem} helper, then verifies that a word absent from the
     * dictionary produces no stem.
     */
    @Test
    public void testLexemes() throws IOException {
        final PolishStemmer stemmer = new PolishStemmer();

        // Only the first element of the lookup result is checked here.
        assertEquals("żywotopisarstwo", stem(stemmer, "żywotopisarstwie")[0]);
        assertEquals("abradować", stem(stemmer, "abradowałoby")[0]);

        // Here the whole result array is compared.
        final String[] expectedNeuterNoun = { "żywotopisarstwo", "subst:sg:loc:n" };
        assertArrayEquals(expectedNeuterNoun, stem(stemmer, "żywotopisarstwie"));

        final String[] expectedFeminineNoun = { "bazia", "subst:pl:inst:f" };
        assertArrayEquals(expectedFeminineNoun, stem(stemmer, "baziami"));

        // This word is not in the dictionary.
        assertNoStemFor(stemmer, "martygalski");
    }

    /**
     * Diagnostic helper, ignored by default: iterates over every entry
     * exposed by {@link PolishStemmer}, reports entries without a tag on
     * standard error and prints the distinct tags, sorted, on standard output.
     */
    @Test
    @Ignore
    public void listUniqueTags() throws IOException {
        final HashSet<String> uniqueTags = new HashSet<String>();

        for (WordData entry : new PolishStemmer()) {
            final CharSequence tag = entry.getTag();
            if (tag != null) {
                uniqueTags.add(tag.toString());
            } else {
                System.err.println("Missing tag for: " + entry.getWord());
            }
        }

        // Copy into a TreeSet so the tags are printed in sorted order.
        final TreeSet<String> sortedTags = new TreeSet<String>(uniqueTags);
        for (String tag : sortedTags) {
            System.out.println(tag);
        }
    }
}
|