summaryrefslogtreecommitdiff
path: root/morfologik-polish/src/main/java/morfologik/stemming/PolishStemmer.java
blob: ac346e9fa202bc939332d37458eed976ddbccc9e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
package morfologik.stemming;

import java.util.*;

/**
 * A dictionary-based stemmer for the Polish language. This stemmer requires an
 * FSA-compiled dictionary to be present in classpath resources.
 * 
 * <p>All work is forwarded to a list of {@link DictionaryLookup} delegates. The
 * constructor registers exactly one delegate; the multi-delegate code paths
 * below only come into play if more delegates are ever added.</p>
 * 
 * <b>Objects of this class are not thread safe.</b>
 * 
 * @see morfologik.stemming.DictionaryLookup
 */
public final class PolishStemmer implements IStemmer, Iterable<WordData> {
	/**
	 * Dictionary lookup delegates, consulted in insertion order.
	 */
	private final List<DictionaryLookup> delegate = new ArrayList<DictionaryLookup>();

	/**
	 * Creates a stemmer with a single {@link DictionaryLookup} delegate built
	 * from the result of {@code Dictionary.getForLanguage("pl")}.
	 */
	public PolishStemmer() {
		delegate.add(new DictionaryLookup(Dictionary.getForLanguage("pl")));
	}

	/**
	 * {@inheritDoc}
	 * 
	 * <p>With a single delegate the call is forwarded to it directly. Otherwise
	 * the delegates are queried in order and the first non-empty result wins; if
	 * every delegate comes back empty, the last delegate's result is returned.
	 * When there are no delegates at all an empty list is returned, never
	 * <code>null</code>.</p>
	 */
	public List<WordData> lookup(CharSequence word) {
		if (delegate.size() == 1) {
			return delegate.get(0).lookup(word);
		}

		// Start from an empty list so that callers never see null, even if
		// no delegate is registered.
		List<WordData> forms = Collections.emptyList();
		for (DictionaryLookup lookup : delegate) {
			forms = lookup.lookup(word);
			if (!forms.isEmpty()) {
				break;
			}
		}
		return forms;
	}

	/**
	 * Iterates over all dictionary forms stored in this stemmer.
	 * 
	 * @return the iterator of the single underlying delegate.
	 * @throws UnsupportedOperationException
	 *             if this stemmer does not have exactly one delegate; iteration
	 *             over compound stemmers is not implemented.
	 */
	public Iterator<WordData> iterator() {
		if (delegate.size() != 1) {
			// UnsupportedOperationException is a RuntimeException, so callers
			// that caught the previously thrown RuntimeException still work.
			throw new UnsupportedOperationException("No iteration over compound stemmer forms: "
				+ Arrays.toString(delegate.toArray()));
		}
		return delegate.get(0).iterator();
	}
}