summaryrefslogtreecommitdiff
path: root/src-test/morfologik/fsa/FSA5Test.java
blob: 869dfeb5d340c6c1c4a4abf77133f484ad7e80b4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
package morfologik.fsa;

import static morfologik.fsa.FSAFlags.NEXTBIT;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import morfologik.stemming.Dictionary;

import org.junit.Test;

/**
 * Additional tests for {@link FSA5}.
 *
 * <p>
 * The small automata used here are loaded from the classpath resources
 * <code>abc.fsa</code> and <code>abc-numbers.fsa</code>, resolved relative to
 * this class. Every resource stream opened by a test is closed before the test
 * returns.
 */
public final class FSA5Test {
	/**
	 * The sequences, in sorted order, that both small test automata are
	 * expected to contain.
	 */
	public ArrayList<String> expected = new ArrayList<String>(Arrays.asList(
	        "a", "aba", "ac", "b", "ba", "c"));

	/**
	 * Reads <code>abc.fsa</code> and checks that it does not carry the
	 * {@link FSAFlags#NUMBERS} flag and that it contains exactly the
	 * {@link #expected} sequences.
	 */
	@Test
	public void testVersion5() throws IOException {
		final InputStream in = openResource("abc.fsa");
		try {
			final FSA fsa = FSA.read(in);
			assertFalse(fsa.getFlags().contains(FSAFlags.NUMBERS));
			verifyContent(expected, fsa);
		} finally {
			in.close();
		}
	}

	/**
	 * Reads <code>abc-numbers.fsa</code> and checks that it contains exactly
	 * the {@link #expected} sequences and carries the
	 * {@link FSAFlags#NUMBERS} flag.
	 */
	@Test
	public void testVersion5WithNumbers() throws IOException {
		final InputStream in = openResource("abc-numbers.fsa");
		try {
			final FSA fsa = FSA.read(in);

			verifyContent(expected, fsa);
			assertTrue(fsa.getFlags().contains(FSAFlags.NUMBERS));
		} finally {
			in.close();
		}
	}

	/**
	 * Checks that both small automata report the same node and arc counts
	 * through {@link FSAInfo}, and that these counts are 4 nodes and 7 arcs.
	 */
	@Test
	public void testArcsAndNodes() throws IOException {
		final InputStream in1 = openResource("abc.fsa");
		try {
			final InputStream in2 = openResource("abc-numbers.fsa");
			try {
				final FSA fsa1 = FSA.read(in1);
				final FSA fsa2 = FSA.read(in2);

				FSAInfo info1 = new FSAInfo(fsa1);
				FSAInfo info2 = new FSAInfo(fsa2);

				assertEquals(info1.arcsCount, info2.arcsCount);
				assertEquals(info1.nodeCount, info2.nodeCount);

				assertEquals(4, info2.nodeCount);
				assertEquals(7, info2.arcsCount);
			} finally {
				in2.close();
			}
		} finally {
			in1.close();
		}
	}
	
	/**
	 * Checks the node, arc and final state counts reported by {@link FSAInfo}
	 * for the automaton of the dictionary returned by
	 * <code>Dictionary.getForLanguage("pl")</code>.
	 *
	 * NOTE(review): the expected numbers are tied to the exact dictionary data
	 * available at test time; they presumably need updating whenever that
	 * dictionary changes.
	 */
	@Test
	public void testArcsAndNodesLarge() throws IOException {
		final FSA fsa3 = Dictionary.getForLanguage("pl").fsa;
		FSAInfo info3 = new FSAInfo(fsa3);

		assertEquals(293329, info3.nodeCount);
		assertEquals(679676, info3.arcsCount);
		assertEquals(3672200, info3.finalStatesCount);
	}

	/**
	 * Walks <code>abc-numbers.fsa</code> with {@link #walkNode} and checks the
	 * counter assigned to every sequence.
	 */
	@Test
	public void testNumbers() throws IOException {
		final InputStream in = openResource("abc-numbers.fsa");
		try {
			final FSA5 fsa = FSA.read(in);

			assertTrue(fsa.getFlags().contains(NEXTBIT));

			// Get all numbers for nodes.
			byte[] buffer = new byte[128];
			final ArrayList<String> result = new ArrayList<String>();
			walkNode(buffer, 0, fsa, fsa.getRootNode(), 0, result);

			// The walk order follows the arc order in the automaton, so sort
			// before comparing against the fixed expectation.
			Collections.sort(result);
			assertEquals(Arrays
			        .asList("0 c", "1 b", "2 ba", "3 a", "4 ac", "5 aba"), result);
		} finally {
			in.close();
		}
	}

	/**
	 * Recursively visits all arcs reachable from <code>node</code> and records
	 * every sequence that ends on a final or terminal arc, prefixed with the
	 * value of a running counter.
	 *
	 * <p>
	 * The counter is incremented after each final arc and, after descending
	 * into an arc's target node, advanced by
	 * <code>fsa.getRightLanguageCount</code> of that node (presumably the
	 * number of sequences stored below it -- confirm against {@link FSA}).
	 *
	 * @param buffer
	 *            Scratch array receiving the labels of the current path. It
	 *            must be long enough for the deepest path in the automaton.
	 * @param depth
	 *            Index in <code>buffer</code> at which labels of arcs leaving
	 *            <code>node</code> are stored.
	 * @param fsa
	 *            The automaton to traverse.
	 * @param node
	 *            The node whose outgoing arcs are visited.
	 * @param cnt
	 *            The counter value on entry to <code>node</code>.
	 * @param result
	 *            Receives one <code>"&lt;counter&gt; &lt;sequence&gt;"</code>
	 *            entry per recorded sequence; labels are decoded as UTF-8.
	 * @throws IOException
	 *             Declared because decoding by charset name may throw
	 *             <code>UnsupportedEncodingException</code>.
	 */
	public static void walkNode(byte[] buffer, int depth, FSA fsa, int node,
	        int cnt, List<String> result) throws IOException {
		// An arc identifier of 0 marks the end of the node's arc list.
		for (int arc = fsa.getFirstArc(node); arc != 0; arc = fsa.getNextArc(arc)) {
			buffer[depth] = fsa.getArcLabel(arc);

			if (fsa.isArcFinal(arc) || fsa.isArcTerminal(arc)) {
				result.add(cnt + " " + new String(buffer, 0, depth + 1, "UTF-8"));
			}

			// The sequence has been recorded with the current counter; only
			// now move the counter past it.
			if (fsa.isArcFinal(arc)) {
				cnt++;
			}

			if (!fsa.isArcTerminal(arc)) {
				final int target = fsa.getEndNode(arc);
				walkNode(buffer, depth + 1, fsa, target, cnt, result);
				// cnt is passed by value, so skip over everything the
				// recursive call has numbered.
				cnt += fsa.getRightLanguageCount(target);
			}
		}
	}

	/**
	 * Opens a classpath resource located relative to this class.
	 *
	 * @param name
	 *            The resource name, as accepted by
	 *            {@link Class#getResourceAsStream(String)}.
	 * @return An open stream; the caller is responsible for closing it.
	 * @throws AssertionError
	 *             If the resource cannot be found.
	 */
	private static InputStream openResource(String name) {
		final InputStream in = FSA5Test.class.getResourceAsStream(name);
		// Fail with the resource name instead of passing null to FSA.read.
		assertTrue("Missing test resource: " + name, in != null);
		return in;
	}

	/**
	 * Asserts that the automaton contains exactly the given sequences.
	 *
	 * <p>
	 * Every buffer returned from <code>fsa.getSequences()</code> is required to
	 * be array-backed with a zero array offset and a zero position; its
	 * remaining bytes are decoded as UTF-8. The decoded sequences are sorted
	 * before being compared, so <code>expected</code> must be sorted as well.
	 *
	 * @param expected
	 *            The expected sequences, in sorted order.
	 * @param fsa
	 *            The automaton to check.
	 */
	private static void verifyContent(List<String> expected, FSA fsa) throws IOException {
		final ArrayList<String> actual = new ArrayList<String>();

		for (ByteBuffer bb : fsa.getSequences()) {
			assertEquals(0, bb.arrayOffset());
			assertEquals(0, bb.position());
			actual.add(new String(bb.array(), 0, bb.remaining(), "UTF-8"));
		}

		// Compare sizes first so that a count mismatch is reported directly.
		assertEquals(expected.size(), actual.size());
		Collections.sort(actual);
		assertEquals(expected, actual);
	}
}