summary refs log tree commit diff
path: root/morfologik-tools/src/test/java/morfologik/tools
diff options
context:
space:
mode:
Diffstat (limited to 'morfologik-tools/src/test/java/morfologik/tools')
-rw-r--r-- morfologik-tools/src/test/java/morfologik/tools/FSABuildToolTest.java 53
-rw-r--r-- morfologik-tools/src/test/java/morfologik/tools/LauncherTest.java 26
-rw-r--r-- morfologik-tools/src/test/java/morfologik/tools/MorphEncodingToolTest.java 243
-rw-r--r-- morfologik-tools/src/test/java/morfologik/tools/SequenceEncodersRandomizedTest.java 106
-rw-r--r-- morfologik-tools/src/test/java/morfologik/tools/SequenceEncodersStaticTest.java 96
-rw-r--r-- morfologik-tools/src/test/java/morfologik/tools/Text2FSA5Test.java 37
6 files changed, 561 insertions, 0 deletions
diff --git a/morfologik-tools/src/test/java/morfologik/tools/FSABuildToolTest.java b/morfologik-tools/src/test/java/morfologik/tools/FSABuildToolTest.java
new file mode 100644
index 0000000..4d45f9c
--- /dev/null
+++ b/morfologik-tools/src/test/java/morfologik/tools/FSABuildToolTest.java
@@ -0,0 +1,53 @@
+package morfologik.tools;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.PrintStream;
+import java.nio.charset.Charset;
+
+import org.hamcrest.core.StringContains;
+import org.junit.Assert;
+import org.junit.Test;
+
+import com.google.common.base.Charsets;
+import com.google.common.base.Joiner;
+import com.google.common.io.ByteStreams;
+import com.google.common.io.Files;
+
+public class FSABuildToolTest
+{
+    /* Building from BOM-prefixed UTF-8 input must write a message mentioning "UTF-8 BOM" to System.err. */
+    @Test
+    public void testStemmingFile() throws Exception {
+        // Create temporary input and output files; both are deleted on JVM exit.
+        File input = File.createTempFile("input", "in");
+        File output = File.createTempFile("output", "fsa.txt");
+        input.deleteOnExit();
+        output.deleteOnExit();
+
+        // Populate the input file with data.
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        // Emit UTF-8 BOM prefixed list of three strings.
+        baos.write(new byte [] {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF});
+        baos.write(Joiner.on('\n').join("abc", "def", "xyz").getBytes(Charsets.UTF_8));
+        Files.write(baos.toByteArray(), input);
+
+        baos.reset();  // Reuse the buffer to capture what the tool prints to System.err.
+        PrintStream prev = System.err;
+        PrintStream ps = new PrintStream(baos, true, "UTF-8");
+        System.setErr(ps);
+        try {
+            FSABuildTool.main(new String [] {
+                "--input", input.getAbsolutePath(),
+                "--output", output.getAbsolutePath()
+            });
+        } finally {
+            System.setErr(prev);  // Always restore the real stream, even if the tool throws.
+        }
+
+        String logs = new String(baos.toByteArray(), Charsets.UTF_8);  // Same charset as the capture stream.
+        Assert.assertThat(logs, StringContains.containsString("UTF-8 BOM"));
+
+        System.out.println(logs);
+    }
+}
diff --git a/morfologik-tools/src/test/java/morfologik/tools/LauncherTest.java b/morfologik-tools/src/test/java/morfologik/tools/LauncherTest.java
new file mode 100644
index 0000000..8e1d0e9
--- /dev/null
+++ b/morfologik-tools/src/test/java/morfologik/tools/LauncherTest.java
@@ -0,0 +1,26 @@
+package morfologik.tools;
+
+import java.util.Map;
+
+import morfologik.tools.Launcher.ToolInfo;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/*
+ * Smoke test: every tool returned by Launcher.initTools() must handle "--help" without throwing.
+ */
+public class LauncherTest {
+    /* Invokes each tool with "--help"; any Throwable fails the test and is kept as the cause. */
+    @Test
+    public void testTools() throws Exception {
+        for (Map.Entry<String, ToolInfo> e : Launcher.initTools().entrySet()) {
+            try {
+                e.getValue().invoke(new String[] {"--help"});
+            } catch (Throwable t) {
+                throw (AssertionError) new AssertionError(
+                    "Unable to launch " + e.getKey() + ": " + t.getMessage()).initCause(t);
+            }
+        }
+    }
+}
diff --git a/morfologik-tools/src/test/java/morfologik/tools/MorphEncodingToolTest.java b/morfologik-tools/src/test/java/morfologik/tools/MorphEncodingToolTest.java
new file mode 100644
index 0000000..496880f
--- /dev/null
+++ b/morfologik-tools/src/test/java/morfologik/tools/MorphEncodingToolTest.java
@@ -0,0 +1,243 @@
+package morfologik.tools;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.PrintStream;
+import java.io.PrintWriter;
+import java.util.List;
+
+import morfologik.fsa.FSA;
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.DictionaryMetadataBuilder;
+import morfologik.stemming.EncoderType;
+import morfologik.stemming.WordData;
+
+import org.fest.assertions.api.Assertions;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Test;
+
+import com.carrotsearch.randomizedtesting.RandomizedTest;
+import com.google.common.base.Charsets;
+import com.google.common.io.Closer;
+
+/*
+ * Tests for MorphEncodingTool: encoder variants, custom annotation characters and error reporting.
+ */
+public class MorphEncodingToolTest extends RandomizedTest {
+ private Closer closer = Closer.create();
+
+ @After
+ public void cleanup() throws IOException {
+ closer.close(); // Closes the streams the test methods registered via closer.register(...).
+ }
+
+ @Test
+ public void testTool() throws Exception {
+ // Create temporary input and output files.
+ File input = super.newTempFile();
+ File output = super.newTempFile();
+
+ // Populate the input with three tab-separated (word, base, tag) lines; the last has no trailing newline.
+ PrintWriter w =
+ new PrintWriter(
+ new OutputStreamWriter(
+ closer.register(new FileOutputStream(input)), "UTF-8"));
+ w.println("passagère\tpassager\ttag");
+ w.println("nieduży\tduży\ttest");
+ w.print("abcd\tabc\txyz");
+ w.close();
+
+ // suffix encoder; no --annotation is passed and the expected fields are '+'-separated
+ MorphEncodingTool.main(new String[] {
+ "--input", input.getAbsolutePath(),
+ "--output", output.getAbsolutePath(),
+ "--encoder", "suffix" });
+
+ BufferedReader testOutput =
+ new BufferedReader(
+ new InputStreamReader(
+ closer.register(new FileInputStream(output.getAbsolutePath())), "UTF-8"));
+ Assert.assertEquals("passagère+Eer+tag", testOutput.readLine());
+ Assert.assertEquals("nieduży+Iduży+test", testOutput.readLine());
+ Assert.assertEquals("abcd+B+xyz", testOutput.readLine());
+
+ // prefix encoder, run again with the same input and output paths
+ MorphEncodingTool.main(new String[] {
+ "--input", input.getAbsolutePath(),
+ "--output", output.getAbsolutePath(),
+ "--encoder", "prefix" });
+
+ testOutput =
+ new BufferedReader(
+ new InputStreamReader(
+ closer.register(new FileInputStream(output.getAbsolutePath())), "UTF-8"));
+ Assert.assertEquals("passagère+AEer+tag", testOutput.readLine());
+ Assert.assertEquals("nieduży+DA+test", testOutput.readLine());
+ Assert.assertEquals("abcd+AB+xyz", testOutput.readLine());
+
+ // infix encoder, run again with the same input and output paths
+ MorphEncodingTool.main(new String[] {
+ "--input", input.getAbsolutePath(),
+ "--output", output.getAbsolutePath(),
+ "--encoder", "infix" });
+
+ testOutput =
+ new BufferedReader(
+ new InputStreamReader(
+ closer.register(new FileInputStream(output.getAbsolutePath())), "UTF-8"));
+ Assert.assertEquals("passagère+GDAr+tag", testOutput.readLine());
+ Assert.assertEquals("nieduży+ADA+test", testOutput.readLine());
+ Assert.assertEquals("abcd+AAB+xyz", testOutput.readLine());
+
+ // custom annotation - a tab passed via --annotation replaces '+' in the expected lines
+ MorphEncodingTool.main(new String[] {
+ "--annotation", "\t",
+ "--input", input.getAbsolutePath(),
+ "--output", output.getAbsolutePath(),
+ "--encoder", "infix" });
+
+ testOutput =
+ new BufferedReader(
+ new InputStreamReader(
+ closer.register(new FileInputStream(output.getAbsolutePath())), "UTF-8"));
+ Assert.assertEquals("passagère\tGDAr\ttag", testOutput.readLine());
+ Assert.assertEquals("nieduży\tADA\ttest", testOutput.readLine());
+ Assert.assertEquals("abcd\tAAB\txyz", testOutput.readLine());
+ }
+
+ /* */
+ @Test
+ public void testStemmingFile() throws Exception {
+ // Create a simple plain text file.
+ File input = super.newTempFile();
+ File output = super.newTempFile();
+
+ PrintWriter w =
+ new PrintWriter(
+ new OutputStreamWriter(
+ closer.register(new FileOutputStream(input)), "UTF-8"));
+ w.println("passagère\tpassager");
+ w.println("nieduży\tduży");
+ w.println();
+ w.println("abcd\tabc");
+ w.close();
+
+ MorphEncodingTool.main(new String[] {
+ "--input", input.getAbsolutePath(),
+ "--output", output.getAbsolutePath(),
+ "-e", "suffix" });
+
+ BufferedReader testOutput =
+ new BufferedReader(
+ new InputStreamReader(
+ closer.register(new FileInputStream(output.getAbsolutePath())), "UTF-8"));
+ Assert.assertEquals("passagère+Eer+", testOutput.readLine());
+ Assert.assertEquals("nieduży+Iduży+", testOutput.readLine());
+ Assert.assertEquals("abcd+B+", testOutput.readLine());
+
+ testOutput.close();
+ }
+
+ /* */
+ @Test
+ public void testZeroByteSeparator() throws Exception {
+ // Create a simple plain text file.
+ File input = newTempFile();
+ File output = newTempFile();
+
+ // Populate the file with data.
+ PrintWriter w =
+ new PrintWriter(
+ new OutputStreamWriter(
+ closer.register(new FileOutputStream(input)), "UTF-8"));
+ w.println("passagère\tpassager\tTAG1");
+ w.println("nieduży\tduży\tTAG2");
+ w.println("abcd\tabc\tTAG3");
+ w.close();
+
+ MorphEncodingTool.main(new String[] {
+ "--input", input.getAbsolutePath(),
+ "--output", output.getAbsolutePath(),
+ "-e", "suffix",
+ "--annotation", "\u0000"});
+
+ BufferedReader testOutput =
+ new BufferedReader(
+ new InputStreamReader(
+ closer.register(new FileInputStream(output.getAbsolutePath())), "UTF-8"));
+
+ Assert.assertEquals("passagère\u0000Eer\u0000TAG1", testOutput.readLine());
+ Assert.assertEquals("nieduży\u0000Iduży\u0000TAG2", testOutput.readLine());
+ Assert.assertEquals("abcd\u0000B\u0000TAG3", testOutput.readLine());
+
+ File fsaFile = newTempFile();
+ FSABuildTool.main(
+ "--input", output.getAbsolutePath(),
+ "--output", fsaFile.getAbsolutePath());
+
+ FSA fsa = FSA.read(fsaFile);
+ DictionaryLookup dl = new DictionaryLookup(
+ new Dictionary(
+ fsa,
+ new DictionaryMetadataBuilder()
+ .separator((char) 0)
+ .encoding(Charsets.UTF_8)
+ .encoder(EncoderType.SUFFIX)
+ .build()));
+
+ checkEntry(dl, "passagère", "passager", "TAG1");
+ checkEntry(dl, "nieduży", "duży", "TAG2");
+ checkEntry(dl, "abcd", "abc", "TAG3");
+ }
+
+ /* */
+ @Test
+ public void testAnnotationCharacterInBaseOrDerivedWord() throws Exception {
+ // Create a simple plain text file.
+ File input = newTempFile();
+ File output = newTempFile();
+
+ // Populate the file with data.
+ PrintWriter w =
+ new PrintWriter(
+ new OutputStreamWriter(
+ closer.register(new FileOutputStream(input)), "UTF-8"));
+ w.println("foo+\tbar-\tTAG1");
+ w.close();
+
+ PrintStream err = System.err;
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+
+ try {
+ System.setErr(new PrintStream(baos, true, "UTF-8"));
+ MorphEncodingTool.main(new String[] {
+ "--input", input.getAbsolutePath(),
+ "--output", output.getAbsolutePath(),
+ "-e", "suffix",
+ "--annotation", "+"});
+ } finally {
+ System.err.flush();
+ System.setErr(err);
+ }
+
+ Assertions.assertThat(new String(baos.toByteArray(), Charsets.UTF_8))
+ .contains("contain the annotation byte");
+ }
+
+    /* Asserts that looking up word yields exactly one entry carrying the given word, base form and tag. */
+    private void checkEntry(DictionaryLookup dl, String word, String base, String tag) {
+        List<WordData> matches = dl.lookup(word);
+        Assertions.assertThat(matches.size()).isEqualTo(1);
+        Assertions.assertThat(matches.get(0).getWord().toString()).isEqualTo(word);
+        Assertions.assertThat(matches.get(0).getStem().toString()).isEqualTo(base);
+        Assertions.assertThat(matches.get(0).getTag().toString()).isEqualTo(tag);
+    }
+}
diff --git a/morfologik-tools/src/test/java/morfologik/tools/SequenceEncodersRandomizedTest.java b/morfologik-tools/src/test/java/morfologik/tools/SequenceEncodersRandomizedTest.java
new file mode 100644
index 0000000..d0379d7
--- /dev/null
+++ b/morfologik-tools/src/test/java/morfologik/tools/SequenceEncodersRandomizedTest.java
@@ -0,0 +1,106 @@
+package morfologik.tools;
+
+import java.nio.ByteBuffer;
+import java.util.List;
+
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.DictionaryMetadataBuilder;
+import morfologik.stemming.EncoderType;
+
+import org.junit.Test;
+
+import com.carrotsearch.hppc.ByteArrayList;
+import com.carrotsearch.randomizedtesting.RandomizedTest;
+import com.carrotsearch.randomizedtesting.annotations.Name;
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+import com.google.common.base.Charsets;
+import com.google.common.collect.Lists;
+
+public class SequenceEncodersRandomizedTest extends RandomizedTest {
+ private final SequenceEncoders.IEncoder coder;
+
+ public SequenceEncodersRandomizedTest(@Name("coder") SequenceEncoders.IEncoder coder)
+ { // coder: the encoder every test in this class exercises; testFactory() builds one parameter set per EncoderType.
+ this.coder = coder;
+ }
+
+    @ParametersFactory
+    public static List<Object[]> testFactory() {
+        final List<Object[]> parameterSets = Lists.newArrayList();  // one single-element array per EncoderType
+        for (final EncoderType type : EncoderType.values()) {
+            parameterSets.add(new Object [] {SequenceEncoders.forType(type)});
+        }
+        return parameterSets;
+    }
+
+ @Test
+ public void testEncodeSuffixOnRandomSequences() {
+ for (int i = 0; i < 10000; i++) {
+ assertRoundtripEncode(
+ randomAsciiOfLengthBetween(0, 500),
+ randomAsciiOfLengthBetween(0, 500));
+ }
+ }
+
+ @Test
+ public void testEncodeSamples() {
+ assertRoundtripEncode("", "");
+ assertRoundtripEncode("abc", "ab");
+ assertRoundtripEncode("abc", "abx");
+ assertRoundtripEncode("ab", "abc");
+ assertRoundtripEncode("xabc", "abc");
+ assertRoundtripEncode("axbc", "abc");
+ assertRoundtripEncode("axybc", "abc");
+ assertRoundtripEncode("axybc", "abc");
+ assertRoundtripEncode("azbc", "abcxy");
+
+ assertRoundtripEncode("Niemcami", "Niemiec");
+ assertRoundtripEncode("Niemiec", "Niemcami");
+ }
+
+ private void assertRoundtripEncode(String srcString, String dstString)
+ { // Encodes dstString against srcString with the coder under test, then checks two decoding paths.
+ ByteArrayList src = ByteArrayList.from(srcString.getBytes(UTF8)); // NOTE(review): UTF8 is not declared here; presumably inherited from RandomizedTest
+ ByteArrayList dst = ByteArrayList.from(dstString.getBytes(UTF8));
+ ByteArrayList encoded = ByteArrayList.newInstance();
+ ByteArrayList decoded = ByteArrayList.newInstance();
+
+ coder.encode(src, dst, encoded); // encode dst against src into 'encoded'
+ coder.decode(src, encoded, decoded); // decode with the same src; the result must equal dst (asserted below)
+
+ if (!dst.equals(decoded)) { // print diagnostics before the assertion below fails
+ System.out.println("src: " + new String(src.toArray(), Charsets.UTF_8));
+ System.out.println("dst: " + new String(dst.toArray(), Charsets.UTF_8));
+ System.out.println("enc: " + new String(encoded.toArray(), Charsets.UTF_8));
+ System.out.println("dec: " + new String(decoded.toArray(), Charsets.UTF_8));
+ }
+
+ assertEquals(dst, decoded);
+
+ // Second path: DictionaryLookup.decodeBaseForm, given the same encoded bytes and src, must also yield dst.
+ DictionaryMetadataBuilder builder = new DictionaryMetadataBuilder();
+ builder.encoding(Charsets.UTF_8);
+ builder.encoder(coder.type()); // metadata uses the same encoder type as the coder under test
+
+ ByteBuffer bb = DictionaryLookup.decodeBaseForm(
+ ByteBuffer.allocate(0),
+ encoded.toArray(),
+ encoded.size(),
+ ByteBuffer.wrap(src.toArray()), builder.build());
+
+ ByteArrayList decoded2 = ByteArrayList.newInstance();
+ bb.flip(); // NOTE(review): presumably decodeBaseForm returns the buffer positioned after the written bytes - confirm
+ while (bb.hasRemaining()) decoded2.add(bb.get()); // copy the remaining bytes into a list for comparison
+
+ if (!dst.equals(decoded2)) { // print diagnostics before the assertion below fails
+ System.out.println("DictionaryLookup.decodeBaseForm incorrect, coder: " + coder);
+ System.out.println("src : " + new String(src.toArray(), Charsets.UTF_8));
+ System.out.println("dst : " + new String(dst.toArray(), Charsets.UTF_8));
+ System.out.println("enc : " + new String(encoded.toArray(), Charsets.UTF_8));
+ System.out.println("dec : " + new String(decoded.toArray(), Charsets.UTF_8));
+ System.out.println("dec2: " + new String(decoded2.toArray(), Charsets.UTF_8));
+ }
+
+ assertEquals(dst, decoded2);
+ }
+}
diff --git a/morfologik-tools/src/test/java/morfologik/tools/SequenceEncodersStaticTest.java b/morfologik-tools/src/test/java/morfologik/tools/SequenceEncodersStaticTest.java
new file mode 100644
index 0000000..3f1625d
--- /dev/null
+++ b/morfologik-tools/src/test/java/morfologik/tools/SequenceEncodersStaticTest.java
@@ -0,0 +1,96 @@
+package morfologik.tools;
+
+import static org.junit.Assert.*;
+
+import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
+
+import morfologik.stemming.EncoderType;
+
+import org.junit.Test;
+
+import com.google.common.base.Charsets;
+
+/*
+ * Pins the exact text SequenceAssembler produces with the suffix, prefix and infix encoders.
+ */
+public class SequenceEncodersStaticTest {
+    private SequenceAssembler suffix = new SequenceAssembler(new SequenceEncoders.TrimSuffixEncoder());
+    private SequenceAssembler prefix = new SequenceAssembler(new SequenceEncoders.TrimPrefixAndSuffixEncoder());
+    private SequenceAssembler infix = new SequenceAssembler(new SequenceEncoders.TrimInfixAndSuffixEncoder());
+
+    @Test
+    public void testStandardEncode() throws Exception {
+        assertEquals("abc+Ad+tag", encode(suffix, "abc", "abcd", "tag"));
+        assertEquals("abc+Dxyz+tag", encode(suffix, "abc", "xyz", "tag"));
+        assertEquals("abc+Bć+tag", encode(suffix, "abc", "abć", "tag"));
+    }
+
+    @Test
+    public void testSeparatorChange() throws Exception {
+        assertEquals("abc+Ad+tag", encode(suffix, "abc", "abcd", "tag"));  // no separator given: '+'
+
+        SequenceAssembler assembler = new SequenceAssembler(new SequenceEncoders.TrimSuffixEncoder(), (byte) '_');
+        assertEquals("abc_Ad_tag", encode(assembler, "abc", "abcd", "tag"));
+
+        assembler = new SequenceAssembler(new SequenceEncoders.TrimSuffixEncoder(), (byte) '\t');
+        assertEquals("abc\tAd\ttag", encode(assembler, "abc", "abcd", "tag"));
+    }
+
+    @Test
+    public void testPrefixEncode() throws UnsupportedEncodingException {
+        assertEquals("abc+AAd+tag", encode(prefix, "abc", "abcd", "tag"));
+        assertEquals("abcd+AB+tag", encode(prefix, "abcd", "abc", "tag"));
+        assertEquals("abc+ADxyz+tag", encode(prefix, "abc", "xyz", "tag"));
+        assertEquals("abc+ABć+tag", encode(prefix, "abc", "abć", "tag"));
+        assertEquals("postmodernizm+AAu+xyz", encode(prefix, "postmodernizm", "postmodernizmu", "xyz"));
+        assertEquals("postmodernizmu+AB+xyz", encode(prefix, "postmodernizmu", "postmodernizm", "xyz"));
+        assertEquals("nieduży+DA+adj", encode(prefix, "nieduży", "duży", "adj"));
+        assertEquals("postmodernizm+EA+xyz", encode(prefix, "postmodernizm", "modernizm", "xyz"));
+    }
+
+    @Test
+    public void testInfixEncode() throws UnsupportedEncodingException {
+        assertEquals("ayz+AACbc+tag", encode(infix, "ayz", "abc", "tag"));
+        assertEquals("xyz+AADabc+tag", encode(infix, "xyz", "abc", "tag"));
+
+        assertEquals("abc+AAAd+tag", encode(infix, "abc", "abcd", "tag"));
+        assertEquals("abcd+AAB+tag", encode(infix, "abcd", "abc", "tag"));
+        assertEquals("abc+AADxyz+tag", encode(infix, "abc", "xyz", "tag"));
+        assertEquals("abc+AABć+tag", encode(infix, "abc", "abć", "tag"));
+        assertEquals("postmodernizm+AAAu+xyz", encode(infix, "postmodernizm", "postmodernizmu", "xyz"));
+        assertEquals("postmodernizmu+AAB+xyz", encode(infix, "postmodernizmu", "postmodernizm", "xyz"));
+        assertEquals("nieduży+ADA+adj", encode(infix, "nieduży", "duży", "adj"));
+
+        // real infix cases
+        assertEquals("kcal+ABA+xyz", encode(infix, "kcal", "cal", "xyz"));
+        assertEquals("aillent+BBCr+xyz", encode(infix, "aillent", "aller", "xyz"));
+        assertEquals("laquelle+AGAquel+D f s", encode(infix, "laquelle", "lequel", "D f s"));
+        assertEquals("ccal+ABA+test", encode(infix, "ccal", "cal", "test"));
+        assertEquals("ccal+ABA+test", encode(infix, "ccal", "cal", "test"));
+    }
+
+    @Test
+    public void testUTF8Boundary() throws Exception {
+        assertEquals("passagère+Eer+tag", encode(suffix, "passagère", "passager", "tag"));
+        assertEquals("passagère+GDAr+tag", encode(infix, "passagère", "passager", "tag"));
+        assertEquals("passagère+AEer+tag", encode(prefix, "passagère", "passager", "tag"));
+    }
+
+    @Test
+    public void testAllEncodersHaveImplementations() {
+        for (EncoderType t : EncoderType.values()) {
+            assertNotNull("No encoder for " + t, SequenceEncoders.forType(t));  // assert on the encoder itself
+        }
+    }
+
+    private String encode(SequenceAssembler assembler, String wordForm,
+        String wordLemma, String wordTag)  // UTF-8 in and out, so tests can compare plain strings
+    {
+        Charset utf8 = Charsets.UTF_8;
+        return new String(assembler.encode(
+            wordForm.getBytes(utf8),
+            wordLemma.getBytes(utf8),
+            wordTag.getBytes(utf8)), utf8);
+    }
+}
diff --git a/morfologik-tools/src/test/java/morfologik/tools/Text2FSA5Test.java b/morfologik-tools/src/test/java/morfologik/tools/Text2FSA5Test.java
new file mode 100644
index 0000000..573c5da
--- /dev/null
+++ b/morfologik-tools/src/test/java/morfologik/tools/Text2FSA5Test.java
@@ -0,0 +1,37 @@
+package morfologik.tools;
+
+import java.io.*;
+
+import morfologik.fsa.*;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/*
+ * Checks that FSABuildTool turns a plain text word list into an automaton readable as FSA5.
+ */
+public class Text2FSA5Test {
+    @Test
+    public void testTool() throws Exception {
+        File input = File.createTempFile("input", "in");  // word list fed to the tool
+        File output = File.createTempFile("output", "fsa");  // automaton written by the tool
+        input.deleteOnExit();
+        output.deleteOnExit();
+
+        PrintWriter w = new PrintWriter(new OutputStreamWriter(new FileOutputStream(input), "UTF-8"));
+        w.println("b");
+        w.println("cab");
+        w.println("ab");
+        w.close();
+
+        FSABuildTool.main(new String [] {"--input", input.getAbsolutePath(), "--output", output.getAbsolutePath()});
+
+        InputStream in = new FileInputStream(output);  // closed in finally so the file handle is not leaked
+        try {
+            FSA5 fsa = FSA.read(in);
+            Assert.assertEquals(3, new FSAInfo(fsa).finalStatesCount);  // three input lines, three final states
+        } finally {
+            in.close();
+        }
+    }
+}