11 files changed, 2332 insertions, 0 deletions
diff --git a/morfologik-tools/src/main/java/morfologik/tools/FSABuildTool.java b/morfologik-tools/src/main/java/morfologik/tools/FSABuildTool.java
new file mode 100644
index 0000000..687b6cb
--- /dev/null
+++ b/morfologik-tools/src/main/java/morfologik/tools/FSABuildTool.java
@@ -0,0 +1,541 @@
+package morfologik.tools;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.PrintWriter;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Locale;
+import java.util.Map;
+import java.util.TreeMap;
+
+import morfologik.fsa.CFSA2Serializer;
+import morfologik.fsa.FSA;
+import morfologik.fsa.FSA5Serializer;
+import morfologik.fsa.FSABuilder;
+import morfologik.fsa.FSAFlags;
+import morfologik.fsa.FSAInfo;
+import morfologik.fsa.FSASerializer;
+import morfologik.fsa.FSAUtils;
+import morfologik.fsa.IMessageLogger;
+import morfologik.fsa.StateVisitor;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.lang.StringEscapeUtils;
+
+import com.carrotsearch.hppc.IntIntOpenHashMap;
+import com.carrotsearch.hppc.cursors.IntIntCursor;
+
+/**
+ * Convert from plain text input to a serialized FSA in any of the
+ * available {@link Format}s.
+ */
+public final class FSABuildTool extends Tool {
+    /**
+     * One megabyte.
+     */
+    private final static int MB = 1024 * 1024;
+    
+    /**
+     * Binary serialization formats this tool can emit.
+     */
+    public enum Format {
+        FSA5, 
+        CFSA2;
+
+        /**
+         * Creates a new serializer instance matching this format.
+         */
+        public FSASerializer getSerializer() {
+            if (this == FSA5) {
+                return new FSA5Serializer();
+            }
+            if (this == CFSA2) {
+                return new CFSA2Serializer();
+            }
+            throw new RuntimeException();
+        }
+    }
+
+    /**
+     * Be more verbose about progress.
+     */
+    private boolean printProgress;
+    
+    /**
+     * Serializer used for emitting the FSA.
+     */
+    private FSASerializer serializer;
+
+    /**
+     * Output format name.
+     */
+    private Format format;
+    
+    /**
+     * Warn about CR characters in the input (usually not what you want).
+     */
+    private boolean crWarning = false;
+    
+    /**
+     * If <code>true</code>, the input is not buffered and sorted in-memory, but
+     * must be sorted externally (using the "C" convention: unsigned byte values).
+     */
+    private boolean inputSorted;
+    
+    /**
+     * Print additional statistics about the output automaton.
+     */
+    private boolean statistics;
+
+    /**
+     * The actual construction of the FSA.
+     */
+    private FSABuilder builder = new FSABuilder();
+    
+    /**
+     * Start time.
+     */
+    private long start = System.currentTimeMillis();
+
+    private IMessageLogger logger;
+
+    /**
+     * Callback receiving each non-empty input line (see forAllLines).
+     */
+    private static interface LineConsumer {
+        /**
+         * Consume the first <code>pos</code> bytes of <code>buffer</code>; return
+         * the buffer the reader should fill next (the same one or a swapped one).
+         */
+        byte[] process(byte[] buffer, int pos);
+    }
+
+    /**
+     * Lightweight exception used to break out of the anonymous delegates on
+     * error; it deliberately records no stack trace.
+     */
+    @SuppressWarnings("serial")
+    private static class TerminateProgramException extends RuntimeException {
+        public TerminateProgramException(String msg) {
+            super(msg);
+        }
+
+        /** Skips stack capture but still returns this, as Throwable requires. */
+        @Override public synchronized Throwable fillInStackTrace() { return this; }
+    }
+    
+	/**
+	 * Command line entry point after parsing arguments: builds the automaton from
+	 * the input, optionally logs statistics about it, then serializes it.
+	 */
+	protected void go(CommandLine line) throws Exception {
+		String[] args = line.getArgs();
+		if (args.length != 0) {
+			printUsage();
+			return;
+		}
+		// Parse the input options.
+        parseOptions(line);
+        // Progress and diagnostics are written to standard error.
+        logger = new WriterMessageLogger(new PrintWriter(System.err));
+        this.serializer.withLogger(logger);
+
+        BufferedInputStream inputStream = null;
+		try {
+		    inputStream = initializeInput(line);
+
+            if (inputSorted) {
+                logger.log("Assuming input is already sorted");
+            }
+            // Peek at the first bytes (mark/reset) before the input is consumed.
+            checkUtf8Bom(inputStream);
+
+		    final FSA fsa;
+		    if (inputSorted) {
+                fsa = processSortedInput(inputStream);
+		    } else {
+		        fsa = processUnsortedInput(inputStream);
+		    }
+
+	        if (crWarning) logger.log("Warning: input contained carriage returns?");
+
+            if (statistics) {
+                logger.startPart("Statistics");
+                FSAInfo info = new FSAInfo(fsa);
+                TreeMap<Integer, Integer> fanout = FSAUtils.calculateFanOuts(fsa, fsa.getRootNode());
+                logger.endPart();
+                // For each state, sum its final arcs and the numbers already stored for arc targets.
+                final IntIntOpenHashMap numbers = new IntIntOpenHashMap();
+                fsa.visitInPostOrder(new StateVisitor() {
+                    public boolean accept(int state) {
+                        int thisNodeNumber = 0;
+                        for (int arc = fsa.getFirstArc(state); arc != 0; arc = fsa.getNextArc(arc)) {
+                            thisNodeNumber +=
+                                (fsa.isArcFinal(arc) ? 1 : 0) +
+                                (fsa.isArcTerminal(arc) ? 0 : numbers.get(fsa.getEndNode(arc)));
+                        }
+                        numbers.put(state, thisNodeNumber);
+                        return true;
+                    }
+                });
+                // States whose number is exactly one are reported as "Tail nodes".
+                int singleRLC = 0;
+                for (IntIntCursor c : numbers) {
+                    if (c.value == 1) singleRLC++;
+                }
+                
+                logger.log("Nodes", info.nodeCount);
+                logger.log("Arcs", info.arcsCount);
+                logger.log("Tail nodes", singleRLC);
+
+                logger.log("States with the given # of outgoing arcs:");
+                for (Map.Entry<Integer, Integer> e : fanout.entrySet()) {
+                    logger.log("  #" + e.getKey(), e.getValue());
+                }
+
+                logger.log("FSA builder properties:");
+                for (Map.Entry<FSABuilder.InfoEntry, Object> e : builder.getInfo().entrySet()) {
+                    logger.log(e.getKey().toString(), e.getValue());
+                }
+            }
+
+			// Save the result. NOTE(review): the output stream is closed only if serialize() returns normally.
+            logger.startPart("Serializing " + format);
+			serializer.serialize(fsa, initializeOutput(line)).close();
+			logger.endPart();
+		} catch (OutOfMemoryError e) {
+		    logger.log("Error: Out of memory. Pass -Xmx1024m argument (or more) to java.");
+		} finally {
+		    if (inputStream != System.in && inputStream != null) {
+		        inputStream.close();
+		    }
+		}
+	}
+
+	/**
+	 * Warn in case UTF-8 BOM is detected as this is 99% a mistake.
+	 */
+	private void checkUtf8Bom(InputStream is) throws IOException {
+	    if (!is.markSupported()) {
+	        // throw a hard assertion.
+	        throw new AssertionError("Mark should be supported on input stream.");
+	    }
+
+	    is.mark(3);
+	    if (is.read() == 0xef &&
+	        is.read() == 0xbb &&
+	        is.read() == 0xbf) {
+	        System.err.println("Warning: input starts with UTF-8 BOM bytes which is" +
+	        		" most likely not what you want. Use header-less UTF-8 file (unless you are" +
+	        		" encoding plain bytes in which case this message doesn't apply).");
+	    }
+	    is.reset();
+    }
+
+    /**
+     * Reads every input sequence into memory, sorts them with
+     * {@link FSABuilder#LEXICAL_ORDERING} and builds the automaton from them.
+     */
+    private FSA processUnsortedInput(InputStream inputStream)
+            throws IOException {
+        logger.startPart("Reading input");
+        final ArrayList<byte[]> sequences = readInput(inputStream);
+        logger.endPart();
+        logger.log("Input sequences", sequences.size());
+
+        logger.startPart("Sorting");
+        Collections.sort(sequences, FSABuilder.LEXICAL_ORDERING);
+        logger.endPart();
+        logger.startPart("Building FSA");
+        for (int i = 0; i < sequences.size(); i++) {
+            final byte[] sequence = sequences.get(i);
+            builder.add(sequence, 0, sequence.length);
+        }
+        final FSA automaton = builder.complete();
+        logger.endPart();
+        return automaton;
+    }
+
+    /**
+     * 
+     */
+    private FSA processSortedInput(InputStream inputStream)
+            throws IOException {
+
+        int lines = forAllLines(inputStream, new LineConsumer() {
+            private byte [] current;
+            private byte [] previous = null;
+            private int previousLen;
+
+            public byte[] process(byte[] current, int currentLen) {
+                // Verify the order.
+                if (previous != null) {
+                    if (FSABuilder.compare(previous, 0, previousLen, current, 0, currentLen) > 0) {
+                        logger.log("\n\nERROR: The input is not sorted: \n" + 
+                                dumpLine(previous, previousLen) + "\n" +
+                                dumpLine(current, currentLen));
+                        throw new TerminateProgramException("Input is not sorted.");
+                    }
+                }
+
+                // Add to the automaton.
+                builder.add(current, 0, currentLen);
+
+                // Swap buffers.
+                this.current = previous != null ? previous : new byte [current.length];
+                this.previous = current;
+                this.previousLen = currentLen;
+
+                return this.current;
+            }
+        });
+
+        logger.startPart("Building FSA");
+        FSA fsa = builder.complete();
+        logger.endPart();
+        logger.log("Input sequences", lines);
+
+        return fsa;
+    }
+
+	/**
+	 * Dump input line, byte-by-byte. 
+	 */
+    protected String dumpLine(byte[] line, int length) {
+        StringBuilder builder = new StringBuilder();
+        for (int i = 0; i < length; i++) {
+            if (i > 0) builder.append(" ");
+            builder.append(String.format("%02x", line[i]));
+        }
+        builder.append(" | ");
+        for (int i = 0; i < length; i++) {
+            if (Character.isLetterOrDigit(line[i]))
+                builder.append((char) line[i]);
+            else
+                builder.append(".");
+        }
+        return builder.toString();
+    }
+
+    /**
+     * Reads the command-line options into this tool's fields and configures the
+     * serializer (filler, annotation separator, numbers) to match.
+     * Throws TerminateProgramException when the output format name is unknown.
+     */
+	private void parseOptions(CommandLine line) {
+	    String opt;
+
+	    opt = SharedOptions.outputFormatOption.getOpt();
+	    if (line.hasOption(opt)) {
+            String formatValue = line.getOptionValue(opt);
+            try {
+                // Enum names are ASCII; upper-case independently of the default locale.
+                format = Format.valueOf(formatValue.toUpperCase(Locale.ENGLISH));
+            } catch (IllegalArgumentException e) {
+                throw new TerminateProgramException("Not a valid format: " + formatValue);
+            }
+	    } else {
+	        format = Format.FSA5;
+	    }
+        serializer = format.getSerializer();
+
+        Charset defaultCharset = Charset.defaultCharset();
+		opt = SharedOptions.fillerCharacterOption.getLongOpt();
+		if (line.hasOption(opt) && requiredCapability(opt, FSAFlags.SEPARATORS)) {
+			String chr = StringEscapeUtils.unescapeJava(line.getOptionValue(opt));
+			serializer.withFiller(checkSingleByte(chr, defaultCharset));
+		}
+
+		opt = SharedOptions.annotationSeparatorCharacterOption.getLongOpt();
+		if (line.hasOption(opt) && requiredCapability(opt, FSAFlags.SEPARATORS)) {
+			String chr = StringEscapeUtils.unescapeJava(line.getOptionValue(opt));
+			serializer.withAnnotationSeparator(checkSingleByte(chr, defaultCharset));
+		}
+
+        opt = SharedOptions.withNumbersOption.getOpt();
+        if (line.hasOption(opt) && requiredCapability(opt, FSAFlags.NUMBERS)) {
+            serializer.withNumbers();
+        }
+
+        opt = SharedOptions.progressOption.getLongOpt();
+        if (line.hasOption(opt)) {
+            printProgress = true;
+        }
+
+        opt = SharedOptions.inputSortedOption.getLongOpt();
+        if (line.hasOption(opt)) {
+            inputSorted = true;
+        }
+
+        opt = SharedOptions.statistics.getLongOpt();
+        if (line.hasOption(opt)) {
+            statistics = true;
+        }
+    }
+	
+	private boolean requiredCapability(String opt, FSAFlags flag) {
+	    if (!serializer.getFlags().contains(flag)) {
+	        throw new RuntimeException("This serializer does not support option: " + opt);
+	    }
+	    return true;
+    }
+
+    /**
+	 * Check that the argument encodes to exactly one byte in the given charset
+	 * and return that byte.
+	 * @throws IllegalArgumentException if the encoded form is not a single byte.
+	 */
+	public static byte checkSingleByte(String chr, Charset charset) {
+	    final byte[] bytes = chr.getBytes(charset);
+	    if (bytes.length == 1) return bytes[0];
+	    // Report the length in the charset actually checked, not the platform default.
+	    throw new IllegalArgumentException("Filler and annotation characters must be single" +
+				"-byte values, " + chr + " has " + bytes.length + " bytes."); 
+    }
+
+    /**
+	 * Read all the input lines, unsorted.
+	 */
+	private ArrayList<byte[]> readInput(InputStream is) throws IOException {
+	    final ArrayList<byte[]> result = new ArrayList<byte[]>();
+	    forAllLines(is, new LineConsumer() {
+	        public byte[] process(byte[] buffer, int pos) {
+	            result.add(java.util.Arrays.copyOf(buffer, pos));
+	            return buffer;
+	        }
+        });
+	    return result;
+	}
+
+	/**
+	 * Splits the stream on '\n' and feeds every non-empty line to the consumer
+	 * (all other bytes, including '\r', are kept as line data).
+	 * Returns the number of lines passed to the consumer.
+	 */
+	private int forAllLines(InputStream is, LineConsumer lineConsumer) throws IOException {
+		byte[] buf = new byte[0];
+		int consumed = 0, newlines = 0, len = 0;
+		for (int b = is.read(); b != -1; b = is.read()) {
+			if (b == '\r') {
+				crWarning = true;
+			}
+			if (b != '\n') {
+				if (len >= buf.length) {
+					buf = java.util.Arrays.copyOf(buf, buf.length + 10);
+				}
+				buf[len++] = (byte) b;
+				continue;
+			}
+			if (len > 0) {
+				buf = lineConsumer.process(buf, len);
+				len = 0;
+				consumed++;
+			}
+			// The newline counter only advances while progress printing is enabled.
+			if (printProgress && newlines++ > 0 && (newlines % 1000000) == 0) {
+				logger.log(String.format(Locale.ENGLISH, "%6.2fs, sequences: %d", elapsedTime(), newlines));
+			}
+		}
+
+		// A final line without a trailing newline.
+		if (len > 0) {
+			lineConsumer.process(buf, len);
+			consumed++;
+		}
+		return consumed;
+	}
+
+	private double elapsedTime() {
+        return (System.currentTimeMillis() - start) / 1000.0d; // seconds since the 'start' field was initialized
+    }
+
+    @Override
+	protected void printUsage() {
+		// The trailing 'true' asks the formatter to generate the usage line as well.
+		new HelpFormatter().printHelp(this.getClass().getName(), options, true);
+	}
+
+	@Override
+	protected void initializeOptions(Options options) {
+		options.addOption(SharedOptions.inputFileOption);
+		options.addOption(SharedOptions.outputFileOption);
+		// Selects the serializer (see parseOptions).
+		options.addOption(SharedOptions.outputFormatOption);
+		// Both must encode to a single byte (see checkSingleByte).
+		options.addOption(SharedOptions.fillerCharacterOption);
+		options.addOption(SharedOptions.annotationSeparatorCharacterOption);
+
+		options.addOption(SharedOptions.withNumbersOption);
+		options.addOption(SharedOptions.progressOption);
+
+		options.addOption(SharedOptions.inputSortedOption);
+
+		options.addOption(SharedOptions.statistics);
+	}
+
+	/**
+     * Opens the file named by the output option, or standard output when the
+     * option is absent, wrapped in a buffered stream.
+     */
+	@SuppressWarnings("resource")
+    private static OutputStream initializeOutput(CommandLine line)
+	        throws IOException, ParseException {
+		final String opt = SharedOptions.outputFileOption.getOpt();
+		if (!line.hasOption(opt)) {
+			// Use standard output.
+			return new BufferedOutputStream(System.out);
+		}
+
+		// Use output file.
+		final File outputFile = (File) line.getParsedOptionValue(opt);
+		return new BufferedOutputStream(new FileOutputStream(outputFile));
+	}
+
+	/**
+     * Opens the file named by the input option, or standard input when the option
+     * is absent; logs a warning for large files that will be sorted in memory.
+     */
+    @SuppressWarnings("resource")
+	private BufferedInputStream initializeInput(CommandLine line)
+	        throws IOException, ParseException {
+		final String opt = SharedOptions.inputFileOption.getOpt();
+		if (!line.hasOption(opt)) {
+			// Use standard input.
+			return new BufferedInputStream(System.in);
+		}
+
+		// Use input file.
+		final File inputFile = (File) line.getParsedOptionValue(opt);
+		// Only unsorted input is buffered and sorted in memory, so only then warn.
+		if (!inputSorted && inputFile.length() > 20 * MB) {
+		    logger.log("WARN: The input file is quite large, avoid\n" +
+		        "      in-memory sorting by piping pre-sorted\n" +
+		        "      input directly to fsa_build. Linux:\n" +
+		        "      export LC_ALL=C && \\\n" +
+		        "         sort input | \\\n" +
+		        "         java -jar morfologik.jar fsa_build --sorted -o dict.fsa");
+		}
+
+		// The stream is opened only after the warning has been logged.
+		return new BufferedInputStream(new FileInputStream(inputFile));
+	}
+
+	/**
+	 * Command line entry point: creates the tool and hands it the raw
+	 * arguments.
+	 */
+	public static void main(String... args) throws Exception {
+		new FSABuildTool().go(args);
+	}
+}
+\ No newline at end of file
diff --git a/morfologik-tools/src/main/java/morfologik/tools/FSADumpTool.java b/morfologik-tools/src/main/java/morfologik/tools/FSADumpTool.java
new file mode 100644
index 0000000..510d8e5
--- /dev/null
+++ b/morfologik-tools/src/main/java/morfologik/tools/FSADumpTool.java
@@ -0,0 +1,287 @@
+package morfologik.tools;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.util.LinkedHashMap;
+import java.util.Locale;
+import java.util.Map;
+
+import morfologik.fsa.FSA;
+import morfologik.fsa.FSA5;
+import morfologik.fsa.FSAInfo;
+import morfologik.fsa.FSAUtils;
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryAttribute;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.WordData;
+import morfologik.util.FileUtils;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Options;
+
+/**
+ * This utility will dump the information and contents of a given {@link FSA}
+ * dictionary. It can dump dictionaries in the raw form (as fed to the
+ * <code>fsa_build</code> program) or decoding compressed stem forms.
+ */
+public final class FSADumpTool extends Tool {
+	/**
+	 * Direct binary stream used for dictionary dumps.
+	 */
+	private OutputStream os;
+
+	/**
+     * A writer for messages and any text-based output.
+     */
+    private Writer w;
+
+	/**
+	 * Print raw data only, no headers.
+	 */
+	private boolean dataOnly;
+
+	/**
+	 * Decode from prefix/infix/suffix encodings. 
+	 */
+	private boolean decode;
+
+	/**
+	 * Dump graphviz DOT file instead of automaton sequences.
+	 */
+	private boolean dot;
+
+	/**
+	 * Reads the dictionary file option and the dump flags from the parsed
+	 * command line, checks the file via FileUtils.assertExists and dumps it.
+	 */
+	protected void go(CommandLine line) throws Exception {
+		final String fileOpt = SharedOptions.fsaDictionaryFileOption.getOpt();
+		final File dictionaryFile = (File) line.getParsedOptionValue(fileOpt);
+
+		this.dataOnly = line.hasOption(SharedOptions.dataOnly.getOpt());
+		this.decode = line.hasOption(SharedOptions.decode.getOpt());
+		this.dot = line.hasOption(SharedOptions.dot.getLongOpt());
+
+		FileUtils.assertExists(dictionaryFile, true, false);
+
+		dump(dictionaryFile);
+	}
+
+	/**
+	 * Dumps the properties and content of a dictionary to standard output.
+	 */
+	private void dump(File dictionaryFile)
+	        throws UnsupportedEncodingException, IOException {
+		final long start = System.currentTimeMillis();
+
+		final Dictionary dictionary;
+		final FSA fsa;
+
+		if (!dictionaryFile.canRead()) {
+			printWarning("Dictionary file does not exist: " + dictionaryFile.getAbsolutePath());
+			return;
+		}
+
+		this.os = new BufferedOutputStream(System.out, 1024 * 32);
+		this.w =  new OutputStreamWriter(os, "UTF-8");
+
+		if (hasMetadata(dictionaryFile)) {
+			dictionary = Dictionary.read(dictionaryFile);
+			fsa = dictionary.fsa;
+
+			final String encoding = dictionary.metadata.getEncoding();
+			if (!Charset.isSupported(encoding)) {
+				printWarning("Dictionary's charset is not supported on this JVM: " + encoding);
+				return;
+			}
+		} else {
+			dictionary = null;
+			// Release the file handle as soon as the automaton has been read.
+			final FileInputStream fsaStream = new FileInputStream(dictionaryFile);
+			try {
+				fsa = FSA.read(fsaStream);
+			} finally {
+				fsaStream.close();
+			}
+			printWarning("Warning: FSA automaton without metadata file.");
+		}
+
+		printExtra("FSA properties");
+		printExtra("--------------");
+		printExtra("FSA implementation     : " + fsa.getClass().getName());
+		printExtra("Compiled with flags    : " + fsa.getFlags().toString());
+
+		if (!dataOnly) {
+			final FSAInfo info = new FSAInfo(fsa);
+			printExtra("Number of arcs         : " + info.arcsCount + "/" + info.arcsCountTotal);
+			printExtra("Number of nodes        : " + info.nodeCount);
+			printExtra("Number of final states : " + info.finalStatesCount);
+			printExtra("");
+		}
+
+		// Separator for dumping.
+		char separator = '\t';
+
+		if (fsa instanceof FSA5) {
+			printExtra("FSA5 properties");
+			printExtra("---------------");
+			printFSA5((FSA5) fsa);
+			printExtra("");
+		}
+
+		if (dictionary != null) {
+			printExtra("Dictionary metadata");
+			printExtra("-------------------");
+
+			Map<DictionaryAttribute,String> values =
+				new LinkedHashMap<DictionaryAttribute,String>(dictionary.metadata.getAttributes());
+			values.put(DictionaryAttribute.ENCODING, dictionary.metadata.getEncoding());
+			values.put(DictionaryAttribute.SEPARATOR, "0x" + Integer.toHexString(dictionary.metadata.getSeparator())
+				+ " ('" + dictionary.metadata.getSeparatorAsChar() + "')");
+
+			for (Map.Entry<DictionaryAttribute,String> e : values.entrySet()) {
+				printExtra(String.format(Locale.ENGLISH, "%-40s: %s",
+					e.getKey().propertyName, e.getValue()));
+			}
+			printExtra("");
+		}
+
+		int sequences = 0;
+		if (decode) {
+			if (dictionary == null) {
+				printWarning("No dictionary metadata available.");
+				return;
+			}
+
+			printExtra("Decoded FSA data (in the encoding above)");
+			printExtra("----------------------------------------");
+
+			final DictionaryLookup dl = new DictionaryLookup(dictionary);
+			final StringBuilder builder = new StringBuilder();
+			// Decoded entries are written in the dictionary's own encoding.
+			final OutputStreamWriter osw = new OutputStreamWriter(os, dictionary.metadata.getEncoding());
+
+			CharSequence t;
+			for (WordData wd : dl) {
+				builder.setLength(0);
+				builder.append(wd.getWord());
+				builder.append(separator);
+
+				t = wd.getStem();
+				if (t == null)
+					t = "";
+				builder.append(t);
+				builder.append(separator);
+
+				t = wd.getTag();
+				if (t == null)
+					t = "";
+				builder.append(t);
+				builder.append('\n');
+
+				osw.write(builder.toString());
+				sequences++;
+			}
+			osw.flush();
+		} else {
+			if (dot) {
+				FSAUtils.toDot(w, fsa, fsa.getRootNode());
+				w.flush();
+			} else {
+				printExtra("FSA data (raw bytes in the encoding above)");
+				printExtra("------------------------------------------");
+
+				for (ByteBuffer bb : fsa) {
+					os.write(bb.array(), 0, bb.remaining());
+					os.write(0x0a);
+					sequences++;
+				}
+			}
+		}
+
+		printExtra("--------------------");
+
+		final long millis = Math.max(1, System.currentTimeMillis() - start);
+		printExtra(String.format(Locale.ENGLISH,
+			"Dictionary dumped in %.3f second(s), %d sequences (%d sequences/sec.).",
+			millis / 1000.0, sequences,
+			(int) (sequences / (millis / 1000.0))));
+
+		os.flush();
+	}
+
+	/**
+	 * Print the {@link FSA5}-specific fields: gtl, node data length, annotation separator and filler.
+	 */
+	private void printFSA5(FSA5 fsa) throws IOException {
+		printExtra("GTL                    : " + fsa.gtl);
+		printExtra("Node extra data        : " + fsa.nodeDataLength);
+		printExtra("Annotation separator   : " + byteAsChar(fsa.annotation));
+		printExtra("Filler character       : " + byteAsChar(fsa.filler));
+    }
+
+	/**
+	 * Convert a byte to a character, no charset decoding, simple ASCII range mapping.
+	 */
+	private char byteAsChar(byte v) {
+		char chr = (char) (v & 0xff);
+		if (chr < 127)
+			return chr;
+		else
+			return '?';
+    }
+
+	/*
+     * Writes the message plus a newline to the text writer and flushes it; does nothing in data-only mode.
+     */
+	private void printExtra(String msg) throws IOException {
+		if (!dataOnly) {
+			w.write(msg);
+			w.write('\n');
+			w.flush();
+		}
+	}
+
+	/*
+     * Prints a warning message to standard error.
+     */
+	private void printWarning(String msg) {
+		System.err.println(msg);
+	}
+
+	/**
+	 * Tells whether the features (metadata) file expected next to the given
+	 * automaton file is readable.
+	 */
+	private static boolean hasMetadata(File fsaFile) {
+		final String featuresName = Dictionary.getExpectedFeaturesName(fsaFile.getName());
+		final File featuresFile = new File(fsaFile.getParent(), featuresName);
+		return featuresFile.canRead();
+	}
+
+	/**
+	 * Registers the command line options of this tool: dictionary file, data-only, decode and dot.
+	 */
+	protected void initializeOptions(Options options) {
+		options.addOption(SharedOptions.fsaDictionaryFileOption);
+		options.addOption(SharedOptions.dataOnly);
+		options.addOption(SharedOptions.decode);
+		options.addOption(SharedOptions.dot);
+	}
+
+	/**
+	 * Command line entry point: creates the tool and hands it the raw
+	 * arguments.
+	 */
+	public static void main(String... args) throws Exception {
+		new FSADumpTool().go(args);
+	}
+}
+\ No newline at end of file
diff --git a/morfologik-tools/src/main/java/morfologik/tools/InflectionFramesTool.java b/morfologik-tools/src/main/java/morfologik/tools/InflectionFramesTool.java
new file mode 100644
index 0000000..e913b7f
--- /dev/null
+++ b/morfologik-tools/src/main/java/morfologik/tools/InflectionFramesTool.java
@@ -0,0 +1,112 @@
+package morfologik.tools;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.*;
+import java.util.*;
+import java.util.Map.Entry;
+
+import morfologik.stemming.*;
+import morfologik.stemming.Dictionary;
+
+/**
+ * Groups the stems of the Polish dictionary by their set of tags ("inflection frames") and prints the groups.
+ */
+public class InflectionFramesTool {
+	public static void main(String[] args) throws IOException {
+		new InflectionFramesTool().inflectionFrames();
+	}
+
+	/* Builds stem -> tags, regroups stems by their sorted tag list, prints the groups to standard output. */
+	@SuppressWarnings( { "unused" })
+	public void inflectionFrames() throws IOException {
+		final Dictionary pl = Dictionary.getForLanguage("pl");
+		final DictionaryLookup dict = new DictionaryLookup(pl);
+		final CharsetDecoder decoder = pl.metadata.getDecoder();
+
+		final HashMap<String, ArrayList<String>> forms = 
+			new HashMap<String, ArrayList<String>>();
+
+		ByteBuffer stemBuffer = ByteBuffer.allocate(0);
+		ByteBuffer inflBuffer = ByteBuffer.allocate(0);
+		ByteBuffer stemDecoded = ByteBuffer.allocate(0);
+		// Upper bound on the number of entries visited by the loop below.
+		int limit = Integer.MAX_VALUE;
+
+		final Iterator<WordData> i = new DictionaryIterator(pl, decoder, false);
+		while (i.hasNext() && limit-- > 0) {
+			final WordData wd = i.next();
+
+			final CharSequence inflected = wd.getWord();
+			final CharSequence stemEncoded = wd.getStem();
+			final CharSequence tag = wd.getTag();
+			if (tag == null)
+				continue;
+
+			inflBuffer.clear();
+			inflBuffer = wd.getWordBytes(inflBuffer);
+
+			stemBuffer.clear();
+			stemBuffer = wd.getStemBytes(stemBuffer);
+
+			stemDecoded = DictionaryLookup.decodeBaseForm(stemDecoded, stemBuffer
+			        .array(), stemBuffer.remaining(), inflBuffer, pl.metadata);
+			stemDecoded.flip();
+			// intern() makes equal tags share a single String instance.
+			final String stem = decoder.decode(stemDecoded).toString();
+			final String form = tag.toString().intern();
+
+			ArrayList<String> frames = forms.get(stem);
+			if (frames == null) {
+				forms.put(stem, frames = new ArrayList<String>());
+			}
+			// Linear scan keeps each stem's tag list free of duplicates.
+			if (!frames.contains(form)) {
+				frames.add(form);
+			}
+		}
+
+		// Sort the forms so that we get a unique key. Then iteratively add them
+		// to another hash (by form this time).
+		final HashMap<String, ArrayList<String>> frames = 
+			new HashMap<String, ArrayList<String>>();
+
+		StringBuilder key = new StringBuilder();
+		for (Map.Entry<String, ArrayList<String>> e : forms.entrySet()) {
+			Collections.sort(e.getValue());
+
+			key.setLength(0);
+			for (String s : e.getValue())
+				key.append(s).append(" ");
+
+			final String k = key.toString();
+			ArrayList<String> words = frames.get(k);
+			if (words == null) {
+				frames.put(k, words = new ArrayList<String>());
+			}
+			words.add(e.getKey());
+			// Drop the tag list now that its key is built (presumably to free memory early).
+			e.setValue(null);
+		}
+
+		// Print inflection frames, the ones shared by the most stems first.
+		ArrayList<Map.Entry<String, ArrayList<String>>> entries = 
+			new ArrayList<Map.Entry<String, ArrayList<String>>>();
+
+		entries.addAll(frames.entrySet());
+		Collections.sort(entries,
+		        new Comparator<Map.Entry<String, ArrayList<String>>>() {
+			        public int compare(Entry<String, ArrayList<String>> o1,
+			                Entry<String, ArrayList<String>> o2) {
+				        return o2.getValue().size() - o1.getValue().size();
+			        }
+		        });
+
+		for (Map.Entry<String, ArrayList<String>> e : entries) {
+			System.out.println(String.format("%6d   %s %s",
+			        e.getValue().size(), e.getKey(), e.getValue()));
+		}
+
+		System.out.println("Total frames: " + frames.size());
+	}
+}
diff --git a/morfologik-tools/src/main/java/morfologik/tools/Launcher.java b/morfologik-tools/src/main/java/morfologik/tools/Launcher.java
new file mode 100644
index 0000000..320c1dc
--- /dev/null
+++ b/morfologik-tools/src/main/java/morfologik/tools/Launcher.java
@@ -0,0 +1,158 @@
+package morfologik.tools;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.reflect.Method;
+import java.net.URL;
+import java.util.Enumeration;
+import java.util.Iterator;
+import java.util.TreeMap;
+import java.util.jar.Manifest;
+
+import morfologik.util.FileUtils;
+
+/**
+ * A launcher for other command-line tools.
+ */
+public final class Launcher {
+	/**
+	 * Describes a single launchable tool: its class and a one-line summary.
+	 */
+	final static class ToolInfo {
+		public final Class<? extends Tool> clazz;
+		public final String info;
+
+		public ToolInfo(Class<? extends Tool> clazz, String info) {
+			this.clazz = clazz;
+			this.info = info;
+		}
+
+		/** Reflectively calls the tool's static <code>main(String[])</code>. */
+		public void invoke(String[] subArgs) throws Exception {
+			final Method entryPoint = clazz.getMethod("main", String[].class);
+			entryPoint.invoke(null, (Object) subArgs);
+		}
+	}
+
+	/**
+	 * Command line entry point. Without arguments the available tools are
+	 * listed; otherwise the first argument selects the tool and the remaining
+	 * arguments are passed on to it.
+	 */
+	public static void main(String[] args) throws Exception {
+		final TreeMap<String, ToolInfo> tools = initTools();
+		if (tools == null) {
+			// Initialization failed; initTools() has already printed the reason.
+			return;
+		}
+
+		if (args.length == 0) {
+			System.out.println("Provide tool name and its command-line options. "
+			    + "Available tools:");
+			for (String name : tools.keySet()) {
+				System.out.println(String.format("  %-10s - %s", name,
+				        tools.get(name).info));
+			}
+			return;
+		}
+
+		final String toolName = args[0];
+		final ToolInfo selected = tools.get(toolName);
+		if (selected == null) {
+			System.out.println("Unknown tool: " + toolName);
+			return;
+		}
+
+		// Strip the tool name; the rest belongs to the tool.
+		final String[] subArgs = new String[args.length - 1];
+		System.arraycopy(args, 1, subArgs, 0, subArgs.length);
+		selected.invoke(subArgs);
+	}
+
+	/**
+	 * Initialize and check tools' availability; unavailable tools are left out.
+	 *
+	 * @return Available tools by name, or <code>null</code> if instantiating
+	 *         any tool failed (a message has been printed in that case).
+	 */
+	static TreeMap<String, ToolInfo> initTools() {
+		TreeMap<String, ToolInfo> tools = new TreeMap<String, ToolInfo>();
+		tools.put("fsa_build", new ToolInfo(FSABuildTool.class,
+		        "Create an automaton from plain text files."));
+		tools.put("fsa_dump", new ToolInfo(FSADumpTool.class,
+		        "Dump an FSA dictionary."));
+		tools.put("tab2morph", new ToolInfo(MorphEncodingTool.class,
+		        "Convert tabbed dictionary to fsa encoding format."));
+		tools.put("plstem", new ToolInfo(PolishStemmingTool.class,
+		        "Apply Polish dictionary stemming to the input."));
+
+		// Prune unavailable tools.
+		for (Iterator<ToolInfo> i = tools.values().iterator(); i.hasNext();) {
+			ToolInfo ti = i.next();
+			try {
+				if (!ti.clazz.newInstance().isAvailable()) {
+					i.remove();
+				}
+			} catch (NoClassDefFoundError e) {
+				logJarWarning();
+				return null;
+			} catch (Throwable e) {
+				System.out.println("Tools could not be initialized because" +
+						" of an exception during initialization: "
+						+ e.getClass().getName() + ", " + e.getMessage());
+				return null;
+			}
+		}
+		return tools;
+	}
+	
+	/**
+	 * Log a warning about missing JAR dependencies. If the manifest that belongs
+	 * to this class's own code location can be found, its <code>Class-Path</code>
+	 * attribute is printed as a hint; failures while looking for it are ignored.
+	 */
+	private static void logJarWarning() {
+		System.out.println("Tools are unavailable, at least one JAR dependency missing.");
+
+		try {
+			final Class<Launcher> clazz = Launcher.class;
+			final ClassLoader classLoader = clazz.getClassLoader();
+
+			// Figure out our own class path location.
+			final String clazzName = clazz.getName().replace('.', '/') + ".class";
+			final URL launcherLocation = classLoader.getResource(clazzName);
+			if (launcherLocation == null) {
+				return;
+			}
+			final String launcherPrefix = launcherLocation.toString().replace(clazzName, "");
+
+			// Figure out our location's MANIFEST.MF (class loader may be hitting a few).
+			URL manifestResource = null;
+			Enumeration<URL> manifests = classLoader.getResources("META-INF/MANIFEST.MF");
+			while (manifests.hasMoreElements()) {
+				URL candidate = manifests.nextElement();
+				if (candidate.toString().startsWith(launcherPrefix)) {
+					manifestResource = candidate;
+					break;
+				}
+			}
+			if (manifestResource == null) {
+				return;
+			}
+
+			InputStream stream = null;
+			try {
+				stream = manifestResource.openStream();
+				Manifest manifest = new Manifest(stream);
+
+				System.out.println("Required JARs: "
+						+ manifest.getMainAttributes().getValue("Class-Path"));
+			} finally {
+				// Release the stream on success too, not only after a failure.
+				FileUtils.close(stream);
+			}
+		} catch (IOException e) {
+			// Ignore.
+		}
+	}
+}
diff --git a/morfologik-tools/src/main/java/morfologik/tools/MorphEncodingTool.java b/morfologik-tools/src/main/java/morfologik/tools/MorphEncodingTool.java
new file mode 100644
index 0000000..dfade2d
--- /dev/null
+++ b/morfologik-tools/src/main/java/morfologik/tools/MorphEncodingTool.java
@@ -0,0 +1,255 @@
+package morfologik.tools;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Locale;
+
+import morfologik.fsa.FSA5;
+import morfologik.stemming.EncoderType;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.lang.StringEscapeUtils;
+
+
+/**
+ * This utility converts the dictionary in a text (tabbed) format into 
+ * the format accepted by the fsa building tools. It is meant to replace
+ * the Perl and AWK scripts from the original FSA package. 
+ */
+class MorphEncodingTool extends Tool {
+    private static Charset US_ASCII = Charset.forName("US-ASCII"); // charset used by toAscii() when rendering columns for messages
+	private boolean noWarn = false; // set from the noWarnIfTwoFields option; silences the two-column notice in process()
+	private SequenceAssembler encoder; // assembles the output bytes for each input line
+    private byte separatorByte; // byte that the word and lemma columns are checked against in process()
+    private char separator; // annotation separator; go() starts from FSA5.DEFAULT_ANNOTATION
+
+	/**
+	 * Reads encoder type and separator options, then encodes input to output.
+	 */
+	protected void go(final CommandLine line) throws Exception {
+		noWarn = line.hasOption(SharedOptions.noWarnIfTwoFields.getOpt());
+		EncoderType encType = EncoderType.SUFFIX;
+		if (line.hasOption(SharedOptions.encoder.getOpt())) {
+			String encValue = line.getOptionValue(SharedOptions.encoder.getOpt());
+			try {
+				// Locale-independent upper-casing so that enum names always match.
+				encType = EncoderType.valueOf(encValue.toUpperCase(Locale.ROOT));
+			} catch (IllegalArgumentException e) {
+				throw new IllegalArgumentException("Invalid encoder: " + encValue + ", "
+				    + "allowed values: " + Arrays.toString(EncoderType.values()), e);
+			}
+		}
+
+		// Set both forms of the default: process() checks separatorByte even without the option.
+		separator = FSA5.DEFAULT_ANNOTATION;
+		separatorByte = FSA5.DEFAULT_ANNOTATION;
+		final String sepOpt = SharedOptions.annotationSeparatorCharacterOption.getLongOpt();
+		if (line.hasOption(sepOpt)) {
+			final String sep = StringEscapeUtils.unescapeJava(line.getOptionValue(sepOpt));
+			if (sep.length() != 1) {
+				throw new IllegalArgumentException("Field separator must be a single character: " + sep);
+			}
+			if (sep.charAt(0) > 0xff) {
+				throw new IllegalArgumentException("Field separator not within byte range: " + (int) sep.charAt(0));
+			}
+			separator = sep.charAt(0);
+			separatorByte = FSABuildTool.checkSingleByte(Character.toString(separator), Charset.defaultCharset());
+		}
+
+		encoder = new SequenceAssembler(SequenceEncoders.forType(encType), (byte) separator);
+		// Nested try/finally: a failure on one stream must not leak the other.
+		final DataInputStream input = initializeInput(line);
+		try {
+			final DataOutputStream output = initializeOutput(line);
+			try {
+				process(input, output);
+				output.flush();
+			} finally {
+				output.close();
+			}
+		} finally {
+			input.close();
+		}
+	}
+	
+	/**
+	 * Reads tab-separated, LF-terminated lines from <code>input</code> byte by byte and writes one
+	 * encoded, LF-terminated sequence per non-empty line to <code>output</code>; closes <code>input</code>.
+	 */
+	protected void process(final DataInputStream input, final DataOutputStream output)
+	        throws IOException {
+		long lnumber = 0; // number of the line being completed, used in messages
+		try {
+			int bufPos = 0; // number of bytes of the current column held in buf
+			byte[] buf = new byte[0]; // column buffer, enlarged in 1024-byte steps below
+			ArrayList<byte[]> columns = new ArrayList<byte[]>(); // finished columns of the current line
+			int dataByte;
+			do  {
+			    dataByte = input.read();
+			    switch (dataByte) {
+			        case '\r':
+			            // Ignore CR
+			            continue; // goes to the loop condition; '\r' is not -1, so reading goes on
+
+			        case '\t':
+			            columns.add(Arrays.copyOf(buf, bufPos));
+			            bufPos = 0;
+			            break;
+
+			        case -1:
+			            // Process EOF as if we encountered \n. fall-through.
+
+			        case '\n':
+                        lnumber++;
+                        if (bufPos == 0 && columns.isEmpty()) {
+                            if (dataByte != -1) { // stay silent about the empty "line" at EOF
+                                System.err.println(String.format(Locale.ROOT, 
+                                    "Ignoring empty line %d.", lnumber));
+                            }
+                            break;
+	                    }
+
+			            columns.add(Arrays.copyOf(buf, bufPos)); // the last column has no trailing tab
+
+                        if (columns.size() < 2 || columns.size() > 3) {
+                            throw new IllegalArgumentException(
+                                String.format(Locale.ROOT, "Every \\n-delimited 'line' must contain 2 or 3 columns, line %d has %d. US-ASCII version of this line: %s",
+                                    lnumber,
+                                    columns.size(),
+                                    toAscii(columns)));
+                        }
+
+                        if (columns.size() == 2 && !noWarn) {
+                            System.err.println(String.format(Locale.ROOT, 
+                                "Line %d has %d columns. US-ASCII version of this line: %s",
+                                lnumber,
+                                columns.size(),
+                                toAscii(columns)));
+                        }
+
+                        byte [] wordForm = columns.get(0);
+                        byte [] wordLemma = columns.get(1);
+                        if (contains(wordForm, separatorByte) ||
+                            contains(wordLemma, separatorByte)) {
+                            throw new IllegalArgumentException(
+                                String.format(Locale.ROOT,
+                                    "Either word or lemma in line %d contain the annotation byte '%s': %s",
+                                    lnumber,
+                                    separator,
+                                    toAscii(columns)));
+                        }
+
+                        output.write(encoder.encode(
+                            wordForm, 
+                            wordLemma, 
+                            columns.size() > 2 ? columns.get(2) : null)); // third column is optional
+
+                        output.writeByte('\n');
+
+                        bufPos = 0;
+                        columns.clear();
+			            break;
+
+		            default:
+	                    if (bufPos >= buf.length) {
+	                        buf = Arrays.copyOf(buf, buf.length + 1024);
+	                    }
+	                    buf[bufPos++] = (byte) dataByte;
+			    }
+			} while (dataByte != -1);		
+		} finally {
+			input.close(); // closed here whether or not processing succeeded
+		}
+	}
+
+	private boolean contains(byte [] seq, byte b) {
+        // Linear scan; stops at the first occurrence of b.
+        int pos = 0;
+        while (pos < seq.length && seq[pos] != b) pos++;
+        return pos < seq.length;
+    }
+
+    private String toAscii(ArrayList<byte []> columns)
+    {
+	    StringBuilder b = new StringBuilder();
+	    for (int i = 0; i < columns.size(); i++) {
+            if (i > 0) b.append("\t");
+	        b.append(new String(columns.get(i), US_ASCII));
+	    }
+        return b.toString();
+    }
+
+    /**
+	 * Registers the command line options understood by this tool.
+	 */
+	protected void initializeOptions(Options options) {
+		options.addOption(SharedOptions.inputFileOption)
+		       .addOption(SharedOptions.outputFileOption)
+		       .addOption(SharedOptions.encoder)
+		       .addOption(SharedOptions.noWarnIfTwoFields)
+		       .addOption(SharedOptions.annotationSeparatorCharacterOption);
+	}
+
+	/**
+	 * Opens the binary output: the file named by the output option when it is
+	 * present, standard output otherwise. Both are buffered.
+	 *
+	 * @param line Parsed command line.
+	 * @return The stream to write encoded sequences to.
+	 */
+	private static DataOutputStream initializeOutput(CommandLine line)
+	        throws IOException, ParseException {
+		final String opt = SharedOptions.outputFileOption.getOpt();
+
+		if (!line.hasOption(opt)) {
+			// Use standard output.
+			return new DataOutputStream(new BufferedOutputStream(System.out));
+		}
+
+		// Use output file.
+		final File target = (File) line.getParsedOptionValue(opt);
+		return new DataOutputStream(
+		        new BufferedOutputStream(new FileOutputStream(target)));
+	}
+
+	/**
+	 * Opens the binary input: the file named by the input option when it is
+	 * present, standard input otherwise. Both are buffered.
+	 *
+	 * @param line Parsed command line.
+	 * @return The stream to read tab-separated lines from.
+	 */
+	private static DataInputStream initializeInput(CommandLine line)
+	        throws IOException, ParseException {
+		final String opt = SharedOptions.inputFileOption.getOpt();
+
+		if (!line.hasOption(opt)) {
+			// Use standard input.
+			return new DataInputStream(new BufferedInputStream(System.in));
+		}
+
+		// Use input file.
+		final File source = (File) line.getParsedOptionValue(opt);
+		return new DataInputStream(
+		        new BufferedInputStream(new FileInputStream(source)));
+	}
+
+	/**
+	 * Command line entry point: runs a fresh tool instance on the given
+	 * arguments.
+	 */
+	public static void main(String... args) throws Exception {
+		new MorphEncodingTool().go(args);
+	}
+}
+\ No newline at end of file
diff --git a/morfologik-tools/src/main/java/morfologik/tools/PolishStemmingTool.java b/morfologik-tools/src/main/java/morfologik/tools/PolishStemmingTool.java
new file mode 100644
index 0000000..22c84c3
--- /dev/null
+++ b/morfologik-tools/src/main/java/morfologik/tools/PolishStemmingTool.java
@@ -0,0 +1,193 @@
+package morfologik.tools;
+
+import java.io.*;
+import java.text.MessageFormat;
+import java.util.List;
+import java.util.Locale;
+
+import morfologik.stemming.*;
+
+import org.apache.commons.cli.*;
+
+/**
+ * This utility parses input text, tokenizes it on whitespace and stems input
+ * words, writing them to the output in column-based format:
+ * 
+ * <pre>
+ * word   stem   form
+ * word   stem   form
+ * </pre>
+ * 
+ * Words for which no stems or forms are available have a dash (<code>-</code>)
+ * in each respective column. Columns are tab-delimited.
+ */
+class PolishStemmingTool extends Tool {
+	/**
+	 * Stems the input with the selected encodings, then prints the number of
+	 * processed words and the throughput to standard output.
+	 */
+	protected void go(CommandLine line) throws Exception {
+		// Determine input/ output encoding.
+		final String inputEncoding = getEncodingOption(line,
+		        SharedOptions.inputEncodingOption.getOpt());
+
+		final String outputEncoding = getEncodingOption(line,
+		        SharedOptions.outputEncodingOption.getOpt());
+
+		System.err.println("Input encoding: " + inputEncoding);
+		System.err.println("Output encoding: " + outputEncoding);
+
+		// Nested try/finally: a failure on one stream must not leak the other.
+		final Reader input = initializeInput(line, inputEncoding);
+		try {
+			final Writer output = initializeOutput(line, outputEncoding);
+			try {
+				final long start = System.currentTimeMillis();
+				final long count = process(input, output);
+				output.flush();
+
+				final long millis = System.currentTimeMillis() - start;
+				final double time = millis / 1000.0;
+				final double wordsPerSec = time > 0 ? (count / time)
+				        : Double.POSITIVE_INFINITY;
+				System.out.println(new MessageFormat(
+				        "Processed {0} words in {1,number,#.###} seconds ({2,number,#} words per second).",
+				        Locale.ENGLISH).format(new Object[] {
+				        Long.valueOf(count), Double.valueOf(time),
+				        Double.valueOf(wordsPerSec) }));
+			} finally {
+				output.close();
+			}
+		} finally {
+			input.close();
+		}
+	}
+
+	/**
+	 * Tokenizes the input and writes a tab-separated word, stem and tag line
+	 * for every stem of every word (dashes stand in when there are no stems).
+	 * 
+	 * @return Returns the number of processed words.
+	 */
+	protected long process(Reader input, Writer output) throws IOException {
+		final IStemmer stemmer = new PolishStemmer();
+		final StreamTokenizer tokenizer = new StreamTokenizer(input);
+		tokenizer.eolIsSignificant(false);
+		tokenizer.wordChars('+', '+');
+
+		long words = 0;
+		for (int type = tokenizer.nextToken(); type != StreamTokenizer.TT_EOF;
+		        type = tokenizer.nextToken()) {
+			if (type != StreamTokenizer.TT_WORD) {
+				continue;
+			}
+			words++;
+			final String word = tokenizer.sval;
+			final List<WordData> stems = stemmer.lookup(word);
+			if (stems.isEmpty()) {
+				output.write(word);
+				output.write("\t-\t-\n");
+				continue;
+			}
+			for (WordData wd : stems) {
+				output.write(word);
+				output.write("\t");
+				output.write(asString(wd.getStem()));
+				output.write("\t");
+				output.write(asString(wd.getTag()));
+				output.write("\n");
+			}
+		}
+		return words;
+	}
+
+	private String asString(CharSequence stem) {
+		if (stem == null)
+			return "-";
+		return stem.toString();
+	}
+
+	/**
+	 * Registers the command line options understood by this tool.
+	 */
+	protected void initializeOptions(Options options) {
+		options.addOption(SharedOptions.inputFileOption)
+		       .addOption(SharedOptions.inputEncodingOption)
+		       .addOption(SharedOptions.outputFileOption)
+		       .addOption(SharedOptions.outputEncodingOption);
+	}
+
+	/**
+	 * Opens the text output in the given encoding: the (buffered) output file
+	 * if the option is present, standard output otherwise.
+	 */
+	private Writer initializeOutput(CommandLine line, String outputEncoding)
+	        throws IOException, ParseException {
+		final String opt = SharedOptions.outputFileOption.getOpt();
+
+		if (!line.hasOption(opt)) {
+			// Use standard output.
+			System.err.println("Using standard output for output.");
+			return new OutputStreamWriter(System.out, outputEncoding);
+		}
+
+		// Use output file.
+		final File target = (File) line.getParsedOptionValue(opt);
+		return new OutputStreamWriter(
+		        new BufferedOutputStream(new FileOutputStream(target)), outputEncoding);
+	}
+
+	/**
+	 * Opens the text input in the given encoding: the (buffered) input file if
+	 * the option is present, standard input otherwise.
+	 * A note is printed to standard error when standard input is used.
+	 */
+	private Reader initializeInput(CommandLine line, String inputEncoding)
+	        throws IOException, ParseException {
+		final String opt = SharedOptions.inputFileOption.getOpt();
+
+		if (!line.hasOption(opt)) {
+			// Use standard input.
+			System.err.println("Using standard input for input.");
+			return new InputStreamReader(System.in, inputEncoding);
+		}
+
+		// Use input file.
+		final File source = (File) line.getParsedOptionValue(opt);
+		return new InputStreamReader(
+		        new BufferedInputStream(new FileInputStream(source)), inputEncoding);
+	}
+
+	/**
+     *  
+     */
+	private String getEncodingOption(CommandLine line, String opt) {
+		String encoding = System.getProperty("file.encoding", "iso-8859-1");
+		if (line.hasOption(opt)) {
+			encoding = line.getOptionValue(opt);
+		}
+		return encoding;
+	}
+
+	/**
+	 * Checks if the Polish stemmer can be constructed; any failure while
+	 * doing so means the tool is unavailable.
+	 */
+	@Override
+	protected boolean isAvailable() {
+		try {
+			new PolishStemmer();
+			return true;
+		} catch (Throwable t) {
+			return false;
+		}
+	}
+
+	/**
+	 * Command line entry point: runs a fresh tool instance on the given
+	 * arguments.
+	 */
+	public static void main(String[] args) throws Exception {
+		new PolishStemmingTool().go(args);
+	}
+}
+\ No newline at end of file
diff --git a/morfologik-tools/src/main/java/morfologik/tools/SequenceAssembler.java b/morfologik-tools/src/main/java/morfologik/tools/SequenceAssembler.java
new file mode 100644
index 0000000..e5fd388
--- /dev/null
+++ b/morfologik-tools/src/main/java/morfologik/tools/SequenceAssembler.java
@@ -0,0 +1,46 @@
+package morfologik.tools;
+
+import morfologik.fsa.FSA5;
+import morfologik.tools.SequenceEncoders.IEncoder;
+
+import com.carrotsearch.hppc.ByteArrayList;
+
+final class SequenceAssembler {
+	private final byte annotationSeparator;
+	private final IEncoder encoder;
+
+	// Scratch buffers, cleared and refilled on every encode() call.
+	private final ByteArrayList formBuffer = new ByteArrayList();
+	private final ByteArrayList lemmaBuffer = new ByteArrayList();
+	private final ByteArrayList assembled = new ByteArrayList();
+
+	public SequenceAssembler(SequenceEncoders.IEncoder encoder) {
+		this(encoder, FSA5.DEFAULT_ANNOTATION);
+	}
+
+	public SequenceAssembler(SequenceEncoders.IEncoder encoder, byte annotationSeparator) {
+		this.encoder = encoder;
+		this.annotationSeparator = annotationSeparator;
+	}
+
+	/**
+	 * Builds the byte sequence: word form, separator, the encoder's output for
+	 * (form, lemma), separator and finally the tag unless it is <code>null</code>.
+	 */
+	byte [] encode(byte [] wordForm, byte [] wordLemma, byte [] wordTag) {
+		formBuffer.clear();
+		formBuffer.add(wordForm);
+		lemmaBuffer.clear();
+		lemmaBuffer.add(wordLemma);
+
+		assembled.clear();
+		assembled.add(wordForm);
+		assembled.add(annotationSeparator);
+		encoder.encode(formBuffer, lemmaBuffer, assembled);
+		assembled.add(annotationSeparator);
+		if (wordTag != null) {
+			assembled.add(wordTag);
+		}
+		return assembled.toArray();
+	}
+}
diff --git a/morfologik-tools/src/main/java/morfologik/tools/SequenceEncoders.java b/morfologik-tools/src/main/java/morfologik/tools/SequenceEncoders.java
new file mode 100644
index 0000000..37cd0cc
--- /dev/null
+++ b/morfologik-tools/src/main/java/morfologik/tools/SequenceEncoders.java
@@ -0,0 +1,361 @@
+package morfologik.tools;
+
+import morfologik.stemming.EncoderType;
+
+import com.carrotsearch.hppc.ByteArrayList;
+
+/**
+ * Container class for sequence encoders.
+ */
+public final class SequenceEncoders {
+    private SequenceEncoders() {} // container of static members only; not instantiable
+
+    /**
+     * Maximum encodable single-byte code; decoders in this class treat it as "discard the entire source sequence".
+     */
+    private static final int REMOVE_EVERYTHING = 255;
+
+    public static interface IEncoder { // implementations in this file append to the buffer passed last and return that buffer
+        public ByteArrayList encode(ByteArrayList src, ByteArrayList derived, ByteArrayList encodedBuffer); // encodes derived relative to src
+        public ByteArrayList decode(ByteArrayList src, ByteArrayList encoded, ByteArrayList derivedBuffer); // rebuilds the derived sequence from src and encoded
+        public EncoderType type(); // the EncoderType constant this encoder corresponds to
+    }
+
+    /**
+     * Encodes <code>dst</code> relative to <code>src</code> by trimming 
+     * whatever non-equal suffix <code>src</code> has. The output code is (bytes):
+     * <pre>
+     * {K}{suffix}
+     * </pre>
+     * where (<code>K</code> - 'A') bytes should be trimmed from the end of <code>src</code> 
+     * and then the <code>suffix</code> should be appended to the resulting byte sequence.
+     * 
+     * <p>Examples:</p>
+     * <pre>
+     * src: foo
+     * dst: foobar
+     * encoded: Abar
+     * 
+     * src: foo
+     * dst: bar
+     * encoded: Dbar
+     * </pre>
+     * 
+     * <p><strong>Note:</strong> The code length is a single byte. If equal to 
+     * {@link SequenceEncoders#REMOVE_EVERYTHING} the entire <code>src</code> sequence
+     * should be discarded.</p>
+     */
+    public static class TrimSuffixEncoder implements IEncoder {
+        public ByteArrayList encode(ByteArrayList src, ByteArrayList dst, ByteArrayList encoded) {
+            final int common = sharedPrefixLength(src, dst);
+            final int toTrim = src.size() - common;
+
+            // A trim count that does not fit the code byte: drop all of src
+            // and emit dst in full.
+            final boolean dropAll = toTrim >= REMOVE_EVERYTHING;
+            final int code = dropAll ? REMOVE_EVERYTHING : toTrim;
+            final int keep = dropAll ? 0 : common;
+
+            // Code byte first, then the part of dst that src does not share.
+            encoded.add((byte) (code + 'A'));
+            encoded.add(dst.buffer, keep, dst.size() - keep);
+
+            return encoded;
+        }
+
+        public ByteArrayList decode(ByteArrayList src, ByteArrayList encoded, ByteArrayList dst) {
+            final int decoded = (encoded.get(0) - 'A') & 0xFF;
+            final int toTrim = (decoded == REMOVE_EVERYTHING) ? src.size() : decoded;
+
+            // Kept head of src first, then the literal suffix after the code byte.
+            dst.add(src.buffer, 0, src.size() - toTrim);
+            dst.add(encoded.buffer, 1, encoded.size() - 1);
+
+            return dst;
+        }
+
+        @Override
+        public EncoderType type() {
+            return EncoderType.SUFFIX;
+        }
+
+        @Override
+        public String toString() {
+            return getClass().getSimpleName();
+        }
+    }
+
+    /**
+     * Encodes <code>dst</code> relative to <code>src</code> by trimming 
+     * whatever non-equal suffix and prefix <code>src</code> and <code>dst</code> have. 
+     * The output code is (bytes):
+     * <pre>
+     * {P}{K}{suffix}
+     * </pre>
+     * where (<code>P</code> - 'A') bytes should be trimmed from the start of <code>src</code>,
+     *  (<code>K</code> - 'A') bytes should be trimmed from the end of <code>src</code>
+     * and then the <code>suffix</code> should be appended to the resulting byte sequence.
+     * 
+     * <p>Examples:</p>
+     * <pre>
+     * src: abc
+     * dst: abcd
+     * encoded: AAd
+     * 
+     * src: abc
+     * dst: xyz
+     * encoded: ADxyz
+     * </pre>
+     * 
+     * <p><strong>Note:</strong> Each code's length is a single byte. If any is equal to 
+     * {@link SequenceEncoders#REMOVE_EVERYTHING} the entire <code>src</code> sequence
+     * should be discarded.</p>
+     */
+    public static class TrimPrefixAndSuffixEncoder implements IEncoder {
+        public ByteArrayList encode(ByteArrayList src, ByteArrayList dst, ByteArrayList encoded) {
+            // Find the longest run of src, starting anywhere, that matches the
+            // beginning of dst and whose surrounding trim counts are encodable.
+            int bestLength = 0;
+            int bestStart = 0;
+            final int srcSize = src.size();
+            for (int start = 0; start < srcSize; start++) {
+                final int run = sharedPrefixLength(src, start, dst, 0);
+                final boolean encodable = start < REMOVE_EVERYTHING
+                        && (srcSize - (start + run)) < REMOVE_EVERYTHING;
+                if (run > bestLength && encodable) {
+                    bestLength = run;
+                    bestStart = start;
+                }
+            }
+
+            // Whatever precedes and follows the matched run has to go.
+            int dropHead = bestStart;
+            int dropTail = srcSize - (bestStart + bestLength);
+            if (dropHead >= REMOVE_EVERYTHING || dropTail >= REMOVE_EVERYTHING) {
+                // Not encodable: discard src entirely and emit all of dst.
+                bestLength = 0;
+                dropHead = REMOVE_EVERYTHING;
+                dropTail = REMOVE_EVERYTHING;
+            }
+
+            encoded.add((byte) ((dropHead + 'A') & 0xFF));
+            encoded.add((byte) ((dropTail + 'A') & 0xFF));
+            encoded.add(dst.buffer, bestLength, dst.size() - bestLength);
+
+            return encoded;
+        }
+
+        public ByteArrayList decode(ByteArrayList src, ByteArrayList encoded, ByteArrayList dst) {
+            int dropHead = (encoded.get(0) - 'A') & 0xFF;
+            int dropTail = (encoded.get(1) - 'A') & 0xFF;
+            if (dropHead == REMOVE_EVERYTHING || dropTail == REMOVE_EVERYTHING) {
+                // Skipping all of src as "head" leaves nothing to copy from it.
+                dropHead = src.size();
+                dropTail = 0;
+            }
+
+            final int kept = src.size() - (dropHead + dropTail);
+            dst.add(src.buffer, dropHead, kept);
+            dst.add(encoded.buffer, 2, encoded.size() - 2);
+            return dst;
+        }
+
+        @Override
+        public EncoderType type() {
+            return EncoderType.PREFIX;
+        }
+
+        @Override
+        public String toString() {
+            return getClass().getSimpleName();
+        }
+    }
+
+    /**
+     * Encodes <code>dst</code> relative to <code>src</code> by trimming 
+     * whatever non-equal suffix and infix <code>src</code> and <code>dst</code> have. 
+     * The output code is (bytes):
+     * <pre>
+     * {X}{L}{K}{suffix}
+     * </pre>
+     * where <code>src's</code> infix at position (<code>X</code> - 'A') and of length
+     * (<code>L</code> - 'A') should be removed, then (<code>K</code> - 'A') bytes 
+     * should be trimmed from the end
+     * and then the <code>suffix</code> should be appended to the resulting byte sequence.
+     * 
+     * <p>Examples:</p>
+     * <pre>
+     * src: ayz
+     * dst: abc
+     * encoded: AACbc
+     * 
+     * src: aillent
+     * dst: aller
+     * encoded: BBCr
+     * </pre>
+     * 
+     * <p><strong>Note:</strong> Each code's length is a single byte. If any is equal to 
+     * {@link SequenceEncoders#REMOVE_EVERYTHING} the entire <code>src</code> sequence
+     * should be discarded.</p>
+     */
+    public static class TrimInfixAndSuffixEncoder implements IEncoder {
+        ByteArrayList scratch = new ByteArrayList(); // reused by encode() to hold src with a candidate infix removed
+
+        public ByteArrayList encode(ByteArrayList src, ByteArrayList dst, ByteArrayList encoded) {
+            // Search for the infix that we can encode and remove from src
+            // to get a maximum-length prefix of dst. This could be done more efficiently
+            // by running a smarter longest-common-subsequence algorithm and some pruning (?).
+            //
+            // For now, naive loop should do.
+
+            // There can be only two positions for the infix to delete:
+            // 1) we remove leading bytes, even if they are partially matching (but a longer match
+            //    exists somewhere later on).
+            // 2) we leave max. matching prefix and remove non-matching bytes that follow. 
+            int maxInfixIndex = 0;
+            int maxSubsequenceLength = sharedPrefixLength(src, dst);
+            int maxInfixLength = 0;
+            for (int i : new int [] {0, maxSubsequenceLength}) { // i: candidate infix start
+                for (int j = 1; j <= src.size() - i; j++) { // j: candidate infix length
+                    // Compute temporary src with the infix removed.
+                    // Concatenate in scratch space for simplicity.
+                    scratch.clear();
+                    scratch.add(src.buffer, 0, i);
+                    scratch.add(src.buffer, i + j, src.size() - (i + j));
+    
+                    int sharedPrefix = sharedPrefixLength(scratch, dst);
+
+                    // Only update maxSubsequenceLength if we will be able to encode it.
+                    if (sharedPrefix > 0 && 
+                        sharedPrefix > maxSubsequenceLength &&
+                        i < REMOVE_EVERYTHING &&
+                        j < REMOVE_EVERYTHING) {
+                        maxSubsequenceLength = sharedPrefix;
+                        maxInfixIndex = i;
+                        maxInfixLength = j;
+                    }
+                }
+            }
+            
+            int truncateSuffixBytes = src.size() - (maxInfixLength + maxSubsequenceLength); // what is left of src after the infix and the matched part
+            
+            // Special case: if we're removing the suffix in the infix code, move it
+            // to the suffix code instead.
+            if (truncateSuffixBytes == 0 &&
+                maxInfixIndex + maxInfixLength == src.size()) {
+                truncateSuffixBytes = maxInfixLength;
+                maxInfixIndex = maxInfixLength = 0;
+            }
+                
+            
+            if (maxInfixIndex >= REMOVE_EVERYTHING ||
+                maxInfixLength >= REMOVE_EVERYTHING ||
+                truncateSuffixBytes >= REMOVE_EVERYTHING) {
+                maxInfixIndex = maxSubsequenceLength = 0; // not encodable: discard all of src, emit dst in full
+                maxInfixLength = truncateSuffixBytes = REMOVE_EVERYTHING;
+            }
+
+            encoded.add((byte) ((maxInfixIndex       + 'A') & 0xFF));
+            encoded.add((byte) ((maxInfixLength      + 'A') & 0xFF));
+            encoded.add((byte) ((truncateSuffixBytes + 'A') & 0xFF));
+            encoded.add(dst.buffer, maxSubsequenceLength, dst.size() - maxSubsequenceLength);
+
+            return encoded;
+        }
+
+        public ByteArrayList decode(ByteArrayList src, ByteArrayList encoded, ByteArrayList dst) {
+            int infixIndex  = (encoded.get(0) - 'A') & 0xFF;
+            int infixLength = (encoded.get(1) - 'A') & 0xFF;
+            int truncateSuffixBytes = (encoded.get(2) - 'A') & 0xFF;
+
+            if (infixLength == REMOVE_EVERYTHING ||
+                truncateSuffixBytes == REMOVE_EVERYTHING) {
+                infixIndex = 0; // treat all of src as the infix, so nothing of it is copied below
+                infixLength = src.size();
+                truncateSuffixBytes = 0;
+            }
+
+            dst.add(src.buffer, 0, infixIndex); // part of src before the infix
+            dst.add(src.buffer, infixIndex + infixLength, src.size() - (infixIndex + infixLength + truncateSuffixBytes)); // part between infix and trimmed suffix
+            dst.add(encoded.buffer, 3, encoded.size() - 3); // literal suffix following the three code bytes
+
+            return dst;
+        }
+
+        @Override
+        public EncoderType type() {
+            return EncoderType.INFIX;
+        }
+
+        @Override
+        public String toString() {
+            return getClass().getSimpleName();
+        }        
+    }
+    
+    /**
+     * Pass-through encoder: the encoded form is the derived sequence itself; <code>src</code> is ignored.
+     */
+    public static class CopyEncoder implements IEncoder {
+        @Override
+        public ByteArrayList encode(ByteArrayList src, ByteArrayList derived, ByteArrayList encodedBuffer) {
+            final int length = derived.size();
+            encodedBuffer.add(derived.buffer, 0, length);
+            return encodedBuffer;
+        }
+
+        @Override
+        public ByteArrayList decode(ByteArrayList src, ByteArrayList encoded, ByteArrayList derivedBuffer) {
+            final int length = encoded.size();
+            derivedBuffer.add(encoded.buffer, 0, length);
+            return derivedBuffer;
+        }
+
+        @Override
+        public EncoderType type() {
+            return EncoderType.NONE;
+        }
+
+        @Override
+        public String toString() {
+            return getClass().getSimpleName();
+        }
+    }
+
+    /** Compute the length of the longest prefix shared by two byte sequences. */
+    private static int sharedPrefixLength(ByteArrayList a, ByteArrayList b) {
+        final int limit = Math.min(a.size(), b.size());
+        for (int pos = 0; pos < limit; pos++) {
+            if (a.get(pos) != b.get(pos)) {
+                // First mismatch: everything before it is shared.
+                return pos;
+            }
+        }
+        return limit;
+    }
+
+    /**
+     * Compute the length of the shared prefix between two byte sequences.
+     */
+    private static int sharedPrefixLength(ByteArrayList a, int aStart, ByteArrayList b, int bStart) {
+
+        int i = 0;
+        while (aStart < a.size() && 
+               bStart < b.size() &&
+               a.get(aStart++) == b.get(bStart++)) {
+            i++;
+        }
+        return i;
+    }
+
+    /** Creates a new encoder instance for the given type. */
+    public static IEncoder forType(EncoderType encType) {
+        switch (encType) {
+            case NONE:   return new CopyEncoder();
+            case SUFFIX: return new TrimSuffixEncoder();
+            case PREFIX: return new TrimPrefixAndSuffixEncoder();
+            case INFIX:  return new TrimInfixAndSuffixEncoder();
+            default:     throw new RuntimeException("Unknown encoder: " + encType); 
+        }
+    }
+}
diff --git a/morfologik-tools/src/main/java/morfologik/tools/SharedOptions.java b/morfologik-tools/src/main/java/morfologik/tools/SharedOptions.java
new file mode 100644
index 0000000..11b42aa
--- /dev/null
+++ b/morfologik-tools/src/main/java/morfologik/tools/SharedOptions.java
@@ -0,0 +1,152 @@
+package morfologik.tools;
+
+import java.io.File;
+import java.util.Arrays;
+
+import morfologik.stemming.EncoderType;
+
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionBuilder;
+
+/**
+ * Options shared between tools.
+ */
+@SuppressWarnings("static-access")
+final class SharedOptions {
+    public final static Option fsaDictionaryFileOption = OptionBuilder
+        .hasArg()
+        .withArgName("file")
+        .withDescription("Path to the FSA dictionary.")
+        .withLongOpt("dictionary")
+        .withType(File.class)
+        .isRequired(true)
+        .create("d");
+
+	public final static Option decode = OptionBuilder
+	    .withDescription("Decode prefix/ infix/ suffix forms (if available).")
+	    .withLongOpt("decode")
+	    .isRequired(false)
+	    .create("x");
+
+	public final static Option dataOnly = OptionBuilder
+	    .withDescription("Dump only raw FSA data.")
+	    .withLongOpt("raw-data")
+	    .isRequired(false)
+	    .create("r");
+
+	public final static Option dot = OptionBuilder
+	    .withDescription("Dump the automaton as graphviz DOT file.")
+	    .withLongOpt("dot")
+	    .isRequired(false)
+	    .create();
+
+	public final static Option inputEncodingOption = OptionBuilder
+	    .hasArg()
+	    .withArgName("codepage")
+	    .withDescription("Input stream encoding.")
+	    .withLongOpt("input-encoding")
+	    .isRequired(false)
+	    .create("ie");
+
+	public final static Option outputEncodingOption = OptionBuilder
+	    .hasArg()
+	    .withArgName("codepage")
+	    .withDescription("Output stream encoding.")
+	    .withLongOpt("output-encoding")
+	    .isRequired(false)
+	    .create("oe");
+
+	public final static Option inputFileOption = OptionBuilder
+	    .hasArg()
+	    .withArgName("file")
+	    .withDescription("Input file. If missing, standard input is used.")
+	    .withLongOpt("input")
+	    .withType(File.class)
+	    .isRequired(false)
+	    .create("i");
+
+	public final static Option outputFileOption = OptionBuilder
+	    .hasArg()
+	    .withArgName("file")
+	    .withDescription("Output file. If missing, standard output is used.")
+	    .withLongOpt("output")
+	    .withType(File.class)
+	    .isRequired(false)
+	    .create("o");
+
+    public final static Option outputFormatOption = OptionBuilder
+        .hasArg()
+        .withArgName("format")
+        .withDescription("Name of the binary output format. Allowed values: " + Arrays.toString(FSABuildTool.Format.values()))
+        .withLongOpt("format")
+        .isRequired(false)
+        .create("f");
+
+	public final static Option fillerCharacterOption = OptionBuilder
+		.hasArg()
+    	.withArgName("char")
+    	.withDescription("Custom filler character")
+    	.isRequired(false)
+    	.withLongOpt("filler")
+    	.create();
+
+	public final static Option annotationSeparatorCharacterOption = OptionBuilder
+		.hasArg()
+		.withArgName("char")
+		.withDescription("Custom annotation separator character")
+		.isRequired(false)
+		.withLongOpt("annotation")
+		.create();
+
+	public final static Option withNumbersOption = OptionBuilder
+	    .withDescription("Include numbers required for perfect hashing (larger automaton)")
+	    .isRequired(false)
+	    .withLongOpt("with-numbers")
+	    .create("n");
+
+    public final static Option progressOption = OptionBuilder
+        .withDescription("Print more verbose progress information")
+        .isRequired(false)
+        .withLongOpt("progress")
+        .create();
+
+    public final static Option inputSortedOption = OptionBuilder
+        .withDescription("Assume the input is already sorted using C-sort (builds FSA directly, no in-memory sorting)")
+        .isRequired(false)
+        .withLongOpt("sorted")
+        .create();
+
+	public final static Option encoder = OptionBuilder
+	    .withDescription("Encoder used for compressing inflected forms. Any of: "
+	        + Arrays.toString(EncoderType.values()))
+	    .withLongOpt("encoder")
+        .hasArg(true)
+	    .withArgName("name")
+	    .isRequired(false)
+	    .create("e");
+
+	public final static Option noWarnIfTwoFields = OptionBuilder
+	    .withDescription("Suppress warning for lines with only two fields (for stemming dictionaries)")
+		.withLongOpt("nowarn")
+		.isRequired(false)
+		.create("nw");
+
+    public final static Option statistics = OptionBuilder
+        .withDescription("Print extra statistics.")
+        .isRequired(false)
+        .withLongOpt("stats")
+        .create();
+
+    public final static Option help = OptionBuilder
+        .withDescription("Help on available options.")
+        .withLongOpt("help")
+        .isRequired(false)
+        .create();
+
+	/**
+	 * No instances. Use static fields.
+	 */
+	private SharedOptions() {
+		// empty
+	}
+}
diff --git a/morfologik-tools/src/main/java/morfologik/tools/Tool.java b/morfologik-tools/src/main/java/morfologik/tools/Tool.java
new file mode 100644
index 0000000..27dac3f
--- /dev/null
+++ b/morfologik-tools/src/main/java/morfologik/tools/Tool.java
@@ -0,0 +1,102 @@
+package morfologik.tools;
+
+import org.apache.commons.cli.*;
+
+/**
+ * Base class for command-line applications.
+ *
+ * <p>Subclasses register their options in {@link #initializeOptions(Options)}
+ * and do the actual work in {@link #go(CommandLine)}. The entry point
+ * {@link #go(String[])} takes care of parsing, usage output and error
+ * reporting.</p>
+ */
+abstract class Tool {
+    /** Command line options. */
+    protected final Options options = new Options();
+
+    /**
+     * Initializes application context: registers the options, parses the
+     * command line and, if parsing succeeded, runs {@link #go(CommandLine)}.
+     *
+     * <p>Problems are never propagated to the caller. Parsing problems are
+     * reported with {@link #printError(String)}; anything thrown by
+     * {@link #go(CommandLine)} is reported with
+     * {@link #printError(String, Throwable)}.</p>
+     *
+     * @param args Raw command line arguments.
+     */
+    protected final void go(String[] args) {
+        options.addOption(SharedOptions.help);
+        initializeOptions(options);
+
+        // Commons-cli is pretty dumb in terms of option parsing because it
+        // validates immediately and there is no way to determine
+        // if an option exists without bailing out with an exception. This
+        // is a hardcoded workaround for --help
+        for (String arg : args) {
+            if ("--help".equals(arg)) {
+                printUsage();
+                return;
+            }
+        }
+
+        // Step 1: parse. Every parsing problem is reported and ends the run.
+        final Parser parser = new GnuParser();
+        final CommandLine line;
+        try {
+            line = parser.parse(options, args);
+        } catch (MissingArgumentException e) {
+            printError("Provide the required argument for option: "
+                    + e.getMessage());
+            return;
+        } catch (MissingOptionException e) {
+            printError("Provide the required option: " + e.getMessage());
+            return;
+        } catch (UnrecognizedOptionException e) {
+            printError(e.getMessage());
+            return;
+        } catch (ParseException e) {
+            printError("Could not parse command line: " + e.getMessage());
+            return;
+        }
+
+        // Step 2: validate what the parser accepted.
+        if (line.hasOption(SharedOptions.help.getLongOpt())) {
+            printUsage();
+            return;
+        }
+        if (!line.getArgList().isEmpty()) {
+            printError("Unrecognized left over command line arguments: "
+                    + line.getArgList());
+            return;
+        }
+
+        // Step 3: run the tool. This is the outermost boundary of the
+        // application, hence the deliberately broad catch.
+        try {
+            go(line);
+        } catch (Throwable e) {
+            printError("Unhandled program error occurred.", e);
+        }
+    }
+
+    /**
+     * Print an error and an associated exception: the message goes through
+     * {@link #printError(String)}, the stack trace to <code>System.err</code>.
+     *
+     * @param msg The message to print.
+     * @param t The exception whose stack trace is printed after the message.
+     */
+    protected void printError(String msg, Throwable t) {
+        printError(msg);
+        t.printStackTrace(System.err);
+    }
+
+    /**
+     * Print an error without an exception. The output on
+     * <code>System.err</code> is an empty line, the message and a hint to
+     * invoke the tool with <code>--help</code>.
+     *
+     * @param msg The message to print.
+     */
+    protected void printError(String msg) {
+        System.err.println();
+        System.err.println(msg);
+        System.err.println("Invoke with '--help' for help.");
+    }
+
+    /**
+     * Prints usage (options). The fully qualified name of the concrete tool
+     * class is used as the command name.
+     */
+    protected void printUsage() {
+        final HelpFormatter formatter = new HelpFormatter();
+        formatter.printHelp(this.getClass().getName(), options, true);
+    }
+
+    /**
+     * Override and write your stuff using command line options.
+     *
+     * @param line The successfully parsed command line.
+     * @throws Exception On any failure; {@link #go(String[])} catches and
+     *         reports it.
+     */
+    protected abstract void go(CommandLine line) throws Exception;
+
+    /**
+     * Override and initialize options.
+     *
+     * @param options The option set to add the tool's options to. The shared
+     *        <code>--help</code> option has already been added to it.
+     */
+    protected abstract void initializeOptions(Options options);
+
+    /**
+     * Is the tool available? <code>true</code> by default.
+     */
+    protected boolean isAvailable() {
+        return true;
+    }
+}
diff --git a/morfologik-tools/src/main/java/morfologik/tools/WriterMessageLogger.java b/morfologik-tools/src/main/java/morfologik/tools/WriterMessageLogger.java
new file mode 100644
index 0000000..5caee57
--- /dev/null
+++ b/morfologik-tools/src/main/java/morfologik/tools/WriterMessageLogger.java
@@ -0,0 +1,125 @@
+package morfologik.tools;
+
+import java.io.PrintWriter;
+import java.util.*;
+
+import morfologik.fsa.IMessageLogger;
+
+/**
+ * A logger dumping info to the {@link PrintWriter} passed to the constructor.
+ *
+ * <p>Messages and key-value pairs are printed one per line. Timed parts are
+ * printed as a header followed, on the same line if nothing was logged in
+ * between, by the elapsed time of the part and the time elapsed since this
+ * class was loaded.</p>
+ */
+public class WriterMessageLogger implements IMessageLogger {
+    /**
+     * Start of the world timestamp (taken when this class is initialized).
+     */
+    private final static long world = System.currentTimeMillis();
+
+    /**
+     * A single part: name, start timestamp.
+     */
+    private static class Part {
+        final String name;
+        final long start;
+
+        Part(String name, long start) {
+            this.name = name;
+            this.start = start;
+        }
+    }
+
+    /**
+     * Is the output currently indented? <code>true</code> right after a part
+     * header has been printed without a terminating line break.
+     */
+    private boolean indent;
+
+    /**
+     * Active parts, the most recently started one last.
+     */
+    private final ArrayDeque<Part> parts = new ArrayDeque<Part>();
+
+    /**
+     * Output writer.
+     */
+    private final PrintWriter writer;
+
+    /**
+     * Creates a logger printing to the given writer.
+     *
+     * @param w The writer all output is sent to.
+     */
+    public WriterMessageLogger(PrintWriter w) {
+        this.writer = w;
+    }
+
+    /**
+     * Prints a single message line and flushes the writer.
+     *
+     * @param msg The message to print.
+     */
+    @Override
+    public void log(String msg) {
+        cancelIndent();
+
+        writer.println(msg);
+        writer.flush();
+    }
+
+    /**
+     * Prints a header and its value in two aligned columns and flushes the
+     * writer. {@link Integer} and {@link Long} values are printed with
+     * grouping separators; any other value, including <code>null</code>, is
+     * printed using its string representation.
+     *
+     * @param header The label printed in the first column.
+     * @param v The value printed in the second column.
+     */
+    @Override
+    public void log(String header, Object v) {
+        cancelIndent();
+
+        final String line;
+        if (v instanceof Integer || v instanceof Long) {
+            line = String.format(Locale.ENGLISH, "%-30s  %,11d", header, v);
+        } else {
+            // String.valueOf tolerates null where v.toString() would throw.
+            line = String.format(Locale.ENGLISH, "%-30s  %11s", header, String.valueOf(v));
+        }
+        writer.println(line);
+        writer.flush();
+    }
+
+    /**
+     * Starts a new timed part. The header is printed without a line break so
+     * that {@link #endPart()} can append the timing on the same line.
+     *
+     * @param header The name of the part.
+     */
+    @Override
+    public void startPart(String header) {
+        cancelIndent();
+
+        Part p = new Part(header, System.currentTimeMillis());
+        parts.addLast(p);
+
+        writer.print(String.format(Locale.ENGLISH, "%-30s", p.name + "..."));
+        writer.flush();
+
+        indent = true;
+    }
+
+    /**
+     * Ends the most recently started part and prints its duration together
+     * with the time elapsed since the start of the world.
+     *
+     * @throws java.util.NoSuchElementException If no part is currently active.
+     */
+    @Override
+    public void endPart() {
+        long now = System.currentTimeMillis();
+        Part p = parts.removeLast();
+
+        // Something was logged since the header was printed (or a nested part
+        // ended), so the header is repeated to label the timing line.
+        if (!indent) {
+            writer.print(String.format(Locale.ENGLISH, "%-30s", p.name + "..."));
+        }
+
+        writer.println(
+                String.format(Locale.ENGLISH, "%13.2f sec.  [%6.2f sec.]",
+                (now - p.start) / 1000.0,
+                (now - world) / 1000.0));
+        writer.flush();
+
+        indent = false;
+    }
+
+    /**
+     * Terminates a pending part header line, if any, so that the next output
+     * starts on a fresh line. The line break goes to the same writer as the
+     * header it terminates.
+     */
+    private void cancelIndent() {
+        if (indent) {
+            writer.println();
+        }
+
+        indent = false;
+    }
+}