From 781e1fe64337af4712341611a88654670c43a67d Mon Sep 17 00:00:00 2001 From: Andrej Shadura Date: Wed, 26 Dec 2018 20:22:34 +0100 Subject: Import Upstream version 1.2.2 --- doc/api/morfologik/fsa/FSA.html | 919 +++++++++++++++++++++ doc/api/morfologik/fsa/FSAFinalStatesIterator.html | 302 +++++++ doc/api/morfologik/fsa/FSAFlags.html | 451 ++++++++++ doc/api/morfologik/fsa/FSAHelpers.html | 268 ++++++ doc/api/morfologik/fsa/FSAMatch.html | 300 +++++++ doc/api/morfologik/fsa/FSAMatchType.html | 395 +++++++++ doc/api/morfologik/fsa/FSATraversalHelper.html | 358 ++++++++ doc/api/morfologik/fsa/FSAVer5Impl.html | 686 +++++++++++++++ doc/api/morfologik/fsa/package-frame.html | 55 ++ doc/api/morfologik/fsa/package-summary.html | 200 +++++ doc/api/morfologik/fsa/package-tree.html | 165 ++++ doc/api/morfologik/stemming/Dictionary.html | 489 +++++++++++ .../morfologik/stemming/DictionaryIterator.html | 309 +++++++ doc/api/morfologik/stemming/DictionaryLookup.html | 375 +++++++++ .../morfologik/stemming/DictionaryMetadata.html | 437 ++++++++++ doc/api/morfologik/stemming/IStemmer.html | 220 +++++ doc/api/morfologik/stemming/PolishStemmer.html | 302 +++++++ doc/api/morfologik/stemming/WordData.html | 447 ++++++++++ doc/api/morfologik/stemming/package-frame.html | 53 ++ doc/api/morfologik/stemming/package-summary.html | 190 +++++ doc/api/morfologik/stemming/package-tree.html | 160 ++++ doc/api/morfologik/tools/DumpTool.html | 436 ++++++++++ doc/api/morfologik/tools/InflectionFramesTool.html | 279 +++++++ doc/api/morfologik/tools/Launcher.html | 259 ++++++ doc/api/morfologik/tools/package-frame.html | 36 + doc/api/morfologik/tools/package-summary.html | 164 ++++ doc/api/morfologik/tools/package-tree.html | 151 ++++ doc/api/morfologik/util/Arrays.html | 302 +++++++ doc/api/morfologik/util/BufferUtils.html | 252 ++++++ doc/api/morfologik/util/FileUtils.html | 251 ++++++ doc/api/morfologik/util/PerformanceTimer.html | 303 +++++++ doc/api/morfologik/util/ResourceUtils.html | 228 
+++++ doc/api/morfologik/util/package-frame.html | 40 + doc/api/morfologik/util/package-summary.html | 171 ++++ doc/api/morfologik/util/package-tree.html | 151 ++++ 35 files changed, 10104 insertions(+) create mode 100644 doc/api/morfologik/fsa/FSA.html create mode 100644 doc/api/morfologik/fsa/FSAFinalStatesIterator.html create mode 100644 doc/api/morfologik/fsa/FSAFlags.html create mode 100644 doc/api/morfologik/fsa/FSAHelpers.html create mode 100644 doc/api/morfologik/fsa/FSAMatch.html create mode 100644 doc/api/morfologik/fsa/FSAMatchType.html create mode 100644 doc/api/morfologik/fsa/FSATraversalHelper.html create mode 100644 doc/api/morfologik/fsa/FSAVer5Impl.html create mode 100644 doc/api/morfologik/fsa/package-frame.html create mode 100644 doc/api/morfologik/fsa/package-summary.html create mode 100644 doc/api/morfologik/fsa/package-tree.html create mode 100644 doc/api/morfologik/stemming/Dictionary.html create mode 100644 doc/api/morfologik/stemming/DictionaryIterator.html create mode 100644 doc/api/morfologik/stemming/DictionaryLookup.html create mode 100644 doc/api/morfologik/stemming/DictionaryMetadata.html create mode 100644 doc/api/morfologik/stemming/IStemmer.html create mode 100644 doc/api/morfologik/stemming/PolishStemmer.html create mode 100644 doc/api/morfologik/stemming/WordData.html create mode 100644 doc/api/morfologik/stemming/package-frame.html create mode 100644 doc/api/morfologik/stemming/package-summary.html create mode 100644 doc/api/morfologik/stemming/package-tree.html create mode 100644 doc/api/morfologik/tools/DumpTool.html create mode 100644 doc/api/morfologik/tools/InflectionFramesTool.html create mode 100644 doc/api/morfologik/tools/Launcher.html create mode 100644 doc/api/morfologik/tools/package-frame.html create mode 100644 doc/api/morfologik/tools/package-summary.html create mode 100644 doc/api/morfologik/tools/package-tree.html create mode 100644 doc/api/morfologik/util/Arrays.html create mode 100644 
doc/api/morfologik/util/BufferUtils.html create mode 100644 doc/api/morfologik/util/FileUtils.html create mode 100644 doc/api/morfologik/util/PerformanceTimer.html create mode 100644 doc/api/morfologik/util/ResourceUtils.html create mode 100644 doc/api/morfologik/util/package-frame.html create mode 100644 doc/api/morfologik/util/package-summary.html create mode 100644 doc/api/morfologik/util/package-tree.html (limited to 'doc/api/morfologik') diff --git a/doc/api/morfologik/fsa/FSA.html b/doc/api/morfologik/fsa/FSA.html new file mode 100644 index 0000000..cdaccea --- /dev/null +++ b/doc/api/morfologik/fsa/FSA.html @@ -0,0 +1,919 @@ + + + + + + +FSA + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.fsa +
+Class FSA

+
+java.lang.Object
+  extended by morfologik.fsa.FSA
+
+
+
All Implemented Interfaces:
java.lang.Iterable<java.nio.ByteBuffer>
+
+
+
Direct Known Subclasses:
FSAVer5Impl
+
+
+
+
public abstract class FSA
extends java.lang.Object
implements java.lang.Iterable<java.nio.ByteBuffer>
+ + +

+This class implements Finite State Automaton traversal as described in Jan + Daciuk's Incremental Construction of Finite-State Automata and + Transducers, and Their Use in the Natural Language Processing (PhD + thesis, Technical University of Gdansk). + +

+ This is an abstract base class for all forms of binary storage present in Jan + Daciuk's FSA package. +

+ +

+


+ +

+ + + + + + + + + + + + + + + + + + + + + + + +
+Field Summary
+protected  bytefiller + +
+          The meaning of this field is not clear (check the FSA documentation).
+protected  bytegotoLength + +
+          Size of transition's destination node "address".
+protected  byteversion + +
+          Dictionary version (derived from the combination of flags).
+static byteVERSION_5 + +
+          Version number for version 5 of the automaton.
+  + + + + + + + + + + + +
+Constructor Summary
+protected FSA(java.io.InputStream fsaStream, + java.lang.String dictionaryEncoding) + +
+          Creates a new automaton reading the FSA automaton from an input stream.
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ chargetAnnotationSeparator() + +
+          Return the annotation separator character, converted to a character + according to the encoding scheme passed in in the constructor of this + class.
+abstract  intgetArc(int node, + byte label) + +
+          Returns the identifier of an arc leaving node and labeled + with label.
+abstract  bytegetArcLabel(int arc) + +
+          Return the label associated with a given arc.
+abstract  intgetEndNode(int arc) + +
+          Return the end node pointed to by a given arc.
+ chargetFillerCharacter() + +
+          Return the filler character, converted to a character according to the + encoding scheme passed in in the constructor of this class.
+abstract  intgetFirstArc(int node) + +
+          Returns the identifier of the first arc leaving node or 0 if + the node has no outgoing arcs.
+ intgetFlags() + +
+          Returns a set of flags for this FSA instance.
+static FSAgetInstance(java.io.File fsaFile, + java.lang.String dictionaryEncoding) + +
+          This static method will attempt to instantiate an appropriate + implementation of the FSA for the version found in file given in the + input argument.
+static FSAgetInstance(java.io.InputStream fsaStream, + java.lang.String dictionaryEncoding) + +
+          This static method will attempt to instantiate an appropriate + implementation of the FSA for the version found in file given in the + input argument.
+abstract  intgetNextArc(int node, + int arc) + +
+          Returns the identifier of the next arc after arc and leaving + node.
+abstract  intgetNumberOfArcs() + +
+          Returns the number of arcs in this automaton.
+abstract  intgetNumberOfNodes() + +
+          Returns the number of nodes in this automaton.
+abstract  intgetRootNode() + +
+          Returns the identifier of the root node of this automaton.
+ FSATraversalHelpergetTraversalHelper() + +
+          Returns an object which can be used to walk the edges of this finite + state automaton and match arbitrary sequences against its states.
+ intgetVersion() + +
+          Returns the version number of the binary representation of this FSA.
+abstract  booleanisArcFinal(int arc) + +
+          Returns true if the destination node at the end of this + arc corresponds to an input sequence created when building + this automaton.
+abstract  booleanisArcTerminal(int arc) + +
+          Returns true if this arc does not have a + terminating node.
+ java.util.Iterator<java.nio.ByteBuffer>iterator() + +
+          Returns an iterator over all binary sequences starting from the initial + FSA state and ending in final nodes.
+protected  byte[]readFully(java.io.InputStream stream) + +
+          Reads all bytes from an input stream.
+protected  voidreadHeader(java.io.DataInput in, + long fileSize) + +
+          Reads a FSA header from a stream.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+VERSION_5

+
+public static final byte VERSION_5
+
+
Version number for version 5 of the automaton. +

+

+
See Also:
Constant Field Values
+
+
+ +

+version

+
+protected byte version
+
+
Dictionary version (derived from the combination of flags). +

+

+
+
+
+ +

+filler

+
+protected byte filler
+
+
The meaning of this field is not clear (check the FSA documentation). +

+

+
+
+
+ +

+gotoLength

+
+protected byte gotoLength
+
+
Size of transition's destination node "address". This field may also have + a different interpretation, or may not be used at all. It depends on the + combination of flags used for building the FSA. +

+

+
+
+ + + + + + + + +
+Constructor Detail
+ +

+FSA

+
+protected FSA(java.io.InputStream fsaStream,
+              java.lang.String dictionaryEncoding)
+       throws java.io.IOException
+
+
Creates a new automaton reading the FSA automaton from an input stream. +

+

+
Parameters:
fsaStream - An input stream with FSA automaton. +
Throws: +
java.io.IOException - if the dictionary file cannot be read, or version of the file + is not supported.
+
+ + + + + + + + +
+Method Detail
+ +

+getVersion

+
+public final int getVersion()
+
+
Returns the version number of the binary representation of this FSA. + +

+ The version number is derived from the combination of flags and is exactly + the same as in Jan Daciuk's FSA package. +

+

+
+
+
+
+
+
+
+ +

+getFlags

+
+public final int getFlags()
+
+
Returns a set of flags for this FSA instance. Each flag is represented by + a unique bit in the integer returned. Therefore, to check whether the + dictionary has been built using the FSAFlags.FLEXIBLE flag, one must + perform a bitwise AND: + boolean isFlexible = ((dict.getFlags() & FSAFlags.FLEXIBLE.bits) != 0) +

+

+
+
+
+
+
+
+
+ +

+getAnnotationSeparator

+
+public final char getAnnotationSeparator()
+
+
Return the annotation separator character, converted to a character + according to the encoding scheme passed to the constructor of this + class. +

+

+
+
+
+
+
+
+
+ +

+getFillerCharacter

+
+public final char getFillerCharacter()
+
+
Return the filler character, converted to a character according to the + encoding scheme passed to the constructor of this class. +

+

+
+
+
+
+
+
+
+ +

+getNumberOfArcs

+
+public abstract int getNumberOfArcs()
+
+
Returns the number of arcs in this automaton. Depending on the + representation of the automaton, this method may take a long time to + finish. +

+

+
+
+
+
+
+
+
+ +

+getNumberOfNodes

+
+public abstract int getNumberOfNodes()
+
+
Returns the number of nodes in this automaton. Depending on the + representation of the automaton, this method may take a long time to + finish. +

+

+
+
+
+
+
+
+
+ +

+getTraversalHelper

+
+public FSATraversalHelper getTraversalHelper()
+
+
Returns an object which can be used to walk the edges of this finite + state automaton and match arbitrary sequences against its states. +

+

+
+
+
+
+
+
+
+ +

+getInstance

+
+public static FSA getInstance(java.io.File fsaFile,
+                              java.lang.String dictionaryEncoding)
+                       throws java.io.IOException
+
+
This static method will attempt to instantiate an appropriate + implementation of the FSA for the version found in file given in the + input argument. +

+

+
+
+
+ +
Throws: +
java.io.IOException - An exception is thrown if no corresponding FSA parser is + found or if the input file cannot be opened.
+
+
+
+ +

+getInstance

+
+public static FSA getInstance(java.io.InputStream fsaStream,
+                              java.lang.String dictionaryEncoding)
+                       throws java.io.IOException
+
+
This static method will attempt to instantiate an appropriate + implementation of the FSA for the version found in file given in the + input argument. +

+

+
+
+
+ +
Throws: +
java.io.IOException - An exception is thrown if no corresponding FSA parser is + found or if the input file cannot be opened.
+
+
+
+ +

+readHeader

+
+protected void readHeader(java.io.DataInput in,
+                          long fileSize)
+                   throws java.io.IOException
+
+
Reads an FSA header from a stream. +

+

+
+
+
+ +
Throws: +
java.io.IOException - If the stream is not a dictionary, or if the version is not + supported.
+
+
+
+ +

+readFully

+
+protected byte[] readFully(java.io.InputStream stream)
+                    throws java.io.IOException
+
+
Reads all bytes from an input stream. +

+

+
+
+
+
Parameters:
stream - +
Returns:
Returns an array of read bytes. +
Throws: +
java.io.IOException
+
+
+
+ +

+iterator

+
+public java.util.Iterator<java.nio.ByteBuffer> iterator()
+
+
Returns an iterator over all binary sequences starting from the initial + FSA state and ending in final nodes. The iterator returns a + ByteBuffer that changes on each call to Iterator.next(), + so if the content should be preserved, it must be copied somewhere else. + +

+ It is guaranteed that the returned byte buffer is backed by a byte array + and that the content of the byte buffer starts at the array's index 0. +

+

+
Specified by:
iterator in interface java.lang.Iterable<java.nio.ByteBuffer>
+
+
+
+
+
+
+ +

+getRootNode

+
+public abstract int getRootNode()
+
+
Returns the identifier of the root node of this automaton. May return 0 + if the start node is also the end node. +

+

+
+
+
+
See Also:
getTraversalHelper()
+
+
+
+ +

+getFirstArc

+
+public abstract int getFirstArc(int node)
+
+
Returns the identifier of the first arc leaving node or 0 if + the node has no outgoing arcs. +

+

+
+
+
+
See Also:
getTraversalHelper()
+
+
+
+ +

+getArc

+
+public abstract int getArc(int node,
+                           byte label)
+
+
Returns the identifier of an arc leaving node and labeled + with label. An identifier equal to 0 means the node has no + outgoing arc labeled label. +

+

+
+
+
+
See Also:
getTraversalHelper()
+
+
+
+ +

+getNextArc

+
+public abstract int getNextArc(int node,
+                               int arc)
+
+
Returns the identifier of the next arc after arc and leaving + node. Zero is returned if no more arcs are available for the + node. +

+

+
+
+
+
See Also:
getTraversalHelper()
+
+
+
+ +

+getEndNode

+
+public abstract int getEndNode(int arc)
+
+
Return the end node pointed to by a given arc. Terminal arcs + (those that point to a terminal state) have no end node representation + and throw a runtime exception. +

+

+
+
+
+
See Also:
getTraversalHelper()
+
+
+
+ +

+getArcLabel

+
+public abstract byte getArcLabel(int arc)
+
+
Return the label associated with a given arc. +

+

+
+
+
+
+
+
+
+ +

+isArcFinal

+
+public abstract boolean isArcFinal(int arc)
+
+
Returns true if the destination node at the end of this + arc corresponds to an input sequence created when building + this automaton. +

+

+
+
+
+
+
+
+
+ +

+isArcTerminal

+
+public abstract boolean isArcTerminal(int arc)
+
+
Returns true if this arc does not have a + terminating node. +

+

+
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/fsa/FSAFinalStatesIterator.html b/doc/api/morfologik/fsa/FSAFinalStatesIterator.html new file mode 100644 index 0000000..994e490 --- /dev/null +++ b/doc/api/morfologik/fsa/FSAFinalStatesIterator.html @@ -0,0 +1,302 @@ + + + + + + +FSAFinalStatesIterator + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.fsa +
+Class FSAFinalStatesIterator

+
+java.lang.Object
+  extended by morfologik.fsa.FSAFinalStatesIterator
+
+
+
All Implemented Interfaces:
java.util.Iterator<java.nio.ByteBuffer>
+
+
+
+
public final class FSAFinalStatesIterator
extends java.lang.Object
implements java.util.Iterator<java.nio.ByteBuffer>
+ + +

+An iterator that traverses all final states reachable from a given + node and returns byte sequences corresponding to final states. +

+ +

+


+ +

+ + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ booleanhasNext() + +
+          Returns true if there are still elements in this iterator.
+ java.nio.ByteBuffernext() + +
+           
+ voidremove() + +
+          Not implemented in this iterator.
+ voidrestartFrom(int node) + +
+          Restart walking from node.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Method Detail
+ +

+restartFrom

+
+public void restartFrom(int node)
+
+
Restart walking from node. +

+

+
+
+
+
+
+
+
+ +

+hasNext

+
+public boolean hasNext()
+
+
Returns true if there are still elements in this iterator. +

+

+
Specified by:
hasNext in interface java.util.Iterator<java.nio.ByteBuffer>
+
+
+
+
+
+
+ +

+next

+
+public java.nio.ByteBuffer next()
+
+
+
Specified by:
next in interface java.util.Iterator<java.nio.ByteBuffer>
+
+
+ +
Returns:
Returns a ByteBuffer with the sequence corresponding + to the next final state in the automaton.
+
+
+
+ +

+remove

+
+public final void remove()
+
+
Not implemented in this iterator. +

+

+
Specified by:
remove in interface java.util.Iterator<java.nio.ByteBuffer>
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/fsa/FSAFlags.html b/doc/api/morfologik/fsa/FSAFlags.html new file mode 100644 index 0000000..dde0bbb --- /dev/null +++ b/doc/api/morfologik/fsa/FSAFlags.html @@ -0,0 +1,451 @@ + + + + + + +FSAFlags + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.fsa +
+Enum FSAFlags

+
+java.lang.Object
+  extended by java.lang.Enum<FSAFlags>
+      extended by morfologik.fsa.FSAFlags
+
+
+
All Implemented Interfaces:
java.io.Serializable, java.lang.Comparable<FSAFlags>
+
+
+
+
public enum FSAFlags
extends java.lang.Enum<FSAFlags>
+ + +

+FSA automaton flags. +

+ +

+


+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + +
+Enum Constant Summary
FLEXIBLE + +
+           
LARGE_DICTIONARIES + +
+           
NEXTBIT + +
+           
STOPBIT + +
+           
TAILS + +
+           
WEIGHTED + +
+           
+ + + + + + + + + + +
+Field Summary
+ intbits + +
+          Bit mask for the corresponding flag.
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+static booleanisSet(int flags, + FSAFlags flag) + +
+          Returns true if the corresponding flag is set in the + bit set.
+static FSAFlagsvalueOf(java.lang.String name) + +
+          Returns the enum constant of this type with the specified name.
+static FSAFlags[]values() + +
+          Returns an array containing the constants of this enum type, in +the order they are declared.
+ + + + + + + +
Methods inherited from class java.lang.Enum
clone, compareTo, equals, finalize, getDeclaringClass, hashCode, name, ordinal, toString, valueOf
+ + + + + + + +
Methods inherited from class java.lang.Object
getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Enum Constant Detail
+ +

+FLEXIBLE

+
+public static final FSAFlags FLEXIBLE
+
+
+
+
+
+ +

+STOPBIT

+
+public static final FSAFlags STOPBIT
+
+
+
+
+
+ +

+NEXTBIT

+
+public static final FSAFlags NEXTBIT
+
+
+
+
+
+ +

+TAILS

+
+public static final FSAFlags TAILS
+
+
+
+
+
+ +

+WEIGHTED

+
+public static final FSAFlags WEIGHTED
+
+
+
+
+
+ +

+LARGE_DICTIONARIES

+
+public static final FSAFlags LARGE_DICTIONARIES
+
+
+
+
+ + + + + + + + +
+Field Detail
+ +

+bits

+
+public final int bits
+
+
Bit mask for the corresponding flag. +

+

+
+
+ + + + + + + + +
+Method Detail
+ +

+values

+
+public static FSAFlags[] values()
+
+
Returns an array containing the constants of this enum type, in +the order they are declared. This method may be used to iterate +over the constants as follows: +
+for (FSAFlags c : FSAFlags.values())
+    System.out.println(c);
+
+

+

+ +
Returns:
an array containing the constants of this enum type, in +the order they are declared
+
+
+
+ +

+valueOf

+
+public static FSAFlags valueOf(java.lang.String name)
+
+
Returns the enum constant of this type with the specified name. +The string must match exactly an identifier used to declare an +enum constant in this type. (Extraneous whitespace characters are +not permitted.) +

+

+
Parameters:
name - the name of the enum constant to be returned. +
Returns:
the enum constant with the specified name +
Throws: +
java.lang.IllegalArgumentException - if this enum type has no constant +with the specified name +
java.lang.NullPointerException - if the argument is null
+
+
+
+ +

+isSet

+
+public static boolean isSet(int flags,
+                            FSAFlags flag)
+
+
Returns true if the corresponding flag is set in the + bit set. +

+

+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/fsa/FSAHelpers.html b/doc/api/morfologik/fsa/FSAHelpers.html new file mode 100644 index 0000000..7932c50 --- /dev/null +++ b/doc/api/morfologik/fsa/FSAHelpers.html @@ -0,0 +1,268 @@ + + + + + + +FSAHelpers + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.fsa +
+Class FSAHelpers

+
+java.lang.Object
+  extended by morfologik.fsa.FSAHelpers
+
+
+
+
public final class FSAHelpers
extends java.lang.Object
+ + +

+This class has several static utility methods for use + with the FSA package. +

+ +

+


+ +

+ + + + + + + + + + + + + + + + + + + + +
+Method Summary
+static java.lang.StringflagsToString(int flags) + +
+          Converts an integer with FSA flags to a human-readable string.
+static intgetFlags(int version) + +
+          Returns flags as an integer for a given version number.
+static bytegetVersion(int flags) + +
+          Returns a version number for a set of flags.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Method Detail
+ +

+flagsToString

+
+public static java.lang.String flagsToString(int flags)
+
+
Converts an integer with FSA flags to a human-readable string. +

+

+
+
+
+
+ +

+getVersion

+
+public static byte getVersion(int flags)
+
+
Returns a version number for a set of flags. +

+

+
+
+
+
+ +

+getFlags

+
+public static int getFlags(int version)
+
+
Returns flags as an integer for a given version number. +

+

+ +
Throws: +
java.lang.RuntimeException - if the version number is not recognized.
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/fsa/FSAMatch.html b/doc/api/morfologik/fsa/FSAMatch.html new file mode 100644 index 0000000..9aa0419 --- /dev/null +++ b/doc/api/morfologik/fsa/FSAMatch.html @@ -0,0 +1,300 @@ + + + + + + +FSAMatch + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.fsa +
+Class FSAMatch

+
+java.lang.Object
+  extended by morfologik.fsa.FSAMatch
+
+
+
+
public final class FSAMatch
extends java.lang.Object
+ + +

+A matching result returned from FSATraversalHelper. +

+ +

+

+
See Also:
FSATraversalHelper
+
+ +

+ + + + + + + + + + + +
+Constructor Summary
FSAMatch() + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ FSAMatchTypegetMatchType() + +
+          Return match type.
+ intgetMismatchIndex() + +
+          Return the index at which a mismatch occurred.
+ intgetMismatchNode() + +
+          Return the node at which mismatch occurred.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+FSAMatch

+
+public FSAMatch()
+
+
+ + + + + + + + +
+Method Detail
+ +

+getMatchType

+
+public FSAMatchType getMatchType()
+
+
Return match type. +

+

+
+
+
+
+ +

+getMismatchIndex

+
+public int getMismatchIndex()
+
+
Return the index at which a mismatch occurred. +

+

+
See Also:
FSAMatchType
+
+
+
+ +

+getMismatchNode

+
+public int getMismatchNode()
+
+
Return the node at which mismatch occurred. +

+

+
See Also:
FSAMatchType
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/fsa/FSAMatchType.html b/doc/api/morfologik/fsa/FSAMatchType.html new file mode 100644 index 0000000..cbee8d4 --- /dev/null +++ b/doc/api/morfologik/fsa/FSAMatchType.html @@ -0,0 +1,395 @@ + + + + + + +FSAMatchType + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.fsa +
+Enum FSAMatchType

+
+java.lang.Object
+  extended by java.lang.Enum<FSAMatchType>
+      extended by morfologik.fsa.FSAMatchType
+
+
+
All Implemented Interfaces:
java.io.Serializable, java.lang.Comparable<FSAMatchType>
+
+
+
+
public enum FSAMatchType
extends java.lang.Enum<FSAMatchType>
+ + +

+Type of the match returned as part of FSAMatch. +

+ +

+


+ +

+ + + + + + + + + + + + + + + + + + + + + + +
+Enum Constant Summary
EXACT_MATCH + +
+          The input sequence ends exactly on the final node.
NO_MATCH + +
+          No match for the input sequence of symbols found in the automaton.
PREFIX_FOUND + +
+          The sequence ends on an intermediate automaton node.
PREMATURE_PATH_END_FOUND + +
+          A terminating node occurs in the dictionary before the end of the input + sequence.
PREMATURE_WORD_END_FOUND + +
+          The input sequence ends on an intermediate automaton node.
+  + + + + + + + + + + + + + + + +
+Method Summary
+static FSAMatchTypevalueOf(java.lang.String name) + +
+          Returns the enum constant of this type with the specified name.
+static FSAMatchType[]values() + +
+          Returns an array containing the constants of this enum type, in +the order they are declared.
+ + + + + + + +
Methods inherited from class java.lang.Enum
clone, compareTo, equals, finalize, getDeclaringClass, hashCode, name, ordinal, toString, valueOf
+ + + + + + + +
Methods inherited from class java.lang.Object
getClass, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Enum Constant Detail
+ +

+NO_MATCH

+
+public static final FSAMatchType NO_MATCH
+
+
No match for the input sequence of symbols found in the automaton. +

+

+
+
+
+ +

+EXACT_MATCH

+
+public static final FSAMatchType EXACT_MATCH
+
+
The input sequence ends exactly on the final node. +

+

+
+
+
+ +

+PREMATURE_PATH_END_FOUND

+
+public static final FSAMatchType PREMATURE_PATH_END_FOUND
+
+
A terminating node occurs in the dictionary before the end of the input + sequence. It effectively means a prefix of the input sequence is stored + in the dictionary (e.g., an empty sequence is a prefix of all other + sequences). The result FSAMatch will contain an index of the + first character not present in the dictionary. +

+

+
+
+
+ +

+PREFIX_FOUND

+
+public static final FSAMatchType PREFIX_FOUND
+
+
The sequence ends on an intermediate automaton node. The sequence is + therefore a prefix of at least one other sequence stored in the + dictionary. The result FSAMatch object will contain an index of + the first character in the input sequence not present in the dictionary + and a pointer to the FSA's node where mismatch + occurred. +

+

+
+
+
+ +

+PREMATURE_WORD_END_FOUND

+
+public static final FSAMatchType PREMATURE_WORD_END_FOUND
+
+
The input sequence ends on an intermediate automaton node. This is a + special case of PREFIX_FOUND. A node where the mismatch (missing + input sequence's characters) occurred is returned in the FSAMatch + . +

+

+
+
+ + + + + + + + +
+Method Detail
+ +

+values

+
+public static FSAMatchType[] values()
+
+
Returns an array containing the constants of this enum type, in +the order they are declared. This method may be used to iterate +over the constants as follows: +
+for (FSAMatchType c : FSAMatchType.values())
+    System.out.println(c);
+
+

+

+ +
Returns:
an array containing the constants of this enum type, in +the order they are declared
+
+
+
+ +

+valueOf

+
+public static FSAMatchType valueOf(java.lang.String name)
+
+
Returns the enum constant of this type with the specified name. +The string must match exactly an identifier used to declare an +enum constant in this type. (Extraneous whitespace characters are +not permitted.) +

+

+
Parameters:
name - the name of the enum constant to be returned. +
Returns:
the enum constant with the specified name +
Throws: +
java.lang.IllegalArgumentException - if this enum type has no constant +with the specified name +
java.lang.NullPointerException - if the argument is null
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/fsa/FSATraversalHelper.html b/doc/api/morfologik/fsa/FSATraversalHelper.html new file mode 100644 index 0000000..e493621 --- /dev/null +++ b/doc/api/morfologik/fsa/FSATraversalHelper.html @@ -0,0 +1,358 @@ + + + + + + +FSATraversalHelper + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.fsa +
+Class FSATraversalHelper

+
+java.lang.Object
+  extended by morfologik.fsa.FSATraversalHelper
+
+
+
+
public final class FSATraversalHelper
extends java.lang.Object
+ + +

+This class implements some common matching and scanning operations on a + generic FSA. + +

+ Optimized implementations may be provided by specific versions of FSA, + therefore objects of this class should be instantiated via + FSA.getTraversalHelper(). +

+ +

+


+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ java.util.Iterator<java.nio.ByteBuffer>getAllSubsequences(int node) + +
+          Returns an Iterator of all subsequences available from the given + node to all reachable final states.
+ FSAFinalStatesIteratorgetFinalStatesIterator() + +
+          Returns a new iterator for walking along the final states of this FSA.
+ FSAMatchmatchSequence(byte[] sequence) + +
+           
+ FSAMatchmatchSequence(byte[] sequence, + int node) + +
+           
+ FSAMatchmatchSequence(byte[] sequence, + int start, + int length, + int node) + +
+          Finds a matching path in the dictionary for a given sequence of labels + from sequence and starting at node node.
+ FSAMatchmatchSequence(FSAMatch result, + byte[] sequence, + int start, + int length, + int node) + +
+          Same as matchSequence(byte[], int, int, int), but allows passing + a reusable FSAMatch object so that no intermediate garbage is + produced.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Method Detail
+ +

+getAllSubsequences

+
+public java.util.Iterator<java.nio.ByteBuffer> getAllSubsequences(int node)
+
+
Returns an Iterator of all subsequences available from the given + node to all reachable final states. +

+

+
+
+
+
+ +

+getFinalStatesIterator

+
+public FSAFinalStatesIterator getFinalStatesIterator()
+
+
Returns a new iterator for walking along the final states of this FSA. + The iterator is initially set to walk along all final states reachable + from the root node. +

+

+
+
+
+
+ +

+matchSequence

+
+public FSAMatch matchSequence(FSAMatch result,
+                              byte[] sequence,
+                              int start,
+                              int length,
+                              int node)
+
+
Same as matchSequence(byte[], int, int, int), but allows passing + a reusable FSAMatch object so that no intermediate garbage is + produced. +

+

+ +
Returns:
The same object as result, but with reset internal + type and other fields.
+
+
+
+ +

+matchSequence

+
+public FSAMatch matchSequence(byte[] sequence,
+                              int start,
+                              int length,
+                              int node)
+
+
Finds a matching path in the dictionary for a given sequence of labels + from sequence and starting at node node. +

+

+
Parameters:
sequence - An array of labels to follow in the FSA.
start - Starting index in sequence.
length - How many symbols to consider from sequence?
node - Start node identifier in the FSA.
See Also:
matchSequence(byte [], int)
+
+
+
+ +

+matchSequence

+
+public FSAMatch matchSequence(byte[] sequence,
+                              int node)
+
+
+
See Also:
matchSequence(byte[], int, int, int)
+
+
+
+ +

+matchSequence

+
+public FSAMatch matchSequence(byte[] sequence)
+
+
+
See Also:
matchSequence(byte[], int, int, int)
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/fsa/FSAVer5Impl.html b/doc/api/morfologik/fsa/FSAVer5Impl.html new file mode 100644 index 0000000..8c3265e --- /dev/null +++ b/doc/api/morfologik/fsa/FSAVer5Impl.html @@ -0,0 +1,686 @@ + + + + + + +FSAVer5Impl + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.fsa +
+Class FSAVer5Impl

+
+java.lang.Object
+  extended by morfologik.fsa.FSA
+      extended by morfologik.fsa.FSAVer5Impl
+
+
+
All Implemented Interfaces:
java.lang.Iterable<java.nio.ByteBuffer>
+
+
+
+
public final class FSAVer5Impl
extends FSA
+ + +

+FSA (Finite State Automaton) dictionary traversal implementation for version + 5 of the FSA automaton. + +

+ Version 5 indicates the dictionary was built with these flags: + FSAFlags.FLEXIBLE, FSAFlags.STOPBIT and + FSAFlags.NEXTBIT. The internal representation of the FSA must + therefore follow this description (please note this format describes only a + single transition (arc), not the entire dictionary file). + +

+ Byte
+       +-+-+-+-+-+-+-+-+\
+     0 | | | | | | | | | +------ label
+       +-+-+-+-+-+-+-+-+/
+ 
+                  +------------- node pointed to is next
+                  | +----------- the last arc of the node
+                  | | +--------- the arc is final
+                  | | |
+             +-----------+
+             |    | | |  |
+         ___+___  | | |  |
+        /       \ | | |  |
+       MSB           LSB |
+        7 6 5 4 3 2 1 0  |
+       +-+-+-+-+-+-+-+-+ |
+     1 | | | | | | | | | \ \
+       +-+-+-+-+-+-+-+-+  \ \  LSB
+       +-+-+-+-+-+-+-+-+     +
+     2 | | | | | | | | |     |
+       +-+-+-+-+-+-+-+-+     |
+     3 | | | | | | | | |     +----- target node address (in bytes)
+       +-+-+-+-+-+-+-+-+     |      (not present except for the byte
+       : : : : : : : : :     |       with flags if the node pointed to
+       +-+-+-+-+-+-+-+-+     +       is next)
+   gtl | | | | | | | | |    /  MSB
+       +-+-+-+-+-+-+-+-+   /
+ gtl+1                           (gtl = gotoLength)
+ 
+

+ +

+


+ +

+ + + + + + + + + + + + + + + + + + + +
+Field Summary
+protected  byte[]arcs + +
+          An array of bytes with the internal representation of the automaton.
+protected  intarcSize + +
+          Size of a single arc (in bytes).
+protected static intgotoOffset + +
+          An offset in the arc structure, where the address field begins.
+ + + + + + + +
Fields inherited from class morfologik.fsa.FSA
filler, gotoLength, version, VERSION_5
+  + + + + + + + + + + +
+Constructor Summary
FSAVer5Impl(java.io.InputStream fsaStream, + java.lang.String dictionaryEncoding) + +
+          Creates a new automaton reading it from a file in FSA format, version 5.
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ intgetArc(int node, + byte label) + +
+          Returns the identifier of an arc leaving node and labeled + with label.
+ bytegetArcLabel(int arc) + +
+          Return the label associated with a given arc.
+ intgetEndNode(int arc) + +
+          Return the end node pointed to by a given arc.
+ intgetFirstArc(int node) + +
+          Returns the identifier of the first arc leaving node or 0 if + the node has no outgoing arcs.
+ intgetNextArc(int node, + int arc) + +
+          Returns the identifier of the next arc after arc and leaving + node.
+ intgetNumberOfArcs() + +
+          Returns the number of arcs in this automaton.
+ intgetNumberOfNodes() + +
+          Returns the number of nodes in this automaton.
+ intgetRootNode() + +
+          Returns the start node of this automaton.
+ booleanisArcFinal(int arc) + +
+          Returns true if the destination node at the end of this + arc corresponds to an input sequence created when building + this automaton.
+ booleanisArcTerminal(int arc) + +
+          Returns true if this arc does not have a + terminating node.
+protected  voidreadHeader(java.io.DataInput in, + long fileSize) + +
+          Reads a FSA header from a stream.
+ + + + + + + +
Methods inherited from class morfologik.fsa.FSA
getAnnotationSeparator, getFillerCharacter, getFlags, getInstance, getInstance, getTraversalHelper, getVersion, iterator, readFully
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+arcSize

+
+protected int arcSize
+
+
Size of a single arc (in bytes). +

+

+
+
+
+ +

+gotoOffset

+
+protected static final int gotoOffset
+
+
An offset in the arc structure, where the address field begins. For this + version of the automaton, this is a constant value. +

+

+
See Also:
Constant Field Values
+
+
+ +

+arcs

+
+protected byte[] arcs
+
+
An array of bytes with the internal representation of the automaton. + Please see the documentation of this class for more information on how + this structure is organized. +

+

+
+
+ + + + + + + + +
+Constructor Detail
+ +

+FSAVer5Impl

+
+public FSAVer5Impl(java.io.InputStream fsaStream,
+                   java.lang.String dictionaryEncoding)
+            throws java.io.IOException
+
+
Creates a new automaton reading it from a file in FSA format, version 5. +

+

+ +
Throws: +
java.io.IOException
+
+ + + + + + + + +
+Method Detail
+ +

+getNumberOfArcs

+
+public int getNumberOfArcs()
+
+
Returns the number of arcs in this automaton. This method performs a full + scan of all arcs in this automaton. +

+

+
Specified by:
getNumberOfArcs in class FSA
+
+
+
+
+
+
+ +

+getNumberOfNodes

+
+public int getNumberOfNodes()
+
+
Returns the number of nodes in this automaton. This method performs a + full scan of all arcs in this automaton. +

+

+
Specified by:
getNumberOfNodes in class FSA
+
+
+
+
+
+
+ +

+getRootNode

+
+public int getRootNode()
+
+
Returns the start node of this automaton. May return 0 if + the start node is also an end node. +

+

+
Specified by:
getRootNode in class FSA
+
+
+
See Also:
FSA.getTraversalHelper()
+
+
+
+ +

+readHeader

+
+protected void readHeader(java.io.DataInput in,
+                          long fileSize)
+                   throws java.io.IOException
+
+
Reads a FSA header from a stream. +

+

+
Overrides:
readHeader in class FSA
+
+
+ +
Throws: +
java.io.IOException - If the stream is not a dictionary, or if the version is not + supported.
+
+
+
+ +

+getFirstArc

+
+public final int getFirstArc(int node)
+
+
Description copied from class: FSA
+
Returns the identifier of the first arc leaving node or 0 if + the node has no outgoing arcs. +

+

+
Specified by:
getFirstArc in class FSA
+
+
+
See Also:
FSA.getTraversalHelper()
+
+
+
+ +

+getNextArc

+
+public final int getNextArc(int node,
+                            int arc)
+
+
Description copied from class: FSA
+
Returns the identifier of the next arc after arc and leaving + node. Zero is returned if no more arcs are available for the + node. +

+

+
Specified by:
getNextArc in class FSA
+
+
+
See Also:
FSA.getTraversalHelper()
+
+
+
+ +

+getArc

+
+public int getArc(int node,
+                  byte label)
+
+
Description copied from class: FSA
+
Returns the identifier of an arc leaving node and labeled + with label. An identifier equal to 0 means the node has no + outgoing arc labeled label. +

+

+
Specified by:
getArc in class FSA
+
+
+
See Also:
FSA.getTraversalHelper()
+
+
+
+ +

+getEndNode

+
+public int getEndNode(int arc)
+
+
Description copied from class: FSA
+
Return the end node pointed to by a given arc. Terminal arcs + (those that point to a terminal state) have no end node representation + and throw a runtime exception. +

+

+
Specified by:
getEndNode in class FSA
+
+
+
See Also:
FSA.getTraversalHelper()
+
+
+
+ +

+getArcLabel

+
+public byte getArcLabel(int arc)
+
+
Description copied from class: FSA
+
Return the label associated with a given arc. +

+

+
Specified by:
getArcLabel in class FSA
+
+
+
+
+
+
+ +

+isArcFinal

+
+public boolean isArcFinal(int arc)
+
+
Description copied from class: FSA
+
Returns true if the destination node at the end of this + arc corresponds to an input sequence created when building + this automaton. +

+

+
Specified by:
isArcFinal in class FSA
+
+
+
+
+
+
+ +

+isArcTerminal

+
+public boolean isArcTerminal(int arc)
+
+
Description copied from class: FSA
+
Returns true if this arc does not have a + terminating node. +

+

+
Specified by:
isArcTerminal in class FSA
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/fsa/package-frame.html b/doc/api/morfologik/fsa/package-frame.html new file mode 100644 index 0000000..94451c6 --- /dev/null +++ b/doc/api/morfologik/fsa/package-frame.html @@ -0,0 +1,55 @@ + + + + + + +morfologik.fsa + + + + + + + + + + + +morfologik.fsa + + + + +
+Classes  + +
+FSA +
+FSAFinalStatesIterator +
+FSAHelpers +
+FSAMatch +
+FSATraversalHelper +
+FSAVer5Impl
+ + + + + + +
+Enums  + +
+FSAFlags +
+FSAMatchType
+ + + + diff --git a/doc/api/morfologik/fsa/package-summary.html b/doc/api/morfologik/fsa/package-summary.html new file mode 100644 index 0000000..9f23cfe --- /dev/null +++ b/doc/api/morfologik/fsa/package-summary.html @@ -0,0 +1,200 @@ + + + + + + +morfologik.fsa + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package morfologik.fsa +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Class Summary
FSAThis class implements Finite State Automaton traversal as described in Jan + Daciuk's Incremental Construction of Finite-State Automata and + Transducers, and Their Use in the Natural Language Processing (PhD + thesis, Technical University of Gdansk).
FSAFinalStatesIteratorAn iterator that traverses all final states reachable from a given + node and returns byte sequences corresponding to final states.
FSAHelpersThis class has several static utility methods for use + with the FSA package.
FSAMatchA matching result returned from FSATraversalHelper.
FSATraversalHelperThis class implements some common matching and scanning operations on a + generic FSA.
FSAVer5ImplFSA (Finite State Automaton) dictionary traversal implementation for version + 5 of the FSA automaton.
+  + +

+ + + + + + + + + + + + + +
+Enum Summary
FSAFlagsFSA automaton flags.
FSAMatchTypeType of the match returned as part of FSAMatch.
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/fsa/package-tree.html b/doc/api/morfologik/fsa/package-tree.html new file mode 100644 index 0000000..0714763 --- /dev/null +++ b/doc/api/morfologik/fsa/package-tree.html @@ -0,0 +1,165 @@ + + + + + + +morfologik.fsa Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package morfologik.fsa +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+ +

+Enum Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/stemming/Dictionary.html b/doc/api/morfologik/stemming/Dictionary.html new file mode 100644 index 0000000..50f1f58 --- /dev/null +++ b/doc/api/morfologik/stemming/Dictionary.html @@ -0,0 +1,489 @@ + + + + + + +Dictionary + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.stemming +
+Class Dictionary

+
+java.lang.Object
+  extended by morfologik.stemming.Dictionary
+
+
+
+
public final class Dictionary
extends java.lang.Object
+ + +

+A dictionary combines FSA automaton and metadata describing the + internals of dictionary entries' coding (DictionaryMetadata). + +

+ A dictionary consists of two files: +

+ Use static methods in this class to read dictionaries and their metadata. +

+ +

+


+ +

+ + + + + + + + + + + + + + + + + + + + + + + +
+Field Summary
+static java.util.WeakHashMap<java.lang.String,Dictionary>defaultDictionaries + +
+          Default loaded dictionaries.
+ FSAfsa + +
+          FSA automaton with the compiled dictionary data.
+ DictionaryMetadatametadata + +
+          Metadata associated with the dictionary.
+static java.lang.StringMETADATA_FILE_EXTENSION + +
+          Expected metadata file extension.
+  + + + + + + + + + + +
+Constructor Summary
Dictionary(FSA fsa, + DictionaryMetadata metadata) + +
+          It is strongly recommended to use static methods in this class for + reading dictionaries.
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+static java.lang.StringgetExpectedFeaturesName(java.lang.String name) + +
+          Returns the expected name of the metadata file, based on the name of the + FSA dictionary file.
+static DictionarygetForLanguage(java.lang.String languageCode) + +
+          Return a built-in dictionary for a given ISO language code.
+static Dictionaryread(java.io.File fsaFile) + +
+          Attempts to load a dictionary using the path to the FSA file and the + expected metadata extension.
+static Dictionaryread(java.net.URL fsaURL) + +
+           + Attempts to load a dictionary using the URL to the FSA file and the + expected metadata extension.
+static DictionaryreadAndClose(java.io.InputStream fsaData, + java.io.InputStream featuresData) + +
+          Attempts to load a dictionary from opened streams of FSA dictionary data + and associated metadata.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+METADATA_FILE_EXTENSION

+
+public static final java.lang.String METADATA_FILE_EXTENSION
+
+
Expected metadata file extension. +

+

+
See Also:
Constant Field Values
+
+
+ +

+fsa

+
+public final FSA fsa
+
+
FSA automaton with the compiled dictionary data. +

+

+
+
+
+ +

+metadata

+
+public final DictionaryMetadata metadata
+
+
Metadata associated with the dictionary. +

+

+
+
+
+ +

+defaultDictionaries

+
+public static final java.util.WeakHashMap<java.lang.String,Dictionary> defaultDictionaries
+
+
Default loaded dictionaries. +

+

+
+
+ + + + + + + + +
+Constructor Detail
+ +

+Dictionary

+
+public Dictionary(FSA fsa,
+                  DictionaryMetadata metadata)
+
+
It is strongly recommended to use static methods in this class for + reading dictionaries. +

+

+
Parameters:
fsa - An instantiated FSA instance.
metadata - A map of attributes describing the compression format and + other settings not contained in the FSA automaton. For an + explanation of available attributes and their possible values, + see DictionaryMetadata.
+
+ + + + + + + + +
+Method Detail
+ +

+read

+
+public static Dictionary read(java.io.File fsaFile)
+                       throws java.io.IOException
+
+
Attempts to load a dictionary using the path to the FSA file and the + expected metadata extension. +

+

+ +
Throws: +
java.io.IOException
+
+
+
+ +

+read

+
+public static Dictionary read(java.net.URL fsaURL)
+                       throws java.io.IOException
+
+

+ Attempts to load a dictionary using the URL to the FSA file and the + expected metadata extension. + +

+ This method can be used to load resource-based dictionaries, but be aware + of JAR resource-locking issues that arise from resource URLs. +

+

+ +
Throws: +
java.io.IOException
+
+
+
+ +

+readAndClose

+
+public static Dictionary readAndClose(java.io.InputStream fsaData,
+                                      java.io.InputStream featuresData)
+                               throws java.io.IOException
+
+
Attempts to load a dictionary from opened streams of FSA dictionary data + and associated metadata. +

+

+ +
Throws: +
java.io.IOException
+
+
+
+ +

+getExpectedFeaturesName

+
+public static java.lang.String getExpectedFeaturesName(java.lang.String name)
+
+
Returns the expected name of the metadata file, based on the name of the + FSA dictionary file. The expected name is resolved by truncating any + suffix of name and appending + METADATA_FILE_EXTENSION. +

+

+
+
+
+
+ +

+getForLanguage

+
+public static Dictionary getForLanguage(java.lang.String languageCode)
+
+
Return a built-in dictionary for a given ISO language code. Dictionaries + are cached internally for potential reuse. +

+

+ +
Throws: +
java.lang.RuntimeException - Throws a RuntimeException if the dictionary is not + bundled with the library.
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/stemming/DictionaryIterator.html b/doc/api/morfologik/stemming/DictionaryIterator.html new file mode 100644 index 0000000..12f5b74 --- /dev/null +++ b/doc/api/morfologik/stemming/DictionaryIterator.html @@ -0,0 +1,309 @@ + + + + + + +DictionaryIterator + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.stemming +
+Class DictionaryIterator

+
+java.lang.Object
+  extended by morfologik.stemming.DictionaryIterator
+
+
+
All Implemented Interfaces:
java.util.Iterator<WordData>
+
+
+
+
public final class DictionaryIterator
extends java.lang.Object
implements java.util.Iterator<WordData>
+ + +

+An iterator over WordData entries of a Dictionary. The stems + can be decoded from compressed format or the compressed form can be preserved. +

+ +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
DictionaryIterator(Dictionary dictionary, + java.nio.charset.CharsetDecoder decoder, + boolean decodeStems) + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ booleanhasNext() + +
+           
+ WordDatanext() + +
+           
+ voidremove() + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+DictionaryIterator

+
+public DictionaryIterator(Dictionary dictionary,
+                          java.nio.charset.CharsetDecoder decoder,
+                          boolean decodeStems)
+
+
+ + + + + + + + +
+Method Detail
+ +

+hasNext

+
+public boolean hasNext()
+
+
+
Specified by:
hasNext in interface java.util.Iterator<WordData>
+
+
+
+
+
+
+ +

+next

+
+public WordData next()
+
+
+
Specified by:
next in interface java.util.Iterator<WordData>
+
+
+
+
+
+
+ +

+remove

+
+public void remove()
+
+
+
Specified by:
remove in interface java.util.Iterator<WordData>
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/stemming/DictionaryLookup.html b/doc/api/morfologik/stemming/DictionaryLookup.html new file mode 100644 index 0000000..85fe27e --- /dev/null +++ b/doc/api/morfologik/stemming/DictionaryLookup.html @@ -0,0 +1,375 @@ + + + + + + +DictionaryLookup + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.stemming +
+Class DictionaryLookup

+
+java.lang.Object
+  extended by morfologik.stemming.DictionaryLookup
+
+
+
All Implemented Interfaces:
java.lang.Iterable<WordData>, IStemmer
+
+
+
+
public final class DictionaryLookup
extends java.lang.Object
implements IStemmer, java.lang.Iterable<WordData>
+ + +

+This class implements a dictionary lookup over an FSA dictionary. The + dictionary for this class should be prepared from a text file using Jan + Daciuk's FSA package (see link below). + +

+ Important: finite state automatons in Jan Daciuk's implementation use + bytes, not Unicode characters. Therefore objects of this class always + have to be constructed with an encoding used to convert Java strings to byte + arrays and the other way around. You can use UTF-8 encoding, as it + should not conflict with any control sequences and separator characters. +

+ +

+

+
See Also:
FSA package Web site
+
+ +

+ + + + + + + + + + + +
+Constructor Summary
DictionaryLookup(Dictionary dictionary) + +
+           + Creates a new object of this class using the given FSA for word lookups + and encoding for converting characters to bytes.
+  + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+static java.nio.ByteBufferdecodeStem(java.nio.ByteBuffer bb, + byte[] bytes, + int len, + java.nio.ByteBuffer inflectedBuffer, + DictionaryMetadata metadata) + +
+          Decode the base form of an inflected word and save its decoded form into + a byte buffer.
+ DictionarygetDictionary() + +
+           
+ java.util.Iterator<WordData>iterator() + +
+          Return an iterator over all WordData entries available in + the embedded Dictionary.
+ java.util.List<WordData>lookup(java.lang.CharSequence word) + +
+          Searches the automaton for a symbol sequence equal to word, + followed by a separator.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+DictionaryLookup

+
+public DictionaryLookup(Dictionary dictionary)
+                 throws java.lang.IllegalArgumentException
+
+

+ Creates a new object of this class using the given FSA for word lookups + and encoding for converting characters to bytes. +

+

+ +
Throws: +
java.lang.IllegalArgumentException - if FSA's root node cannot be acquired (dictionary is empty).
+
+ + + + + + + + +
+Method Detail
+ +

+lookup

+
+public java.util.List<WordData> lookup(java.lang.CharSequence word)
+
+
Searches the automaton for a symbol sequence equal to word, + followed by a separator. The result is a stem (decompressed according + to the dictionary's specification) and optional tag data. +

+

+
Specified by:
lookup in interface IStemmer
+
+
+
+
+
+
+ +

+decodeStem

+
+public static java.nio.ByteBuffer decodeStem(java.nio.ByteBuffer bb,
+                                             byte[] bytes,
+                                             int len,
+                                             java.nio.ByteBuffer inflectedBuffer,
+                                             DictionaryMetadata metadata)
+
+
Decode the base form of an inflected word and save its decoded form into + a byte buffer. +

+

+
+
+
+
Parameters:
bb - The byte buffer to save the result to. A new buffer may be + allocated if the capacity of bb is not large + enough to store the result. The buffer is not flipped upon + return.
inflectedBuffer - Inflected form's bytes (decoded properly).
bytes - Bytes of the encoded base form, starting at 0 index.
len - Length of the encoded base form. +
Returns:
Returns either bb or a new buffer whose capacity is + large enough to store the output of the decoded data.
+
+
+
+ +

+iterator

+
+public java.util.Iterator<WordData> iterator()
+
+
Return an iterator over all WordData entries available in + the embedded Dictionary. +

+

+
Specified by:
iterator in interface java.lang.Iterable<WordData>
+
+
+
+
+
+
+ +

+getDictionary

+
+public Dictionary getDictionary()
+
+
+
+
+
+ +
Returns:
Return the Dictionary used by this object.
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/stemming/DictionaryMetadata.html b/doc/api/morfologik/stemming/DictionaryMetadata.html new file mode 100644 index 0000000..27b6912 --- /dev/null +++ b/doc/api/morfologik/stemming/DictionaryMetadata.html @@ -0,0 +1,437 @@ + + + + + + +DictionaryMetadata + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.stemming +
+Class DictionaryMetadata

+
+java.lang.Object
+  extended by morfologik.stemming.DictionaryMetadata
+
+
+
+
public final class DictionaryMetadata
extends java.lang.Object
+ + +

+Description of attributes, their types and default values. +

+ +

+

+
See Also:
Dictionary
+
+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Field Summary
+static java.lang.StringATTR_NAME_ENCODING + +
+          Attribute name for encoding.
+static java.lang.StringATTR_NAME_SEPARATOR + +
+          Attribute name for separator.
+static java.lang.StringATTR_NAME_USES_INFIXES + +
+          Attribute name for usesInfixes.
+static java.lang.StringATTR_NAME_USES_PREFIXES + +
+          Attribute name for usesPrefixes.
+ java.lang.Stringencoding + +
+          Encoding used for converting bytes to characters and vice versa.
+ java.util.Map<java.lang.String,java.lang.String>metadata + +
+          Other meta data not included above.
+ byteseparator + +
+          A separator character between fields (stem, lemma, form).
+ booleanusesInfixes + +
+          True if the dictionary was compiled with infix compression.
+ booleanusesPrefixes + +
+          True if the dictionary was compiled with prefix compression.
+  + + + + + + + + + + +
+Constructor Summary
DictionaryMetadata(char separator, + java.lang.String encoding, + boolean usesPrefixes, + boolean usesInfixes, + java.util.Map<java.lang.String,java.lang.String> metadata) + +
+          Creates an immutable instance of DictionaryMetadata.
+  + + + + + + + +
+Method Summary
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+ATTR_NAME_SEPARATOR

+
+public static final java.lang.String ATTR_NAME_SEPARATOR
+
+
Attribute name for separator. +

+

+
See Also:
Constant Field Values
+
+
+ +

+ATTR_NAME_ENCODING

+
+public static final java.lang.String ATTR_NAME_ENCODING
+
+
Attribute name for encoding. +

+

+
See Also:
Constant Field Values
+
+
+ +

+ATTR_NAME_USES_PREFIXES

+
+public static final java.lang.String ATTR_NAME_USES_PREFIXES
+
+
Attribute name for usesPrefixes. +

+

+
See Also:
Constant Field Values
+
+
+ +

+ATTR_NAME_USES_INFIXES

+
+public static final java.lang.String ATTR_NAME_USES_INFIXES
+
+
Attribute name for usesInfixes. +

+

+
See Also:
Constant Field Values
+
+
+ +

+separator

+
+public final byte separator
+
+
A separator character between fields (stem, lemma, form). The character + must be within byte range (FSA uses bytes internally). +

+

+
+
+
+ +

+encoding

+
+public final java.lang.String encoding
+
+
Encoding used for converting bytes to characters and vice versa. +

+

+
+
+
+ +

+usesPrefixes

+
+public final boolean usesPrefixes
+
+
True if the dictionary was compiled with prefix compression. +

+

+
+
+
+ +

+usesInfixes

+
+public final boolean usesInfixes
+
+
True if the dictionary was compiled with infix compression. +

+

+
+
+
+ +

+metadata

+
+public final java.util.Map<java.lang.String,java.lang.String> metadata
+
+
Other meta data not included above. +

+

+
+
+ + + + + + + + +
+Constructor Detail
+ +

+DictionaryMetadata

+
+public DictionaryMetadata(char separator,
+                          java.lang.String encoding,
+                          boolean usesPrefixes,
+                          boolean usesInfixes,
+                          java.util.Map<java.lang.String,java.lang.String> metadata)
+
+
Creates an immutable instance of DictionaryMetadata. +

+

+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/stemming/IStemmer.html b/doc/api/morfologik/stemming/IStemmer.html new file mode 100644 index 0000000..c91fd04 --- /dev/null +++ b/doc/api/morfologik/stemming/IStemmer.html @@ -0,0 +1,220 @@ + + + + + + +IStemmer + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.stemming +
+Interface IStemmer

+
+
All Known Implementing Classes:
DictionaryLookup, PolishStemmer
+
+
+
+
public interface IStemmer
+ + +

+A generic "stemmer" interface in Morfologik. +

+ +

+


+ +

+ + + + + + + + + + + + +
+Method Summary
+ java.util.List<WordData>lookup(java.lang.CharSequence word) + +
+          Returns a list of WordData entries for a given word.
+  +

+ + + + + + + + +
+Method Detail
+ +

+lookup

+
+java.util.List<WordData> lookup(java.lang.CharSequence word)
+
+
Returns a list of WordData entries for a given word. The returned + list is never null. Depending on the stemmer's + implementation the WordData may carry the stem and additional + information (tag) or just the stem. +

+ The returned list and any object it contains are not usable after a + subsequent call to this method. Any data that should be stored in between + must be copied by the caller. +

+

+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/stemming/PolishStemmer.html b/doc/api/morfologik/stemming/PolishStemmer.html new file mode 100644 index 0000000..d48276c --- /dev/null +++ b/doc/api/morfologik/stemming/PolishStemmer.html @@ -0,0 +1,302 @@ + + + + + + +PolishStemmer + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.stemming +
+Class PolishStemmer

+
+java.lang.Object
+  extended by morfologik.stemming.PolishStemmer
+
+
+
All Implemented Interfaces:
java.lang.Iterable<WordData>, IStemmer
+
+
+
+
public final class PolishStemmer
extends java.lang.Object
implements IStemmer, java.lang.Iterable<WordData>
+ + +

+A dictionary-based stemmer for the Polish language. This stemmer requires an + FSA-compiled dictionary to be present in classpath resources. + + Objects of this class are not thread safe. +

+ +

+

+
See Also:
DictionaryLookup
+
+ +

+ + + + + + + + + + + +
+Constructor Summary
PolishStemmer() + +
+          This constructor is initialized with a built-in dictionary or fails with + a runtime exception if the dictionary is not available.
+  + + + + + + + + + + + + + + + +
+Method Summary
+ java.util.Iterator<WordData>iterator() + +
+          Iterates over all dictionary forms stored in this stemmer.
+ java.util.List<WordData>lookup(java.lang.CharSequence word) + +
+          Returns a list of WordData entries for a given word.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+PolishStemmer

+
+public PolishStemmer()
+
+
This constructor is initialized with a built-in dictionary or fails with + a runtime exception if the dictionary is not available. +

+

+ + + + + + + + +
+Method Detail
+ +

+lookup

+
+public java.util.List<WordData> lookup(java.lang.CharSequence word)
+
+
Returns a list of WordData entries for a given word. The returned + list is never null. Depending on the stemmer's + implementation the WordData may carry the stem and additional + information (tag) or just the stem. +

+ The returned list and any object it contains are not usable after a + subsequent call to this method. Any data that should be stored in between + must be copied by the caller. +

+

+
Specified by:
lookup in interface IStemmer
+
+
+
+
+
+
+ +

+iterator

+
+public java.util.Iterator<WordData> iterator()
+
+
Iterates over all dictionary forms stored in this stemmer. +

+

+
Specified by:
iterator in interface java.lang.Iterable<WordData>
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/stemming/WordData.html b/doc/api/morfologik/stemming/WordData.html new file mode 100644 index 0000000..14e07a1 --- /dev/null +++ b/doc/api/morfologik/stemming/WordData.html @@ -0,0 +1,447 @@ + + + + + + +WordData + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.stemming +
+Class WordData

+
+java.lang.Object
+  extended by morfologik.stemming.WordData
+
+
+
All Implemented Interfaces:
java.lang.Cloneable
+
+
+
+
public final class WordData
extends java.lang.Object
implements java.lang.Cloneable
+ + +

+Stem and tag data associated with a given word. + +

+ Important notes: +

+

+ +

+


+ +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+protected  WordDataclone() + +
+          Declare a covariant of Object.clone() that returns a deep copy + of this object.
+ booleanequals(java.lang.Object obj) + +
+           
+ java.lang.CharSequencegetStem() + +
+           
+ java.nio.ByteBuffergetStemBytes(java.nio.ByteBuffer target) + +
+          Copy the stem's binary data (no charset decoding) to a custom byte + buffer.
+ java.lang.CharSequencegetTag() + +
+           
+ java.nio.ByteBuffergetTagBytes(java.nio.ByteBuffer target) + +
+          Copy the tag's binary data (no charset decoding) to a custom byte buffer.
+ java.lang.CharSequencegetWord() + +
+           
+ java.nio.ByteBuffergetWordBytes(java.nio.ByteBuffer target) + +
+          Copy the inflected word's binary data (no charset decoding) to a custom + byte buffer.
+ inthashCode() + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
finalize, getClass, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Method Detail
+ +

+getStemBytes

+
+public java.nio.ByteBuffer getStemBytes(java.nio.ByteBuffer target)
+
+
Copy the stem's binary data (no charset decoding) to a custom byte + buffer. If the buffer is null or not large enough to hold the result, a + new buffer is allocated. +

+

+
+
+
+
Parameters:
target - Target byte buffer to copy the stem buffer to or + null if a new buffer should be allocated. +
Returns:
Returns target or the new reallocated buffer.
+
+
+
+ +

+getTagBytes

+
+public java.nio.ByteBuffer getTagBytes(java.nio.ByteBuffer target)
+
+
Copy the tag's binary data (no charset decoding) to a custom byte buffer. + If the buffer is null or not large enough to hold the result, a new + buffer is allocated. +

+

+
+
+
+
Parameters:
target - Target byte buffer to copy the tag buffer to or + null if a new buffer should be allocated. +
Returns:
Returns target or the new reallocated buffer.
+
+
+
+ +

+getWordBytes

+
+public java.nio.ByteBuffer getWordBytes(java.nio.ByteBuffer target)
+
+
Copy the inflected word's binary data (no charset decoding) to a custom + byte buffer. If the buffer is null or not large enough to hold the + result, a new buffer is allocated. +

+

+
+
+
+
Parameters:
target - Target byte buffer to copy the word buffer to or + null if a new buffer should be allocated. +
Returns:
Returns target or the new reallocated buffer.
+
+
+
+ +

+getTag

+
+public java.lang.CharSequence getTag()
+
+
+
+
+
+ +
Returns:
Return tag data decoded to a character sequence or + null if no associated tag data exists.
+
+
+
+ +

+getStem

+
+public java.lang.CharSequence getStem()
+
+
+
+
+
+ +
Returns:
Return stem data decoded to a character sequence or + null if no associated stem data exists.
+
+
+
+ +

+getWord

+
+public java.lang.CharSequence getWord()
+
+
+
+
+
+ +
Returns:
Return inflected word form data. Usually the parameter + passed to DictionaryLookup.lookup(CharSequence).
+
+
+
+ +

+equals

+
+public boolean equals(java.lang.Object obj)
+
+
+
Overrides:
equals in class java.lang.Object
+
+
+
+
+
+
+ +

+hashCode

+
+public int hashCode()
+
+
+
Overrides:
hashCode in class java.lang.Object
+
+
+
+
+
+
+ +

+clone

+
+protected WordData clone()
+
+
Declare a covariant of Object.clone() that returns a deep copy + of this object. The content of all internal buffers is copied. +

+

+
Overrides:
clone in class java.lang.Object
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/stemming/package-frame.html b/doc/api/morfologik/stemming/package-frame.html new file mode 100644 index 0000000..62a0afe --- /dev/null +++ b/doc/api/morfologik/stemming/package-frame.html @@ -0,0 +1,53 @@ + + + + + + +morfologik.stemming + + + + + + + + + + + +morfologik.stemming + + + + +
+Interfaces  + +
+IStemmer
+ + + + + + +
+Classes  + +
+Dictionary +
+DictionaryIterator +
+DictionaryLookup +
+DictionaryMetadata +
+PolishStemmer +
+WordData
+ + + + diff --git a/doc/api/morfologik/stemming/package-summary.html b/doc/api/morfologik/stemming/package-summary.html new file mode 100644 index 0000000..613d016 --- /dev/null +++ b/doc/api/morfologik/stemming/package-summary.html @@ -0,0 +1,190 @@ + + + + + + +morfologik.stemming + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package morfologik.stemming +

+ + + + + + + + + +
+Interface Summary
IStemmerA generic "stemmer" interface in Morfologik.
+  + +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Class Summary
DictionaryA dictionary combines FSA automaton and metadata describing the + internals of dictionary entries' coding (DictionaryMetadata).
DictionaryIteratorAn iterator over WordData entries of a Dictionary.
DictionaryLookupThis class implements a dictionary lookup over an FSA dictionary.
DictionaryMetadataDescription of attributes, their types and default values.
PolishStemmerA dictionary-based stemmer for the Polish language.
WordDataStem and tag data associated with a given word.
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/stemming/package-tree.html b/doc/api/morfologik/stemming/package-tree.html new file mode 100644 index 0000000..6a7904e --- /dev/null +++ b/doc/api/morfologik/stemming/package-tree.html @@ -0,0 +1,160 @@ + + + + + + +morfologik.stemming Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package morfologik.stemming +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+ +

+Interface Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/tools/DumpTool.html b/doc/api/morfologik/tools/DumpTool.html new file mode 100644 index 0000000..04cdc03 --- /dev/null +++ b/doc/api/morfologik/tools/DumpTool.html @@ -0,0 +1,436 @@ + + + + + + +DumpTool + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.tools +
+Class DumpTool

+
+java.lang.Object
+  extended by morfologik.tools.DumpTool
+
+
+
+
public final class DumpTool
extends java.lang.Object
+ + +

+This utility will dump the information and contents of a given FSA + dictionary. It can dump dictionaries in the raw form (as fed to the + fsa_build program) or decoding compressed stem forms. +

+ +

+


+ +

+ + + + + + + + + + + +
+Field Summary
+protected  org.apache.commons.cli.Optionsoptions + +
+          Command line options.
+  + + + + + + + + + + +
+Constructor Summary
DumpTool() + +
+           
+  + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+protected  voidgo(org.apache.commons.cli.CommandLine line) + +
+          Command line entry point after parsing arguments.
+protected  voidgo(java.lang.String[] args) + +
+          Initializes application context.
+protected  voidinitializeOptions(org.apache.commons.cli.Options options) + +
+          Command line options for the tool.
+static voidmain(java.lang.String[] args) + +
+          Command line entry point.
+protected  voidprintError(java.lang.String msg) + +
+          Print an error without an exception.
+protected  voidprintError(java.lang.String msg, + java.lang.Throwable t) + +
+          Print an error and an associated exception.
+protected  voidprintUsage() + +
+          Prints usage (options).
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Field Detail
+ +

+options

+
+protected final org.apache.commons.cli.Options options
+
+
Command line options. +

+

+
+
+ + + + + + + + +
+Constructor Detail
+ +

+DumpTool

+
+public DumpTool()
+
+
+ + + + + + + + +
+Method Detail
+ +

+go

+
+protected void go(org.apache.commons.cli.CommandLine line)
+           throws java.lang.Exception
+
+
Command line entry point after parsing arguments. +

+

+
+
+
+ +
Throws: +
java.lang.Exception
+
+
+
+ +

+initializeOptions

+
+protected void initializeOptions(org.apache.commons.cli.Options options)
+
+
Command line options for the tool. +

+

+
+
+
+
+
+
+
+ +

+main

+
+public static void main(java.lang.String[] args)
+                 throws java.lang.Exception
+
+
Command line entry point. +

+

+ +
Throws: +
java.lang.Exception
+
+
+
+ +

+go

+
+protected final void go(java.lang.String[] args)
+
+
Initializes application context. +

+

+
+
+
+
+ +

+printError

+
+protected void printError(java.lang.String msg,
+                          java.lang.Throwable t)
+
+
Print an error and an associated exception. +

+

+
+
+
+
+ +

+printError

+
+protected void printError(java.lang.String msg)
+
+
Print an error without an exception. +

+

+
+
+
+
+ +

+printUsage

+
+protected void printUsage()
+
+
Prints usage (options). +

+

+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/tools/InflectionFramesTool.html b/doc/api/morfologik/tools/InflectionFramesTool.html new file mode 100644 index 0000000..0268184 --- /dev/null +++ b/doc/api/morfologik/tools/InflectionFramesTool.html @@ -0,0 +1,279 @@ + + + + + + +InflectionFramesTool + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.tools +
+Class InflectionFramesTool

+
+java.lang.Object
+  extended by morfologik.tools.InflectionFramesTool
+
+
+
+
public class InflectionFramesTool
extends java.lang.Object
+ + +

+Calculate inflection frames from the Polish dictionary. +

+ +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
InflectionFramesTool() + +
+           
+  + + + + + + + + + + + + + + + +
+Method Summary
+ voidinflectionFrames() + +
+           
+static voidmain(java.lang.String[] args) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+InflectionFramesTool

+
+public InflectionFramesTool()
+
+
+ + + + + + + + +
+Method Detail
+ +

+main

+
+public static void main(java.lang.String[] args)
+                 throws java.io.IOException
+
+
+ +
Throws: +
java.io.IOException
+
+
+
+ +

+inflectionFrames

+
+public void inflectionFrames()
+                      throws java.io.IOException
+
+
+ +
Throws: +
java.io.IOException
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/tools/Launcher.html b/doc/api/morfologik/tools/Launcher.html new file mode 100644 index 0000000..0acdf8e --- /dev/null +++ b/doc/api/morfologik/tools/Launcher.html @@ -0,0 +1,259 @@ + + + + + + +Launcher + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.tools +
+Class Launcher

+
+java.lang.Object
+  extended by morfologik.tools.Launcher
+
+
+
+
public final class Launcher
extends java.lang.Object
+ + +

+A launcher for other command-line tools. +

+ +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
Launcher() + +
+           
+  + + + + + + + + + + + +
+Method Summary
+static voidmain(java.lang.String[] args) + +
+          Command line entry point.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+Launcher

+
+public Launcher()
+
+
+ + + + + + + + +
+Method Detail
+ +

+main

+
+public static void main(java.lang.String[] args)
+                 throws java.lang.Exception
+
+
Command line entry point. +

+

+ +
Throws: +
java.lang.Exception
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/tools/package-frame.html b/doc/api/morfologik/tools/package-frame.html new file mode 100644 index 0000000..10e2408 --- /dev/null +++ b/doc/api/morfologik/tools/package-frame.html @@ -0,0 +1,36 @@ + + + + + + +morfologik.tools + + + + + + + + + + + +morfologik.tools + + + + +
+Classes  + +
+DumpTool +
+InflectionFramesTool +
+Launcher
+ + + + diff --git a/doc/api/morfologik/tools/package-summary.html b/doc/api/morfologik/tools/package-summary.html new file mode 100644 index 0000000..8150a9e --- /dev/null +++ b/doc/api/morfologik/tools/package-summary.html @@ -0,0 +1,164 @@ + + + + + + +morfologik.tools + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package morfologik.tools +

+ + + + + + + + + + + + + + + + + +
+Class Summary
DumpToolThis utility will dump the information and contents of a given FSA + dictionary.
InflectionFramesToolCalculate inflection frames from the Polish dictionary.
LauncherA launcher for other command-line tools.
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/tools/package-tree.html b/doc/api/morfologik/tools/package-tree.html new file mode 100644 index 0000000..bc41772 --- /dev/null +++ b/doc/api/morfologik/tools/package-tree.html @@ -0,0 +1,151 @@ + + + + + + +morfologik.tools Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package morfologik.tools +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/util/Arrays.html b/doc/api/morfologik/util/Arrays.html new file mode 100644 index 0000000..830904c --- /dev/null +++ b/doc/api/morfologik/util/Arrays.html @@ -0,0 +1,302 @@ + + + + + + +Arrays + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.util +
+Class Arrays

+
+java.lang.Object
+  extended by morfologik.util.Arrays
+
+
+
+
public final class Arrays
extends java.lang.Object
+ + +

+Compatibility layer for JVM 1.5. +

+ +

+


+ +

+ + + + + + + + + + + + + + + + + + + + + + + + +
+Method Summary
+static byte[]copyOf(byte[] original, + int newLength) + +
+           
+static int[]copyOf(int[] original, + int newLength) + +
+           
+static + + + + +
+<T> T[]
+
copyOf(T[] original, + int newLength) + +
+           
+static + + + + +
+<T,U> T[]
+
copyOf(U[] original, + int newLength, + java.lang.Class<? extends T[]> newType) + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Method Detail
+ +

+copyOf

+
+public static byte[] copyOf(byte[] original,
+                            int newLength)
+
+
+
+
+
+
+ +

+copyOf

+
+public static int[] copyOf(int[] original,
+                           int newLength)
+
+
+
+
+
+
+ +

+copyOf

+
+public static <T> T[] copyOf(T[] original,
+                             int newLength)
+
+
+
+
+
+
+ +

+copyOf

+
+public static <T,U> T[] copyOf(U[] original,
+                               int newLength,
+                               java.lang.Class<? extends T[]> newType)
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/util/BufferUtils.html b/doc/api/morfologik/util/BufferUtils.html new file mode 100644 index 0000000..29302b4 --- /dev/null +++ b/doc/api/morfologik/util/BufferUtils.html @@ -0,0 +1,252 @@ + + + + + + +BufferUtils + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.util +
+Class BufferUtils

+
+java.lang.Object
+  extended by morfologik.util.BufferUtils
+
+
+
+
public final class BufferUtils
extends java.lang.Object
+ + +

+Utility functions for buffers. +

+ +

+


+ +

+ + + + + + + + + + + + + + + + +
+Method Summary
+static java.nio.ByteBufferensureCapacity(java.nio.ByteBuffer buffer, + int capacity) + +
+          Ensure the byte buffer's capacity.
+static java.nio.CharBufferensureCapacity(java.nio.CharBuffer buffer, + int capacity) + +
+          Ensure the char buffer's capacity.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Method Detail
+ +

+ensureCapacity

+
+public static java.nio.ByteBuffer ensureCapacity(java.nio.ByteBuffer buffer,
+                                                 int capacity)
+
+
Ensure the byte buffer's capacity. If a new buffer is allocated, its + content is empty (the old buffer's contents are not copied). +

+

+
Parameters:
buffer - The buffer to check or null if a new buffer + should be allocated.
+
+
+
+ +

+ensureCapacity

+
+public static java.nio.CharBuffer ensureCapacity(java.nio.CharBuffer buffer,
+                                                 int capacity)
+
+
Ensure the char buffer's capacity. If a new buffer is allocated, its + content is empty (the old buffer's contents are not copied). +

+

+
Parameters:
buffer - The buffer to check or null if a new buffer + should be allocated.
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/util/FileUtils.html b/doc/api/morfologik/util/FileUtils.html new file mode 100644 index 0000000..13e41a6 --- /dev/null +++ b/doc/api/morfologik/util/FileUtils.html @@ -0,0 +1,251 @@ + + + + + + +FileUtils + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.util +
+Class FileUtils

+
+java.lang.Object
+  extended by morfologik.util.FileUtils
+
+
+
+
public final class FileUtils
extends java.lang.Object
+ + +

+Utility functions. +

+ +

+


+ +

+ + + + + + + + + + + + + + + + +
+Method Summary
+static voidassertExists(java.io.File fsaFile, + boolean requireFile, + boolean requireDirectory) + +
+          Checks if the given file exists.
+static voidclose(java.io.Closeable... closeables) + +
+          Force any non-null closeables.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Method Detail
+ +

+assertExists

+
+public static void assertExists(java.io.File fsaFile,
+                                boolean requireFile,
+                                boolean requireDirectory)
+                         throws java.io.IOException
+
+
Checks if the given file exists. +

+

+ +
Throws: +
java.io.IOException
+
+
+
+ +

+close

+
+public static void close(java.io.Closeable... closeables)
+
+
Force-close any non-null closeables. +

+

+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/util/PerformanceTimer.html b/doc/api/morfologik/util/PerformanceTimer.html new file mode 100644 index 0000000..66d446c --- /dev/null +++ b/doc/api/morfologik/util/PerformanceTimer.html @@ -0,0 +1,303 @@ + + + + + + +PerformanceTimer + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.util +
+Class PerformanceTimer

+
+java.lang.Object
+  extended by morfologik.util.PerformanceTimer
+
+
+
+
public final class PerformanceTimer
extends java.lang.Object
+ + +

+Simple, simple performance checking. +

+ +

+


+ +

+ + + + + + + + + + + +
+Constructor Summary
PerformanceTimer() + +
+           
+  + + + + + + + + + + + + + + + + + + + +
+Method Summary
+ longelemsPerSecond(int sequences) + +
+           
+static PerformanceTimerrun(java.util.concurrent.Callable<java.lang.Void> task, + int warmupRounds, + int cycles) + +
+          Run the task with a given number of warm-up rounds and the given number + of cycles.
+ java.lang.StringtoString() + +
+           
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
+  +

+ + + + + + + + +
+Constructor Detail
+ +

+PerformanceTimer

+
+public PerformanceTimer()
+
+
+ + + + + + + + +
+Method Detail
+ +

+run

+
+public static PerformanceTimer run(java.util.concurrent.Callable<java.lang.Void> task,
+                                   int warmupRounds,
+                                   int cycles)
+
+
Run the task with a given number of warm-up rounds and the given number + of cycles. +

+

+
+
+
+
+ +

+toString

+
+public java.lang.String toString()
+
+
+
Overrides:
toString in class java.lang.Object
+
+
+
+
+
+
+ +

+elemsPerSecond

+
+public long elemsPerSecond(int sequences)
+
+
+
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/util/ResourceUtils.html b/doc/api/morfologik/util/ResourceUtils.html new file mode 100644 index 0000000..95d56b9 --- /dev/null +++ b/doc/api/morfologik/util/ResourceUtils.html @@ -0,0 +1,228 @@ + + + + + + +ResourceUtils + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ +

+ +morfologik.util +
+Class ResourceUtils

+
+java.lang.Object
+  extended by morfologik.util.ResourceUtils
+
+
+
+
public final class ResourceUtils
extends java.lang.Object
+ + +

+Resource management utilities. +

+ +

+


+ +

+ + + + + + + + + + + + +
+Method Summary
+static java.io.InputStreamopenInputStream(java.lang.String resource) + +
+          Returns an input stream to the resource.
+ + + + + + + +
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
+  +

+ + + + + + + + +
+Method Detail
+ +

+openInputStream

+
+public static java.io.InputStream openInputStream(java.lang.String resource)
+                                           throws java.io.IOException
+
+
Returns an input stream to the resource. +

+

+
Parameters:
resource - The path leading to the resource. Can be a URL, a path + leading to a class resource or a File. +
Returns:
InputStream instance. +
Throws: +
java.io.IOException - If the resource could not be found or opened.
+
+
+ +
+ + + + + + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/util/package-frame.html b/doc/api/morfologik/util/package-frame.html new file mode 100644 index 0000000..0f210df --- /dev/null +++ b/doc/api/morfologik/util/package-frame.html @@ -0,0 +1,40 @@ + + + + + + +morfologik.util + + + + + + + + + + + +morfologik.util + + + + +
+Classes  + +
+Arrays +
+BufferUtils +
+FileUtils +
+PerformanceTimer +
+ResourceUtils
+ + + + diff --git a/doc/api/morfologik/util/package-summary.html b/doc/api/morfologik/util/package-summary.html new file mode 100644 index 0000000..d50d2bd --- /dev/null +++ b/doc/api/morfologik/util/package-summary.html @@ -0,0 +1,171 @@ + + + + + + +morfologik.util + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+

+Package morfologik.util +

+ + + + + + + + + + + + + + + + + + + + + + + + + +
+Class Summary
ArraysCompatibility layer for JVM 1.5.
BufferUtilsUtility functions for buffers.
FileUtilsUtility functions.
PerformanceTimerSimple, simple performance checking.
ResourceUtilsResource management utilities.
+  + +

+

+
+
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + diff --git a/doc/api/morfologik/util/package-tree.html b/doc/api/morfologik/util/package-tree.html new file mode 100644 index 0000000..5095a1d --- /dev/null +++ b/doc/api/morfologik/util/package-tree.html @@ -0,0 +1,151 @@ + + + + + + +morfologik.util Class Hierarchy + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+
+

+Hierarchy For Package morfologik.util +

+
+
+
Package Hierarchies:
All Packages
+
+

+Class Hierarchy +

+ +
+ + + + + + + + + + + + + + + +
+ +
+ + + +
+ + + -- cgit v1.2.3