summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--morfologik-fsa/src/main/java/morfologik/fsa/CFSA2Serializer.java16
-rw-r--r--morfologik-fsa/src/main/java/morfologik/fsa/FSA5Serializer.java4
-rw-r--r--morfologik-fsa/src/main/java/morfologik/fsa/FSAInfo.java274
-rw-r--r--morfologik-fsa/src/main/java/morfologik/fsa/FSAUtils.java6
-rw-r--r--morfologik-tools/src/main/java/morfologik/tools/FSABuildTool.java6
-rw-r--r--morfologik-tools/src/test/java/morfologik/tools/SequenceEncodersRandomizedTest.java6
6 files changed, 153 insertions, 159 deletions
diff --git a/morfologik-fsa/src/main/java/morfologik/fsa/CFSA2Serializer.java b/morfologik-fsa/src/main/java/morfologik/fsa/CFSA2Serializer.java
index 8026f33..c4a5868 100644
--- a/morfologik-fsa/src/main/java/morfologik/fsa/CFSA2Serializer.java
+++ b/morfologik-fsa/src/main/java/morfologik/fsa/CFSA2Serializer.java
@@ -23,7 +23,7 @@ import morfologik.util.FileUtils;
import com.carrotsearch.hppc.BitSet;
import com.carrotsearch.hppc.BoundedProportionalArraySizingStrategy;
import com.carrotsearch.hppc.IntArrayList;
-import com.carrotsearch.hppc.IntIntOpenHashMap;
+import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.IntStack;
import com.carrotsearch.hppc.cursors.IntCursor;
import com.carrotsearch.hppc.cursors.IntIntCursor;
@@ -60,12 +60,12 @@ public final class CFSA2Serializer implements FSASerializer {
/**
* A hash map of [state, offset] pairs.
*/
- private IntIntOpenHashMap offsets = new IntIntOpenHashMap();
+ private IntIntHashMap offsets = new IntIntHashMap();
/**
* A hash map of [state, right-language-count] pairs.
*/
- private IntIntOpenHashMap numbers = new IntIntOpenHashMap();
+ private IntIntHashMap numbers = new IntIntHashMap();
/**
* Scratch array for serializing vints.
@@ -216,7 +216,7 @@ public final class CFSA2Serializer implements FSASerializer {
* Compute the states with most inlinks. These should be placed as close to the
* start of the automaton, as possible so that v-coded addresses are tiny.
*/
- final IntIntOpenHashMap inlinkCount = computeInlinkCount(fsa);
+ final IntIntHashMap inlinkCount = computeInlinkCount(fsa);
/*
* An array of ordered states for serialization.
@@ -282,7 +282,7 @@ public final class CFSA2Serializer implements FSASerializer {
* calculating stable state offsets.
*/
private int linearizeAndCalculateOffsets(FSA fsa, IntArrayList states,
- IntArrayList linearized, IntIntOpenHashMap offsets) throws IOException
+ IntArrayList linearized, IntIntHashMap offsets) throws IOException
{
final BitSet visited = new BitSet();
final IntStack nodes = new IntStack();
@@ -346,7 +346,7 @@ public final class CFSA2Serializer implements FSASerializer {
* Compute the set of states that should be linearized first to minimize other
* states goto length.
*/
- private ArrayDeque<Integer> computeFirstStates(IntIntOpenHashMap inlinkCount,
+ private ArrayDeque<Integer> computeFirstStates(IntIntHashMap inlinkCount,
int maxStates,
int minInlinkCount)
{
@@ -382,8 +382,8 @@ public final class CFSA2Serializer implements FSASerializer {
/**
* Compute in-link count for each state.
*/
- private IntIntOpenHashMap computeInlinkCount(final FSA fsa) {
- IntIntOpenHashMap inlinkCount = new IntIntOpenHashMap();
+ private IntIntHashMap computeInlinkCount(final FSA fsa) {
+ IntIntHashMap inlinkCount = new IntIntHashMap();
BitSet visited = new BitSet();
IntStack nodes = new IntStack();
nodes.push(fsa.getRootNode());
diff --git a/morfologik-fsa/src/main/java/morfologik/fsa/FSA5Serializer.java b/morfologik-fsa/src/main/java/morfologik/fsa/FSA5Serializer.java
index 21627a9..e4821d1 100644
--- a/morfologik-fsa/src/main/java/morfologik/fsa/FSA5Serializer.java
+++ b/morfologik-fsa/src/main/java/morfologik/fsa/FSA5Serializer.java
@@ -64,12 +64,12 @@ public final class FSA5Serializer implements FSASerializer {
/**
* A hash map of [state, offset] pairs.
*/
- private IntIntOpenHashMap offsets = new IntIntOpenHashMap();
+ private IntIntHashMap offsets = new IntIntHashMap();
/**
* A hash map of [state, right-language-count] pairs.
*/
- private IntIntOpenHashMap numbers = new IntIntOpenHashMap();
+ private IntIntHashMap numbers = new IntIntHashMap();
/**
* Serialize the automaton with the number of right-language sequences in
diff --git a/morfologik-fsa/src/main/java/morfologik/fsa/FSAInfo.java b/morfologik-fsa/src/main/java/morfologik/fsa/FSAInfo.java
index 4015dce..ab65610 100644
--- a/morfologik-fsa/src/main/java/morfologik/fsa/FSAInfo.java
+++ b/morfologik-fsa/src/main/java/morfologik/fsa/FSAInfo.java
@@ -2,155 +2,149 @@ package morfologik.fsa;
import java.util.BitSet;
-import com.carrotsearch.hppc.IntIntOpenHashMap;
+import com.carrotsearch.hppc.IntIntHashMap;
/**
* Compute additional information about an FSA: number of arcs, nodes, etc.
*/
public final class FSAInfo {
- /**
- * Computes the exact number of states and nodes by recursively traversing
- * the FSA.
- */
- private static class NodeVisitor {
- final BitSet visitedArcs = new BitSet();
- final BitSet visitedNodes = new BitSet();
-
- int nodes;
- int arcs;
- int totalArcs;
-
- private final FSA fsa;
-
- NodeVisitor(FSA fsa) {
- this.fsa = fsa;
- }
-
- public void visitNode(final int node) {
- if (visitedNodes.get(node)) {
- return;
- }
- visitedNodes.set(node);
-
- nodes++;
- for (int arc = fsa.getFirstArc(node); arc != 0; arc = fsa
- .getNextArc(arc)) {
- if (!visitedArcs.get(arc)) {
- arcs++;
- }
- totalArcs++;
- visitedArcs.set(arc);
-
- if (!fsa.isArcTerminal(arc)) {
- visitNode(fsa.getEndNode(arc));
- }
- }
- }
- }
-
- /**
- * Computes the exact number of final states.
- */
- private static class FinalStateVisitor {
- final IntIntOpenHashMap visitedNodes = new IntIntOpenHashMap();
-
- private final FSA fsa;
-
- FinalStateVisitor(FSA fsa) {
- this.fsa = fsa;
- }
-
- public int visitNode(int node) {
- if (visitedNodes.containsKey(node))
- return visitedNodes.lget();
-
- int fromHere = 0;
- for (int arc = fsa.getFirstArc(node);
- arc != 0; arc = fsa.getNextArc(arc))
- {
- if (fsa.isArcFinal(arc))
- fromHere++;
-
- if (!fsa.isArcTerminal(arc)) {
- fromHere += visitNode(fsa.getEndNode(arc));
- }
- }
- visitedNodes.put(node, fromHere);
- return fromHere;
- }
- }
-
- /**
- * Number of nodes in the automaton.
- */
- public final int nodeCount;
-
- /**
- * Number of arcs in the automaton, excluding an arcs from the zero node
- * (initial) and an arc from the start node to the root node.
- */
- public final int arcsCount;
-
- /**
- * Total number of arcs, counting arcs that physically overlap due to
- * merging.
- */
- public final int arcsCountTotal;
-
- /**
- * Number of final states (number of input sequences stored in the automaton).
- */
- public final int finalStatesCount;
-
- /**
- * Arcs size (in serialized form).
- */
- public final int size;
-
- /*
+ /**
+ * Computes the exact number of states and nodes by recursively traversing the FSA.
+ */
+ private static class NodeVisitor {
+ final BitSet visitedArcs = new BitSet();
+ final BitSet visitedNodes = new BitSet();
+
+ int nodes;
+ int arcs;
+ int totalArcs;
+
+ private final FSA fsa;
+
+ NodeVisitor(FSA fsa) {
+ this.fsa = fsa;
+ }
+
+ public void visitNode(final int node) {
+ if (visitedNodes.get(node)) {
+ return;
+ }
+ visitedNodes.set(node);
+
+ nodes++;
+ for (int arc = fsa.getFirstArc(node); arc != 0; arc = fsa.getNextArc(arc)) {
+ if (!visitedArcs.get(arc)) {
+ arcs++;
+ }
+ totalArcs++;
+ visitedArcs.set(arc);
+
+ if (!fsa.isArcTerminal(arc)) {
+ visitNode(fsa.getEndNode(arc));
+ }
+ }
+ }
+ }
+
+ /**
+ * Computes the exact number of final states.
+ */
+ private static class FinalStateVisitor {
+ final IntIntHashMap visitedNodes = new IntIntHashMap();
+
+ private final FSA fsa;
+
+ FinalStateVisitor(FSA fsa) {
+ this.fsa = fsa;
+ }
+
+ public int visitNode(int node) {
+ int index = visitedNodes.indexOf(node);
+ if (index >= 0) {
+ return visitedNodes.indexGet(index);
+ }
+
+ int fromHere = 0;
+ for (int arc = fsa.getFirstArc(node); arc != 0; arc = fsa.getNextArc(arc)) {
+ if (fsa.isArcFinal(arc))
+ fromHere++;
+
+ if (!fsa.isArcTerminal(arc)) {
+ fromHere += visitNode(fsa.getEndNode(arc));
+ }
+ }
+ visitedNodes.put(node, fromHere);
+ return fromHere;
+ }
+ }
+
+ /**
+ * Number of nodes in the automaton.
+ */
+ public final int nodeCount;
+
+ /**
+ * Number of arcs in the automaton, excluding an arcs from the zero node (initial) and an arc from the start node to
+ * the root node.
+ */
+ public final int arcsCount;
+
+ /**
+ * Total number of arcs, counting arcs that physically overlap due to merging.
+ */
+ public final int arcsCountTotal;
+
+ /**
+ * Number of final states (number of input sequences stored in the automaton).
+ */
+ public final int finalStatesCount;
+
+ /**
+ * Arcs size (in serialized form).
+ */
+ public final int size;
+
+ /*
*
*/
- public FSAInfo(FSA fsa) {
- final NodeVisitor w = new NodeVisitor(fsa);
- int root = fsa.getRootNode();
- if (root > 0) {
- w.visitNode(root);
- }
-
- this.nodeCount = 1 + w.nodes;
- this.arcsCount = 1 + w.arcs;
- this.arcsCountTotal = 1 + w.totalArcs;
-
- final FinalStateVisitor fsv = new FinalStateVisitor(fsa);
- this.finalStatesCount = fsv.visitNode(fsa.getRootNode());
-
- if (fsa instanceof FSA5) {
- this.size = ((FSA5) fsa).arcs.length;
- } else {
- this.size = 0;
- }
- }
-
- /*
+ public FSAInfo(FSA fsa) {
+ final NodeVisitor w = new NodeVisitor(fsa);
+ int root = fsa.getRootNode();
+ if (root > 0) {
+ w.visitNode(root);
+ }
+
+ this.nodeCount = 1 + w.nodes;
+ this.arcsCount = 1 + w.arcs;
+ this.arcsCountTotal = 1 + w.totalArcs;
+
+ final FinalStateVisitor fsv = new FinalStateVisitor(fsa);
+ this.finalStatesCount = fsv.visitNode(fsa.getRootNode());
+
+ if (fsa instanceof FSA5) {
+ this.size = ((FSA5) fsa).arcs.length;
+ } else {
+ this.size = 0;
+ }
+ }
+
+ /*
*
*/
- public FSAInfo(int nodeCount, int arcsCount, int arcsCountTotal, int finalStatesCount) {
- this.nodeCount = nodeCount;
- this.arcsCount = arcsCount;
- this.arcsCountTotal = arcsCountTotal;
- this.finalStatesCount = finalStatesCount;
- this.size = 0;
- }
-
- /*
+ public FSAInfo(int nodeCount, int arcsCount, int arcsCountTotal, int finalStatesCount) {
+ this.nodeCount = nodeCount;
+ this.arcsCount = arcsCount;
+ this.arcsCountTotal = arcsCountTotal;
+ this.finalStatesCount = finalStatesCount;
+ this.size = 0;
+ }
+
+ /*
*
*/
- @Override
- public String toString() {
- return "Nodes: " + nodeCount
- + ", arcs visited: " + arcsCount
- + ", arcs total: " + arcsCountTotal
- + ", final states: " + finalStatesCount
- + ", size: " + size;
- }
+ @Override
+ public String toString() {
+ return "Nodes: " + nodeCount + ", arcs visited: " + arcsCount + ", arcs total: " + arcsCountTotal
+ + ", final states: " + finalStatesCount + ", size: " + size;
+ }
}
diff --git a/morfologik-fsa/src/main/java/morfologik/fsa/FSAUtils.java b/morfologik-fsa/src/main/java/morfologik/fsa/FSAUtils.java
index cad611e..d368350 100644
--- a/morfologik-fsa/src/main/java/morfologik/fsa/FSAUtils.java
+++ b/morfologik-fsa/src/main/java/morfologik/fsa/FSAUtils.java
@@ -8,7 +8,7 @@ import java.util.Arrays;
import java.util.BitSet;
import java.util.TreeMap;
-import com.carrotsearch.hppc.IntIntOpenHashMap;
+import com.carrotsearch.hppc.IntIntHashMap;
/**
* Other FSA-related utilities not directly associated with the class hierarchy.
@@ -180,8 +180,8 @@ public final class FSAUtils {
/**
* Calculate the size of right language for each state in an FSA.
*/
- public static IntIntOpenHashMap rightLanguageForAllStates(final FSA fsa) {
- final IntIntOpenHashMap numbers = new IntIntOpenHashMap();
+ public static IntIntHashMap rightLanguageForAllStates(final FSA fsa) {
+ final IntIntHashMap numbers = new IntIntHashMap();
fsa.visitInPostOrder(new StateVisitor() {
public boolean accept(int state) {
diff --git a/morfologik-tools/src/main/java/morfologik/tools/FSABuildTool.java b/morfologik-tools/src/main/java/morfologik/tools/FSABuildTool.java
index 687b6cb..6bfcfbf 100644
--- a/morfologik-tools/src/main/java/morfologik/tools/FSABuildTool.java
+++ b/morfologik-tools/src/main/java/morfologik/tools/FSABuildTool.java
@@ -33,7 +33,7 @@ import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang.StringEscapeUtils;
-import com.carrotsearch.hppc.IntIntOpenHashMap;
+import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.cursors.IntIntCursor;
/**
@@ -176,7 +176,7 @@ public final class FSABuildTool extends Tool {
TreeMap<Integer, Integer> fanout = FSAUtils.calculateFanOuts(fsa, fsa.getRootNode());
logger.endPart();
- final IntIntOpenHashMap numbers = new IntIntOpenHashMap();
+ final IntIntHashMap numbers = new IntIntHashMap();
fsa.visitInPostOrder(new StateVisitor() {
public boolean accept(int state) {
int thisNodeNumber = 0;
@@ -538,4 +538,4 @@ public final class FSABuildTool extends Tool {
final FSABuildTool tool = new FSABuildTool();
tool.go(args);
}
-} \ No newline at end of file
+}
diff --git a/morfologik-tools/src/test/java/morfologik/tools/SequenceEncodersRandomizedTest.java b/morfologik-tools/src/test/java/morfologik/tools/SequenceEncodersRandomizedTest.java
index d0379d7..40be4a8 100644
--- a/morfologik-tools/src/test/java/morfologik/tools/SequenceEncodersRandomizedTest.java
+++ b/morfologik-tools/src/test/java/morfologik/tools/SequenceEncodersRandomizedTest.java
@@ -62,8 +62,8 @@ public class SequenceEncodersRandomizedTest extends RandomizedTest {
{
ByteArrayList src = ByteArrayList.from(srcString.getBytes(UTF8));
ByteArrayList dst = ByteArrayList.from(dstString.getBytes(UTF8));
- ByteArrayList encoded = ByteArrayList.newInstance();
- ByteArrayList decoded = ByteArrayList.newInstance();
+ ByteArrayList encoded = new ByteArrayList();
+ ByteArrayList decoded = new ByteArrayList();
coder.encode(src, dst, encoded);
coder.decode(src, encoded, decoded);
@@ -88,7 +88,7 @@ public class SequenceEncodersRandomizedTest extends RandomizedTest {
encoded.size(),
ByteBuffer.wrap(src.toArray()), builder.build());
- ByteArrayList decoded2 = ByteArrayList.newInstance();
+ ByteArrayList decoded2 = new ByteArrayList();
bb.flip();
while (bb.hasRemaining()) decoded2.add(bb.get());