summaryrefslogtreecommitdiff
path: root/morfologik-fsa/src/main/java/morfologik/fsa/FSAInfo.java
blob: ab6561019730251be245dcf04602be496c8df336 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
package morfologik.fsa;

import java.util.BitSet;

import com.carrotsearch.hppc.IntIntHashMap;

/**
 * Compute additional information about an FSA: number of arcs, nodes, etc.
 */
public final class FSAInfo {
  /**
   * Computes the exact number of states and nodes by recursively traversing the FSA.
   */
  private static class NodeVisitor {
    final BitSet visitedArcs = new BitSet();
    final BitSet visitedNodes = new BitSet();

    int nodes;
    int arcs;
    int totalArcs;

    private final FSA fsa;

    NodeVisitor(FSA fsa) {
      this.fsa = fsa;
    }

    public void visitNode(final int node) {
      if (visitedNodes.get(node)) {
        return;
      }
      visitedNodes.set(node);

      nodes++;
      for (int arc = fsa.getFirstArc(node); arc != 0; arc = fsa.getNextArc(arc)) {
        if (!visitedArcs.get(arc)) {
          arcs++;
        }
        totalArcs++;
        visitedArcs.set(arc);

        if (!fsa.isArcTerminal(arc)) {
          visitNode(fsa.getEndNode(arc));
        }
      }
    }
  }

  /**
   * Computes the exact number of final states.
   */
  private static class FinalStateVisitor {
    final IntIntHashMap visitedNodes = new IntIntHashMap();

    private final FSA fsa;

    FinalStateVisitor(FSA fsa) {
      this.fsa = fsa;
    }

    public int visitNode(int node) {
      int index = visitedNodes.indexOf(node);
      if (index >= 0) {
        return visitedNodes.indexGet(index);
      }

      int fromHere = 0;
      for (int arc = fsa.getFirstArc(node); arc != 0; arc = fsa.getNextArc(arc)) {
        if (fsa.isArcFinal(arc))
          fromHere++;

        if (!fsa.isArcTerminal(arc)) {
          fromHere += visitNode(fsa.getEndNode(arc));
        }
      }
      visitedNodes.put(node, fromHere);
      return fromHere;
    }
  }

  /**
   * Number of nodes in the automaton.
   */
  public final int nodeCount;

  /**
   * Number of arcs in the automaton, excluding an arcs from the zero node (initial) and an arc from the start node to
   * the root node.
   */
  public final int arcsCount;

  /**
   * Total number of arcs, counting arcs that physically overlap due to merging.
   */
  public final int arcsCountTotal;

  /**
   * Number of final states (number of input sequences stored in the automaton).
   */
  public final int finalStatesCount;

  /**
   * Arcs size (in serialized form).
   */
  public final int size;

  /*
	 * 
	 */
  public FSAInfo(FSA fsa) {
    final NodeVisitor w = new NodeVisitor(fsa);
    int root = fsa.getRootNode();
    if (root > 0) {
      w.visitNode(root);
    }

    this.nodeCount = 1 + w.nodes;
    this.arcsCount = 1 + w.arcs;
    this.arcsCountTotal = 1 + w.totalArcs;

    final FinalStateVisitor fsv = new FinalStateVisitor(fsa);
    this.finalStatesCount = fsv.visitNode(fsa.getRootNode());

    if (fsa instanceof FSA5) {
      this.size = ((FSA5) fsa).arcs.length;
    } else {
      this.size = 0;
    }
  }

  /*
	 * 
	 */
  public FSAInfo(int nodeCount, int arcsCount, int arcsCountTotal, int finalStatesCount) {
    this.nodeCount = nodeCount;
    this.arcsCount = arcsCount;
    this.arcsCountTotal = arcsCountTotal;
    this.finalStatesCount = finalStatesCount;
    this.size = 0;
  }

  /*
	 * 
	 */
  @Override
  public String toString() {
    return "Nodes: " + nodeCount + ", arcs visited: " + arcsCount + ", arcs total: " + arcsCountTotal
        + ", final states: " + finalStatesCount + ", size: " + size;
  }
}