package morfologik.stemming; import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.*; import morfologik.util.BufferUtils; /** * Stem and tag data associated with a given word. * *
* Important notes: *
* {@code null} if a new buffer should be allocated.
*
* @return Returns {@code target} or the new reallocated buffer.
*/
public ByteBuffer getStemBytes(ByteBuffer target) {
    // Copies the stem's raw bytes (no charset decoding) into the buffer handed
    // back by BufferUtils.ensureCapacity and returns it flipped for reading.
    target = BufferUtils.ensureCapacity(target, stemBuffer.remaining());

    // A recycled target may still carry the position/limit left behind by an
    // earlier flip(); rewind it fully so put() cannot overflow on a stale limit
    // even though the capacity is sufficient. No-op for a freshly allocated buffer.
    target.clear();

    // mark()/reset() restore the internal buffer's position after the copy.
    stemBuffer.mark();
    target.put(stemBuffer);
    stemBuffer.reset();

    target.flip();
    return target;
}
/**
 * Copy the tag's binary data (no charset decoding) to a custom byte buffer.
 * If the buffer is {@code null} or not large enough to hold the result, a
 * new buffer is allocated. Any previous position and limit of the target
 * buffer are discarded; the result always starts at position zero.
 *
 * @param target
 *            Target byte buffer to copy the tag buffer to or
 *            {@code null} if a new buffer should be allocated.
 *
 * @return Returns {@code target} or the new reallocated buffer, flipped so
 *         that it is ready for reading.
 */
public ByteBuffer getTagBytes(ByteBuffer target) {
    target = BufferUtils.ensureCapacity(target, tagBuffer.remaining());

    // A recycled target may still carry the position/limit left behind by an
    // earlier flip(); rewind it fully so put() cannot overflow on a stale limit
    // even though the capacity is sufficient. No-op for a freshly allocated buffer.
    target.clear();

    // mark()/reset() restore the internal buffer's position after the copy.
    tagBuffer.mark();
    target.put(tagBuffer);
    tagBuffer.reset();

    target.flip();
    return target;
}
/**
 * Copy the inflected word's binary data (no charset decoding) to a custom
 * byte buffer. If the buffer is {@code null} or not large enough to hold
 * the result, a new buffer is allocated. Any previous position and limit of
 * the target buffer are discarded; the result always starts at position zero.
 *
 * @param target
 *            Target byte buffer to copy the word buffer to or
 *            {@code null} if a new buffer should be allocated.
 *
 * @return Returns {@code target} or the new reallocated buffer, flipped so
 *         that it is ready for reading.
 */
public ByteBuffer getWordBytes(ByteBuffer target) {
    target = BufferUtils.ensureCapacity(target, wordBuffer.remaining());

    // A recycled target may still carry the position/limit left behind by an
    // earlier flip(); rewind it fully so put() cannot overflow on a stale limit
    // even though the capacity is sufficient. No-op for a freshly allocated buffer.
    target.clear();

    // mark()/reset() restore the internal buffer's position after the copy.
    wordBuffer.mark();
    target.put(wordBuffer);
    wordBuffer.reset();

    target.flip();
    return target;
}
/**
 * Decodes the tag bytes into the tag character buffer held by this object
 * and exposes the result.
 *
 * @return The decoded tag as a character sequence, or {@code null} when
 *         decoding yields no characters (no associated tag data).
 */
public CharSequence getTag() {
    final CharBuffer decoded = decode(tagBuffer, tagCharSequence);
    // decode() may hand back a replacement buffer; remember whichever it used.
    tagCharSequence = decoded;
    if (decoded.remaining() == 0) {
        return null;
    }
    return decoded;
}
/**
 * Decodes the stem bytes into the stem character buffer held by this object
 * and exposes the result.
 *
 * @return The decoded stem as a character sequence, or {@code null} when
 *         decoding yields no characters (no associated stem data).
 */
public CharSequence getStem() {
    final CharBuffer decoded = decode(stemBuffer, stemCharSequence);
    // decode() may hand back a replacement buffer; remember whichever it used.
    stemCharSequence = decoded;
    if (decoded.remaining() == 0) {
        return null;
    }
    return decoded;
}
/**
 * @return The inflected word form stored in this object. Usually this is
 *         the argument that was passed to
 *         {@link DictionaryLookup#lookup(CharSequence)}.
 */
public CharSequence getWord() {
    return this.wordCharSequence;
}
/**
 * Unsupported operation: this method never returns normally.
 *
 * @throws UnsupportedOperationException
 *             always, with {@code COLLECTIONS_ERROR_MESSAGE} as its message.
 */
@Override
public boolean equals(Object obj) {
    final String reason = COLLECTIONS_ERROR_MESSAGE;
    throw new UnsupportedOperationException(reason);
}
/**
 * Unsupported operation: this method never returns normally.
 *
 * @throws UnsupportedOperationException
 *             always, with {@code COLLECTIONS_ERROR_MESSAGE} as its message.
 */
@Override
public int hashCode() {
    final String reason = COLLECTIONS_ERROR_MESSAGE;
    throw new UnsupportedOperationException(reason);
}
/**
 * @return A textual form {@code WordData[word,stem,tag]} assembled from
 *         {@link #getWord()}, {@link #getStem()} and {@link #getTag()};
 *         a missing part is rendered as the text {@code null}.
 */
@Override
public String toString() {
    final StringBuilder text = new StringBuilder("WordData[");
    text.append(String.valueOf(getWord())).append(",");
    text.append(String.valueOf(getStem())).append(",");
    text.append(String.valueOf(getTag())).append("]");
    return text.toString();
}
/**
 * Declare a covariant of {@link Object#clone()} that returns a deep copy of
 * this object. The content of all internal byte buffers is copied and the
 * word's character sequence is converted to an immutable string if needed.
 * An object whose word data is unset (as left by {@code reset()}) is cloned
 * with the word fields left {@code null} instead of failing.
 *
 * @return A new instance holding copies of the word, stem and tag data.
 */
@Override
protected WordData clone() {
    // NOTE(review): the decoder instance is handed to the copy as-is rather
    // than duplicated - confirm that sharing it is intended.
    final WordData copy = new WordData(this.decoder);

    // reset() sets both word fields to null; tolerate that state here rather
    // than throwing a NullPointerException from the copy helpers.
    copy.wordCharSequence = (wordCharSequence == null)
            ? null
            : cloneCharSequence(wordCharSequence);
    copy.wordBuffer = (wordBuffer == null) ? null : getWordBytes(null);

    // Passing null asks the getters for freshly allocated buffers.
    copy.stemBuffer = getStemBytes(null);
    copy.tagBuffer = getTagBytes(null);
    return copy;
}
/**
 * Clone a character sequence only if it is not immutable. Strings are
 * returned unchanged; any other sequence is snapshotted via
 * {@link Object#toString()}.
 *
 * @param chs
 *            The sequence to copy, may be {@code null}.
 * @return {@code chs} itself if it is {@code null} or a {@link String},
 *         otherwise its string representation.
 */
private CharSequence cloneCharSequence(CharSequence chs) {
    // null is passed through untouched: there is nothing to copy, and
    // calling toString() on it would throw.
    if (chs == null || chs instanceof String) {
        return chs;
    }
    return chs.toString();
}
/**
 * Prepare this object for storing another word's data: the word references
 * are dropped and the stem/tag buffers are rewound for reuse.
 */
void reset() {
    // The word's sequence and bytes are simply forgotten.
    wordCharSequence = null;
    wordBuffer = null;

    // Stem and tag character buffers are kept; clear() only resets their
    // position and limit.
    stemCharSequence.clear();
    tagCharSequence.clear();

    // Likewise for the stem and tag byte buffers.
    stemBuffer.clear();
    tagBuffer.clear();
}
/**
 * Decode the remaining content of a byte buffer into a character buffer,
 * replacing the character buffer with a larger one if it cannot hold the
 * worst-case result. The byte buffer's position is restored afterwards.
 *
 * @param bytes
 *            Bytes to decode, from the buffer's position up to its limit.
 * @param chars
 *            Character buffer to reuse; it is cleared before decoding.
 * @return {@code chars}, or a newly allocated buffer if {@code chars} was
 *         too small, flipped so that it is ready for reading.
 */
private CharBuffer decode(ByteBuffer bytes, CharBuffer chars) {
    chars.clear();

    // Round up: truncating a fractional chars-per-byte product could leave
    // the buffer one character short of the worst case.
    final int required = (int) Math.ceil(bytes.remaining() * (double) decoder.maxCharsPerByte());

    // Reallocate only when the buffer is strictly too small; a buffer of
    // exactly the required capacity can be reused.
    if (chars.capacity() < required) {
        chars = CharBuffer.allocate(required);
    }

    // mark()/reset() restore the byte buffer's position after decoding.
    bytes.mark();
    decoder.reset();
    // NOTE(review): the CoderResult is ignored and flush() is not called, so
    // malformed input is not reported to the caller - confirm this is intended.
    decoder.decode(bytes, chars, true);
    chars.flip();
    bytes.reset();

    return chars;
}
}