edu.berkeley.nlp.lm.map
Class HashNgramMap<T>
java.lang.Object
edu.berkeley.nlp.lm.map.AbstractNgramMap<T>
edu.berkeley.nlp.lm.map.HashNgramMap<T>
- Type Parameters:
T
-
- All Implemented Interfaces:
- ContextEncodedNgramMap<T>, NgramMap<T>, Serializable
public final class HashNgramMap<T>
- extends AbstractNgramMap<T>
- implements ContextEncodedNgramMap<T>
- Author:
- adampauls
- See Also:
- Serialized Form
Method Summary |
void |
clearStorage()
|
boolean |
contains(int[] ngram,
int startPos,
int endPos)
|
static <T> HashNgramMap<T>
|
createExplicitWordHashNgramMap(ValueContainer<T> values,
ConfigOptions opts,
int maxNgramOrder,
boolean reversed)
Note: Explicit HashNgramMap can grow beyond maxNgramOrder |
static <T> HashNgramMap<T>
|
createImplicitWordHashNgramMap(ValueContainer<T> values,
ConfigOptions opts,
LongArray[] numNgramsForEachWord,
boolean reversed)
|
T |
get(int[] ngram,
int startPos,
int endPos)
|
int |
getFirstWordForOffset(long offset,
int ngramOrder)
|
int |
getLastWordForOffset(long offset,
int ngramOrder)
|
int |
getMaxNgramOrder()
|
long |
getNextContextOffset(long offset,
int ngramOrder)
|
int |
getNextWord(long offset,
int ngramOrder)
|
int[] |
getNgramForOffset(long offset,
int ngramOrder)
|
int[] |
getNgramForOffset(long offset,
int ngramOrder,
int[] ret)
|
int[] |
getNgramFromContextEncoding(long contextOffset,
int contextOrder,
int word)
|
Iterable<Long> |
getNgramOffsetsForOrder(int ngramOrder)
|
Iterable<NgramMap.Entry<T>> |
getNgramsForOrder(int ngramOrder)
|
long |
getNumNgrams(int ngramOrder)
|
long |
getOffset(long contextOffset,
int contextOrder,
int word)
|
ContextEncodedNgramLanguageModel.LmContextInfo |
getOffsetForNgram(int[] ngram,
int startPos,
int endPos)
|
long |
getOffsetForNgramInModel(int[] ngram,
int startPos,
int endPos)
Like getOffsetForNgram(int[], int, int), but assumes that the
full n-gram is in the map (i.e., does not back off to the largest suffix
which is in the model). |
long |
getPrefixOffset(long offset,
int ngramOrder)
Gets the offset of the context for an n-gram (represented by offset) |
long |
getTotalSize()
|
long |
getValueAndOffset(long contextOffset,
int contextOrder,
int word,
T outputVal)
|
CustomWidthArray |
getValueStoringArray(int ngramOrder)
|
void |
handleNgramsFinished(int justFinishedOrder)
|
void |
initWithLengths(List<Long> numNGrams)
|
boolean |
isReversed()
|
long |
put(int[] ngram,
int startPos,
int endPos,
T val)
|
long |
putWithOffset(int[] ngram,
int startPos,
int endPos,
long contextOffset,
T val)
Warning: does not rehash if the load factor is exceeded; you must call
rehashIfNecessary explicitly. |
long |
putWithOffsetAndSuffix(int[] ngram,
int startPos,
int endPos,
long contextOffset,
long suffixOffset,
T val)
Warning: does not rehash if the load factor is exceeded; you must call
rehashIfNecessary explicitly. |
void |
rehashIfNecessary(int num)
|
void |
trim()
|
boolean |
wordHasBigrams(int word)
|
Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
createImplicitWordHashNgramMap
public static <T> HashNgramMap<T> createImplicitWordHashNgramMap(ValueContainer<T> values,
ConfigOptions opts,
LongArray[] numNgramsForEachWord,
boolean reversed)
createExplicitWordHashNgramMap
public static <T> HashNgramMap<T> createExplicitWordHashNgramMap(ValueContainer<T> values,
ConfigOptions opts,
int maxNgramOrder,
boolean reversed)
- Note: Explicit HashNgramMap can grow beyond maxNgramOrder
- Type Parameters:
T
- - Parameters:
values
- opts
- maxNgramOrder
- reversed
-
- Returns:
put
public long put(int[] ngram,
int startPos,
int endPos,
T val)
- Specified by:
put
in interface NgramMap<T>
putWithOffset
public long putWithOffset(int[] ngram,
int startPos,
int endPos,
long contextOffset,
T val)
- Warning: does not rehash if the load factor is exceeded; you must call
rehashIfNecessary explicitly. Rehashing is deferred so that the offsets
returned remain valid. Basically, you should not use this function unless
you really know what you're doing.
- Parameters:
ngram
- startPos
- endPos
- contextOffset
- val
-
- Returns:
putWithOffsetAndSuffix
public long putWithOffsetAndSuffix(int[] ngram,
int startPos,
int endPos,
long contextOffset,
long suffixOffset,
T val)
- Warning: does not rehash if the load factor is exceeded; you must call
rehashIfNecessary explicitly. Rehashing is deferred so that the offsets
returned remain valid. Basically, you should not use this function unless
you really know what you're doing.
- Parameters:
ngram
- startPos
- endPos
- contextOffset
- suffixOffset
- val
-
- Returns:
rehashIfNecessary
public void rehashIfNecessary(int num)
getValueAndOffset
public long getValueAndOffset(long contextOffset,
int contextOrder,
int word,
T outputVal)
- Specified by:
getValueAndOffset
in interface NgramMap<T>
getOffset
public long getOffset(long contextOffset,
int contextOrder,
int word)
- Specified by:
getOffset
in interface ContextEncodedNgramMap<T>
getNgramFromContextEncoding
public int[] getNgramFromContextEncoding(long contextOffset,
int contextOrder,
int word)
- Specified by:
getNgramFromContextEncoding
in interface ContextEncodedNgramMap<T>
getNextWord
public int getNextWord(long offset,
int ngramOrder)
getNextContextOffset
public long getNextContextOffset(long offset,
int ngramOrder)
getFirstWordForOffset
public int getFirstWordForOffset(long offset,
int ngramOrder)
getLastWordForOffset
public int getLastWordForOffset(long offset,
int ngramOrder)
getNgramForOffset
public int[] getNgramForOffset(long offset,
int ngramOrder)
getNgramForOffset
public int[] getNgramForOffset(long offset,
int ngramOrder,
int[] ret)
getOffsetForNgram
public ContextEncodedNgramLanguageModel.LmContextInfo getOffsetForNgram(int[] ngram,
int startPos,
int endPos)
- Specified by:
getOffsetForNgram
in interface ContextEncodedNgramMap<T>
getOffsetForNgramInModel
public long getOffsetForNgramInModel(int[] ngram,
int startPos,
int endPos)
- Like getOffsetForNgram(int[], int, int), but assumes that the
full n-gram is in the map (i.e., does not back off to the largest suffix
which is in the model).
- Parameters:
ngram
- startPos
- endPos
-
- Returns:
handleNgramsFinished
public void handleNgramsFinished(int justFinishedOrder)
- Specified by:
handleNgramsFinished
in interface NgramMap<T>
initWithLengths
public void initWithLengths(List<Long> numNGrams)
- Specified by:
initWithLengths
in interface NgramMap<T>
trim
public void trim()
- Specified by:
trim
in interface NgramMap<T>
getPrefixOffset
public long getPrefixOffset(long offset,
int ngramOrder)
- Gets the offset of the context for an n-gram (represented by offset)
- Parameters:
offset
- ngramOrder
-
- Returns:
getMaxNgramOrder
public int getMaxNgramOrder()
- Specified by:
getMaxNgramOrder
in interface NgramMap<T>
getNumNgrams
public long getNumNgrams(int ngramOrder)
- Specified by:
getNumNgrams
in interface NgramMap<T>
getNgramsForOrder
public Iterable<NgramMap.Entry<T>> getNgramsForOrder(int ngramOrder)
- Specified by:
getNgramsForOrder
in interface NgramMap<T>
getNgramOffsetsForOrder
public Iterable<Long> getNgramOffsetsForOrder(int ngramOrder)
isReversed
public boolean isReversed()
wordHasBigrams
public boolean wordHasBigrams(int word)
- Specified by:
wordHasBigrams
in interface ContextEncodedNgramMap<T>
contains
public boolean contains(int[] ngram,
int startPos,
int endPos)
- Specified by:
contains
in interface NgramMap<T>
get
public T get(int[] ngram,
int startPos,
int endPos)
- Specified by:
get
in interface NgramMap<T>
getTotalSize
public long getTotalSize()
getValueStoringArray
public CustomWidthArray getValueStoringArray(int ngramOrder)
- Specified by:
getValueStoringArray
in interface NgramMap<T>
clearStorage
public void clearStorage()
- Specified by:
clearStorage
in interface NgramMap<T>