edu.berkeley.nlp.lm.map
Class HashNgramMap<T>

java.lang.Object
  extended by edu.berkeley.nlp.lm.map.AbstractNgramMap<T>
      extended by edu.berkeley.nlp.lm.map.HashNgramMap<T>
Type Parameters:
T - the type of value stored for each n-gram in the map
All Implemented Interfaces:
ContextEncodedNgramMap<T>, NgramMap<T>, Serializable

public final class HashNgramMap<T>
extends AbstractNgramMap<T>
implements ContextEncodedNgramMap<T>

Author:
adampauls
See Also:
Serialized Form

Nested Class Summary
 
Nested classes/interfaces inherited from interface edu.berkeley.nlp.lm.map.NgramMap
NgramMap.Entry<T>
 
Field Summary
 
Fields inherited from class edu.berkeley.nlp.lm.map.AbstractNgramMap
NUM_BITS_PER_BYTE, NUM_SUFFIX_BITS, NUM_WORD_BITS, opts, SUFFIX_BIT_MASK, values, WORD_BIT_MASK
 
Method Summary
 void clearStorage()
           
 boolean contains(int[] ngram, int startPos, int endPos)
           
static
<T> HashNgramMap<T>
createExplicitWordHashNgramMap(ValueContainer<T> values, ConfigOptions opts, int maxNgramOrder, boolean reversed)
          Note: Explicit HashNgramMap can grow beyond maxNgramOrder
static
<T> HashNgramMap<T>
createImplicitWordHashNgramMap(ValueContainer<T> values, ConfigOptions opts, LongArray[] numNgramsForEachWord, boolean reversed)
           
 T get(int[] ngram, int startPos, int endPos)
           
 int getFirstWordForOffset(long offset, int ngramOrder)
           
 int getLastWordForOffset(long offset, int ngramOrder)
           
 int getMaxNgramOrder()
           
 long getNextContextOffset(long offset, int ngramOrder)
           
 int getNextWord(long offset, int ngramOrder)
           
 int[] getNgramForOffset(long offset, int ngramOrder)
           
 int[] getNgramForOffset(long offset, int ngramOrder, int[] ret)
           
 int[] getNgramFromContextEncoding(long contextOffset, int contextOrder, int word)
           
 Iterable<Long> getNgramOffsetsForOrder(int ngramOrder)
           
 Iterable<NgramMap.Entry<T>> getNgramsForOrder(int ngramOrder)
           
 long getNumNgrams(int ngramOrder)
           
 long getOffset(long contextOffset, int contextOrder, int word)
           
 ContextEncodedNgramLanguageModel.LmContextInfo getOffsetForNgram(int[] ngram, int startPos, int endPos)
           
 long getOffsetForNgramInModel(int[] ngram, int startPos, int endPos)
          Like getOffsetForNgram(int[], int, int), but assumes that the full n-gram is in the map (i.e. does not back off to the largest suffix which is in the model).
 long getPrefixOffset(long offset, int ngramOrder)
          Gets the offset of the context for an n-gram (represented by offset)
 long getTotalSize()
           
 long getValueAndOffset(long contextOffset, int contextOrder, int word, T outputVal)
           
 CustomWidthArray getValueStoringArray(int ngramOrder)
           
 void handleNgramsFinished(int justFinishedOrder)
           
 void initWithLengths(List<Long> numNGrams)
           
 boolean isReversed()
           
 long put(int[] ngram, int startPos, int endPos, T val)
           
 long putWithOffset(int[] ngram, int startPos, int endPos, long contextOffset, T val)
          Warning: does not rehash if the load factor is exceeded; rehashIfNecessary must be called explicitly.
 long putWithOffsetAndSuffix(int[] ngram, int startPos, int endPos, long contextOffset, long suffixOffset, T val)
          Warning: does not rehash if the load factor is exceeded; rehashIfNecessary must be called explicitly.
 void rehashIfNecessary(int num)
           
 void trim()
           
 boolean wordHasBigrams(int word)
           
 
Methods inherited from class edu.berkeley.nlp.lm.map.AbstractNgramMap
combineToKey, containsOutOfVocab, contextOffsetOf, equals, getSubArray, getValues, wordOf
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 
Methods inherited from interface edu.berkeley.nlp.lm.map.NgramMap
getValues
 

Method Detail

createImplicitWordHashNgramMap

public static <T> HashNgramMap<T> createImplicitWordHashNgramMap(ValueContainer<T> values,
                                                                 ConfigOptions opts,
                                                                 LongArray[] numNgramsForEachWord,
                                                                 boolean reversed)

createExplicitWordHashNgramMap

public static <T> HashNgramMap<T> createExplicitWordHashNgramMap(ValueContainer<T> values,
                                                                 ConfigOptions opts,
                                                                 int maxNgramOrder,
                                                                 boolean reversed)
Note: Explicit HashNgramMap can grow beyond maxNgramOrder

Type Parameters:
T - the type of value stored for each n-gram in the map
Parameters:
values -
opts -
maxNgramOrder -
reversed -
Returns:

put

public long put(int[] ngram,
                int startPos,
                int endPos,
                T val)
Specified by:
put in interface NgramMap<T>

putWithOffset

public long putWithOffset(int[] ngram,
                          int startPos,
                          int endPos,
                          long contextOffset,
                          T val)
Warning: this method does not rehash if the load factor is exceeded; you must call rehashIfNecessary explicitly. This is so that the offsets returned remain valid. Basically, you should not use this function unless you really know what you're doing.

Parameters:
ngram -
startPos -
endPos -
contextOffset -
val -
Returns:

putWithOffsetAndSuffix

public long putWithOffsetAndSuffix(int[] ngram,
                                   int startPos,
                                   int endPos,
                                   long contextOffset,
                                   long suffixOffset,
                                   T val)
Warning: this method does not rehash if the load factor is exceeded; you must call rehashIfNecessary explicitly. This is so that the offsets returned remain valid. Basically, you should not use this function unless you really know what you're doing.

Parameters:
ngram -
startPos -
endPos -
contextOffset -
suffixOffset -
val -
Returns:

rehashIfNecessary

public void rehashIfNecessary(int num)

getValueAndOffset

public long getValueAndOffset(long contextOffset,
                              int contextOrder,
                              int word,
                              T outputVal)
Specified by:
getValueAndOffset in interface NgramMap<T>

getOffset

public long getOffset(long contextOffset,
                      int contextOrder,
                      int word)
Specified by:
getOffset in interface ContextEncodedNgramMap<T>

getNgramFromContextEncoding

public int[] getNgramFromContextEncoding(long contextOffset,
                                         int contextOrder,
                                         int word)
Specified by:
getNgramFromContextEncoding in interface ContextEncodedNgramMap<T>

getNextWord

public int getNextWord(long offset,
                       int ngramOrder)

getNextContextOffset

public long getNextContextOffset(long offset,
                                 int ngramOrder)

getFirstWordForOffset

public int getFirstWordForOffset(long offset,
                                 int ngramOrder)

getLastWordForOffset

public int getLastWordForOffset(long offset,
                                int ngramOrder)

getNgramForOffset

public int[] getNgramForOffset(long offset,
                               int ngramOrder)

getNgramForOffset

public int[] getNgramForOffset(long offset,
                               int ngramOrder,
                               int[] ret)

getOffsetForNgram

public ContextEncodedNgramLanguageModel.LmContextInfo getOffsetForNgram(int[] ngram,
                                                                        int startPos,
                                                                        int endPos)
Specified by:
getOffsetForNgram in interface ContextEncodedNgramMap<T>

getOffsetForNgramInModel

public long getOffsetForNgramInModel(int[] ngram,
                                     int startPos,
                                     int endPos)
Like getOffsetForNgram(int[], int, int), but assumes that the full n-gram is in the map (i.e. does not back off to the largest suffix which is in the model).

Parameters:
ngram -
startPos -
endPos -
Returns:

handleNgramsFinished

public void handleNgramsFinished(int justFinishedOrder)
Specified by:
handleNgramsFinished in interface NgramMap<T>

initWithLengths

public void initWithLengths(List<Long> numNGrams)
Specified by:
initWithLengths in interface NgramMap<T>

trim

public void trim()
Specified by:
trim in interface NgramMap<T>

getPrefixOffset

public long getPrefixOffset(long offset,
                            int ngramOrder)
Gets the offset of the context for an n-gram (represented by offset)

Parameters:
offset -
ngramOrder -
Returns:

getMaxNgramOrder

public int getMaxNgramOrder()
Specified by:
getMaxNgramOrder in interface NgramMap<T>

getNumNgrams

public long getNumNgrams(int ngramOrder)
Specified by:
getNumNgrams in interface NgramMap<T>

getNgramsForOrder

public Iterable<NgramMap.Entry<T>> getNgramsForOrder(int ngramOrder)
Specified by:
getNgramsForOrder in interface NgramMap<T>

getNgramOffsetsForOrder

public Iterable<Long> getNgramOffsetsForOrder(int ngramOrder)

isReversed

public boolean isReversed()

wordHasBigrams

public boolean wordHasBigrams(int word)
Specified by:
wordHasBigrams in interface ContextEncodedNgramMap<T>

contains

public boolean contains(int[] ngram,
                        int startPos,
                        int endPos)
Specified by:
contains in interface NgramMap<T>

get

public T get(int[] ngram,
             int startPos,
             int endPos)
Specified by:
get in interface NgramMap<T>

getTotalSize

public long getTotalSize()

getValueStoringArray

public CustomWidthArray getValueStoringArray(int ngramOrder)
Specified by:
getValueStoringArray in interface NgramMap<T>

clearStorage

public void clearStorage()
Specified by:
clearStorage in interface NgramMap<T>