Coverage Report - com.allanbank.mongodb.bson.io.StringDecoderCache
 
Classes in this File Line Coverage Branch Coverage Complexity
StringDecoderCache
100%
26/26
100%
12/12
2.538
StringDecoderCache$TrieCache
100%
22/22
100%
12/12
2.538
StringDecoderCache$TrieCache$TrieNode
100%
24/24
100%
8/8
2.538
 
 1  
 /*
 2  
  * #%L
 3  
  * StringDecoderCache.java - mongodb-async-driver - Allanbank Consulting, Inc.
 4  
  * %%
 5  
  * Copyright (C) 2011 - 2014 Allanbank Consulting, Inc.
 6  
  * %%
 7  
  * Licensed under the Apache License, Version 2.0 (the "License");
 8  
  * you may not use this file except in compliance with the License.
 9  
  * You may obtain a copy of the License at
 10  
  * 
 11  
  *      http://www.apache.org/licenses/LICENSE-2.0
 12  
  * 
 13  
  * Unless required by applicable law or agreed to in writing, software
 14  
  * distributed under the License is distributed on an "AS IS" BASIS,
 15  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 16  
  * See the License for the specific language governing permissions and
 17  
  * limitations under the License.
 18  
  * #L%
 19  
  */
 20  
 
 21  
 package com.allanbank.mongodb.bson.io;
 22  
 
 23  
 import java.io.EOFException;
 24  
 import java.io.StreamCorruptedException;
 25  
 import java.util.List;
 26  
 import java.util.SortedMap;
 27  
 
 28  
 /**
 29  
  * StringDecoderCache provides a cache for decoding strings.
 30  
  * <p>
 31  
  * This class is thread safe. Thread safety is achieved by maintaining two data
 32  
  * structures. The first is a map of seen strings to the number of times the
 33  
  * string has been seen. The map is maintained by the base class:
 34  
  * {@link AbstractStringCache}. The second structure is a trie of the cached
 35  
  * {@code byte[]} to the decoded string.
 36  
  * </p>
 37  
  * 
 38  
  * @api.no This class is <b>NOT</b> part of the drivers API. This class may be
 39  
  *         mutated in incompatible ways between any two releases of the driver.
 40  
  * @copyright 2014, Allanbank Consulting, Inc., All Rights Reserved
 41  
  */
 42  
 public class StringDecoderCache extends AbstractStringCache {
 43  
 
 44  
     /** The cached decoded strings in the form of a trie cache. */
 45  
     private TrieCache myTrieCache;
 46  
 
 47  
     /**
 48  
      * Creates a new TrieStringCache.
 49  
      */
 50  
     public StringDecoderCache() {
 51  3476
         super();
 52  
 
 53  3476
         myTrieCache = new TrieCache();
 54  3476
     }
 55  
 
 56  
     /**
 57  
      * Decode a string of a known length. The last byte should be a zero byte
 58  
      * and will not be included in the decoded string.
 59  
      * 
 60  
      * @param source
 61  
      *            The source of the bytes in the string.
 62  
      * @param offset
 63  
      *            The offset of the first byte to decode.
 64  
      * @param length
 65  
      *            The length of the string to decode with a terminal zero byte.
 66  
      * @return The decoded string.
 67  
      * @throws StreamCorruptedException
 68  
      *             On the decoding of the string failing.
 69  
      * @throws EOFException
 70  
      *             On the array not containing enough bytes to decoded.
 71  
      */
 72  
     public String find(final byte[] source, final int offset, final int length)
 73  
             throws StreamCorruptedException, EOFException {
 74  
 
 75  
         // Check for the easiest value to cache.
 76  71642
         if (length <= 1) {
 77  18
             return "";
 78  
         }
 79  
         // And for values blowing out the cache.
 80  71623
         else if (myMaxCacheLength < length) {
 81  9
             return null;
 82  
         }
 83  
         // Check for the terminal null.
 84  71615
         else if (source[(offset + length) - 1] != 0) {
 85  2
             throw new StreamCorruptedException(
 86  
                     "Expected a null character at the end of the string.");
 87  
         }
 88  
 
 89  71613
         return myTrieCache.find(source, offset, length);
 90  
     }
 91  
 
 92  
     /**
 93  
      * Clears the cache.
 94  
      */
 95  
     @Override
 96  
     protected void clear() {
 97  35
         myTrieCache = new TrieCache();
 98  35
         super.clear();
 99  35
     }
 100  
 
 101  
     /**
 102  
      * Rebuilds the cache from the current collection of seen entries.
 103  
      */
 104  
     @Override
 105  
     protected void rebuildCache() {
 106  2416
         final SortedMap<Integer, List<SeenString>> order = buildCacheGroups();
 107  
 
 108  
         // Rebuild the cache.
 109  2416
         final TrieCache cache = new TrieCache();
 110  2416
         int count = 0;
 111  2416
         for (final List<SeenString> seenAtCount : order.values()) {
 112  2565
             for (final SeenString seen : seenAtCount) {
 113  5717
                 if (count < myMaxCachEntries) {
 114  5614
                     cache.addEntry(seen.getBytes(), seen.getValue());
 115  5614
                     count += 1;
 116  
                 }
 117  
                 else {
 118  103
                     mySeen.remove(seen.getValue());
 119  
                 }
 120  5717
             }
 121  2565
         }
 122  
 
 123  2416
         myTrieCache = cache;
 124  2416
     }
 125  
 
 126  
     /**
 127  
      * A fast cache for bytes to decoded strings.
 128  
      * <p>
 129  
      * The trie is designed to be used in a RCU fashion where the trie is
 130  
      * constructed as a series of {@link #addEntry(byte[], String)} calls and
 131  
      * once the trie is fully populated then is can be queried using
 132  
      * {@link #find(byte[], int, int)}.
 133  
      * </p>
 134  
      * 
 135  
      * @api.no This class is <b>NOT</b> part of the drivers API. This class may
 136  
      *         be mutated in incompatible ways between any two releases of the
 137  
      *         driver.
 138  
      * @copyright 2014, Allanbank Consulting, Inc., All Rights Reserved
 139  
      */
 140  
     protected static class TrieCache {
 141  
         /** The root nodes for the trie. */
 142  
         private final TrieNode[] myRoots;
 143  
 
 144  
         /**
 145  
          * Creates a new TrieCache.
 146  
          */
 147  5927
         public TrieCache() {
 148  5927
             myRoots = new TrieNode[256];
 149  5927
         }
 150  
 
 151  
         /**
 152  
          * Adds an entry to the trie cache.
 153  
          * 
 154  
          * @param source
 155  
          *            The source of the bytes in the string.
 156  
          * @param value
 157  
          *            The value to associate with the entry.
 158  
          */
 159  
         public void addEntry(final byte[] source, final String value) {
 160  
 
 161  
             // Minus 1 for the terminal null.
 162  5614
             final int last = source.length - 1;
 163  
 
 164  5614
             final int firstByte = source[0] & 0xFF;
 165  5614
             TrieNode node = myRoots[firstByte];
 166  5614
             if (node == null) {
 167  3064
                 node = myRoots[firstByte] = new TrieNode(source[0]);
 168  
             }
 169  
 
 170  
             // Walk the Trie to the end node.
 171  23032
             for (int i = 1; i < last; ++i) {
 172  17418
                 TrieNode child = node.child(source[i]);
 173  
 
 174  17418
                 if (child == null) {
 175  16685
                     child = new TrieNode(source[i]);
 176  16685
                     node.addChild(child);
 177  
                 }
 178  
 
 179  17418
                 node = child;
 180  
             }
 181  
 
 182  5614
             node.setDecoded(value);
 183  
 
 184  5614
         }
 185  
 
 186  
         /**
 187  
          * Finds the value for the byte string, if known to the cache. Returns
 188  
          * null on a cache miss.
 189  
          * 
 190  
          * @param source
 191  
          *            The source of the bytes in the string.
 192  
          * @param offset
 193  
          *            The offset of the first byte to decode.
 194  
          * @param length
 195  
          *            The length of the string to decode with a terminal zero
 196  
          *            byte.
 197  
          * @return The cached decoded string or null in the case of a cache
 198  
          *         miss.
 199  
          */
 200  
         public String find(final byte[] source, final int offset,
 201  
                 final int length) {
 202  
 
 203  
             // Minus 1 for the terminal null.
 204  71612
             final int last = offset + (length - 1);
 205  
 
 206  71613
             final int firstByte = source[offset] & 0xFF;
 207  71612
             TrieNode node = myRoots[firstByte];
 208  
 
 209  
             // Walk the Trie to the end node.
 210  156278
             for (int i = offset + 1; (node != null) && (i < last); ++i) {
 211  84668
                 node = node.child(source[i]);
 212  
             }
 213  
 
 214  71611
             return (node != null) ? node.getDecoded() : null;
 215  
         }
 216  
 
 217  
         /**
 218  
          * Node provides a single node in the trie.
 219  
          * <p>
 220  
          * {@link #myChildren} is a two dimension array for the children of this
 221  
          * node. We use a 2 dimension array to reduce the memory footprint of
 222  
          * the trie by taking advantage of the clustering within the encoding of
 223  
          * languages. The first dimension of the array we refer to as the
 224  
          * {@code zone} and represents the first/high nibble of the value for
 225  
          * the child node. The second dimension of the array is the second/low
 226  
          * nibble.
 227  
          * </p>
 228  
          * 
 229  
          * @api.no This class is <b>NOT</b> part of the drivers API. This class
 230  
          *         may be mutated in incompatible ways between any two releases
 231  
          *         of the driver.
 232  
          * @copyright 2014, Allanbank Consulting, Inc., All Rights Reserved
 233  
          */
 234  
         protected static class TrieNode {
 235  
             /**
 236  
              * The children of the node. See the class javadoc for a description
 237  
              * of the structure.
 238  
              */
 239  
             private TrieNode[][] myChildren;
 240  
 
 241  
             /** The decoded value for the node. May be <code>null</code>. */
 242  
             private String myDecoded;
 243  
 
 244  
             /** The value for the node. */
 245  
             private final byte myValue;
 246  
 
 247  
             /**
 248  
              * Creates a new Node.
 249  
              * 
 250  
              * @param value
 251  
              *            The value for the node.
 252  
              */
 253  19749
             public TrieNode(final byte value) {
 254  19749
                 myValue = value;
 255  19749
                 myDecoded = null;
 256  
 
 257  
                 // No children, yet.
 258  19749
                 myChildren = null;
 259  19749
             }
 260  
 
 261  
             /**
 262  
              * Adds a child node to this node.
 263  
              * 
 264  
              * @param child
 265  
              *            The child node to add.
 266  
              */
 267  
             public void addChild(final TrieNode child) {
 268  16685
                 final int value = child.getValue();
 269  16685
                 final int zone = (value & 0xF0) >> 4;
 270  16685
                 final int index = (value & 0x0F);
 271  
 
 272  16685
                 if (myChildren == null) {
 273  14216
                     myChildren = new TrieNode[16][];
 274  14216
                     myChildren[zone] = new TrieNode[16];
 275  
                 }
 276  2469
                 else if (myChildren[zone] == null) {
 277  64
                     myChildren[zone] = new TrieNode[16];
 278  
                 }
 279  
 
 280  16685
                 myChildren[zone][index] = child;
 281  16685
             }
 282  
 
 283  
             /**
 284  
              * Returns the child node with the specified value.
 285  
              * 
 286  
              * @param value
 287  
              *            The value for the child to find.
 288  
              * @return The child node for the value or null if there is node
 289  
              *         child with that value.
 290  
              */
 291  
             public TrieNode child(final byte value) {
 292  102086
                 final int zone = (value & 0xF0) >> 4;
 293  102086
                 final int index = (value & 0x0F);
 294  
 
 295  102086
                 if ((myChildren != null) && (myChildren[zone] != null)) {
 296  87335
                     return myChildren[zone][index];
 297  
                 }
 298  14751
                 return null;
 299  
             }
 300  
 
 301  
             /**
 302  
              * Returns the node's decoded value.
 303  
              * 
 304  
              * @return The node's decoded value.
 305  
              */
 306  
             public String getDecoded() {
 307  1036
                 return myDecoded;
 308  
             }
 309  
 
 310  
             /**
 311  
              * Returns the node's value.
 312  
              * 
 313  
              * @return The node's value.
 314  
              */
 315  
             public byte getValue() {
 316  16685
                 return myValue;
 317  
             }
 318  
 
 319  
             /**
 320  
              * Sets the decoded value for the node.
 321  
              * 
 322  
              * @param decoded
 323  
              *            The decoded value for the node.
 324  
              */
 325  
             public void setDecoded(final String decoded) {
 326  5614
                 myDecoded = decoded;
 327  5614
             }
 328  
         }
 329  
     }
 330  
 
 331  
 }