View Javadoc
1   /*
2    * #%L
3    * StringDecoderCache.java - mongodb-async-driver - Allanbank Consulting, Inc.
4    * %%
5    * Copyright (C) 2011 - 2014 Allanbank Consulting, Inc.
6    * %%
7    * Licensed under the Apache License, Version 2.0 (the "License");
8    * you may not use this file except in compliance with the License.
9    * You may obtain a copy of the License at
10   * 
11   *      http://www.apache.org/licenses/LICENSE-2.0
12   * 
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   * #L%
19   */
20  
21  package com.allanbank.mongodb.bson.io;
22  
23  import java.io.EOFException;
24  import java.io.StreamCorruptedException;
25  import java.util.List;
26  import java.util.SortedMap;
27  
28  /**
29   * StringDecoderCache provides a cache for decoding strings.
30   * <p>
31   * This class is thread safe. Thread safety is achieved by maintaining two data
32   * structures. The first is a map of seen strings to the number of times the
33   * string has been seen. The map is maintained by the base class:
34   * {@link AbstractStringCache}. The second structure is a trie of the cached
35   * {@code byte[]} to the decoded string.
36   * </p>
37   * 
38   * @api.no This class is <b>NOT</b> part of the drivers API. This class may be
39   *         mutated in incompatible ways between any two releases of the driver.
40   * @copyright 2014, Allanbank Consulting, Inc., All Rights Reserved
41   */
42  public class StringDecoderCache extends AbstractStringCache {
43  
44      /** The cached decoded strings in the form of a trie cache. */
45      private TrieCache myTrieCache;
46  
47      /**
48       * Creates a new TrieStringCache.
49       */
50      public StringDecoderCache() {
51          super();
52  
53          myTrieCache = new TrieCache();
54      }
55  
56      /**
57       * Decode a string of a known length. The last byte should be a zero byte
58       * and will not be included in the decoded string.
59       * 
60       * @param source
61       *            The source of the bytes in the string.
62       * @param offset
63       *            The offset of the first byte to decode.
64       * @param length
65       *            The length of the string to decode with a terminal zero byte.
66       * @return The decoded string.
67       * @throws StreamCorruptedException
68       *             On the decoding of the string failing.
69       * @throws EOFException
70       *             On the array not containing enough bytes to decoded.
71       */
72      public String find(final byte[] source, final int offset, final int length)
73              throws StreamCorruptedException, EOFException {
74  
75          // Check for the easiest value to cache.
76          if (length <= 1) {
77              return "";
78          }
79          // And for values blowing out the cache.
80          else if (myMaxCacheLength < length) {
81              return null;
82          }
83          // Check for the terminal null.
84          else if (source[(offset + length) - 1] != 0) {
85              throw new StreamCorruptedException(
86                      "Expected a null character at the end of the string.");
87          }
88  
89          return myTrieCache.find(source, offset, length);
90      }
91  
92      /**
93       * Clears the cache.
94       */
95      @Override
96      protected void clear() {
97          myTrieCache = new TrieCache();
98          super.clear();
99      }
100 
101     /**
102      * Rebuilds the cache from the current collection of seen entries.
103      */
104     @Override
105     protected void rebuildCache() {
106         final SortedMap<Integer, List<SeenString>> order = buildCacheGroups();
107 
108         // Rebuild the cache.
109         final TrieCache cache = new TrieCache();
110         int count = 0;
111         for (final List<SeenString> seenAtCount : order.values()) {
112             for (final SeenString seen : seenAtCount) {
113                 if (count < myMaxCachEntries) {
114                     cache.addEntry(seen.getBytes(), seen.getValue());
115                     count += 1;
116                 }
117                 else {
118                     mySeen.remove(seen.getValue());
119                 }
120             }
121         }
122 
123         myTrieCache = cache;
124     }
125 
126     /**
127      * A fast cache for bytes to decoded strings.
128      * <p>
129      * The trie is designed to be used in a RCU fashion where the trie is
130      * constructed as a series of {@link #addEntry(byte[], String)} calls and
131      * once the trie is fully populated then is can be queried using
132      * {@link #find(byte[], int, int)}.
133      * </p>
134      * 
135      * @api.no This class is <b>NOT</b> part of the drivers API. This class may
136      *         be mutated in incompatible ways between any two releases of the
137      *         driver.
138      * @copyright 2014, Allanbank Consulting, Inc., All Rights Reserved
139      */
140     protected static class TrieCache {
141         /** The root nodes for the trie. */
142         private final TrieNode[] myRoots;
143 
144         /**
145          * Creates a new TrieCache.
146          */
147         public TrieCache() {
148             myRoots = new TrieNode[256];
149         }
150 
151         /**
152          * Adds an entry to the trie cache.
153          * 
154          * @param source
155          *            The source of the bytes in the string.
156          * @param value
157          *            The value to associate with the entry.
158          */
159         public void addEntry(final byte[] source, final String value) {
160 
161             // Minus 1 for the terminal null.
162             final int last = source.length - 1;
163 
164             final int firstByte = source[0] & 0xFF;
165             TrieNode node = myRoots[firstByte];
166             if (node == null) {
167                 node = myRoots[firstByte] = new TrieNode(source[0]);
168             }
169 
170             // Walk the Trie to the end node.
171             for (int i = 1; i < last; ++i) {
172                 TrieNode child = node.child(source[i]);
173 
174                 if (child == null) {
175                     child = new TrieNode(source[i]);
176                     node.addChild(child);
177                 }
178 
179                 node = child;
180             }
181 
182             node.setDecoded(value);
183 
184         }
185 
186         /**
187          * Finds the value for the byte string, if known to the cache. Returns
188          * null on a cache miss.
189          * 
190          * @param source
191          *            The source of the bytes in the string.
192          * @param offset
193          *            The offset of the first byte to decode.
194          * @param length
195          *            The length of the string to decode with a terminal zero
196          *            byte.
197          * @return The cached decoded string or null in the case of a cache
198          *         miss.
199          */
200         public String find(final byte[] source, final int offset,
201                 final int length) {
202 
203             // Minus 1 for the terminal null.
204             final int last = offset + (length - 1);
205 
206             final int firstByte = source[offset] & 0xFF;
207             TrieNode node = myRoots[firstByte];
208 
209             // Walk the Trie to the end node.
210             for (int i = offset + 1; (node != null) && (i < last); ++i) {
211                 node = node.child(source[i]);
212             }
213 
214             return (node != null) ? node.getDecoded() : null;
215         }
216 
217         /**
218          * Node provides a single node in the trie.
219          * <p>
220          * {@link #myChildren} is a two dimension array for the children of this
221          * node. We use a 2 dimension array to reduce the memory footprint of
222          * the trie by taking advantage of the clustering within the encoding of
223          * languages. The first dimension of the array we refer to as the
224          * {@code zone} and represents the first/high nibble of the value for
225          * the child node. The second dimension of the array is the second/low
226          * nibble.
227          * </p>
228          * 
229          * @api.no This class is <b>NOT</b> part of the drivers API. This class
230          *         may be mutated in incompatible ways between any two releases
231          *         of the driver.
232          * @copyright 2014, Allanbank Consulting, Inc., All Rights Reserved
233          */
234         protected static class TrieNode {
235             /**
236              * The children of the node. See the class javadoc for a description
237              * of the structure.
238              */
239             private TrieNode[][] myChildren;
240 
241             /** The decoded value for the node. May be <code>null</code>. */
242             private String myDecoded;
243 
244             /** The value for the node. */
245             private final byte myValue;
246 
247             /**
248              * Creates a new Node.
249              * 
250              * @param value
251              *            The value for the node.
252              */
253             public TrieNode(final byte value) {
254                 myValue = value;
255                 myDecoded = null;
256 
257                 // No children, yet.
258                 myChildren = null;
259             }
260 
261             /**
262              * Adds a child node to this node.
263              * 
264              * @param child
265              *            The child node to add.
266              */
267             public void addChild(final TrieNode child) {
268                 final int value = child.getValue();
269                 final int zone = (value & 0xF0) >> 4;
270                 final int index = (value & 0x0F);
271 
272                 if (myChildren == null) {
273                     myChildren = new TrieNode[16][];
274                     myChildren[zone] = new TrieNode[16];
275                 }
276                 else if (myChildren[zone] == null) {
277                     myChildren[zone] = new TrieNode[16];
278                 }
279 
280                 myChildren[zone][index] = child;
281             }
282 
283             /**
284              * Returns the child node with the specified value.
285              * 
286              * @param value
287              *            The value for the child to find.
288              * @return The child node for the value or null if there is node
289              *         child with that value.
290              */
291             public TrieNode child(final byte value) {
292                 final int zone = (value & 0xF0) >> 4;
293                 final int index = (value & 0x0F);
294 
295                 if ((myChildren != null) && (myChildren[zone] != null)) {
296                     return myChildren[zone][index];
297                 }
298                 return null;
299             }
300 
301             /**
302              * Returns the node's decoded value.
303              * 
304              * @return The node's decoded value.
305              */
306             public String getDecoded() {
307                 return myDecoded;
308             }
309 
310             /**
311              * Returns the node's value.
312              * 
313              * @return The node's value.
314              */
315             public byte getValue() {
316                 return myValue;
317             }
318 
319             /**
320              * Sets the decoded value for the node.
321              * 
322              * @param decoded
323              *            The decoded value for the node.
324              */
325             public void setDecoded(final String decoded) {
326                 myDecoded = decoded;
327             }
328         }
329     }
330 
331 }