| Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
| StringDecoderCache |
|
| 2.5384615384615383;2.538 | ||||
| StringDecoderCache$TrieCache |
|
| 2.5384615384615383;2.538 | ||||
| StringDecoderCache$TrieCache$TrieNode |
|
| 2.5384615384615383;2.538 |
| 1 | /* | |
| 2 | * #%L | |
| 3 | * StringDecoderCache.java - mongodb-async-driver - Allanbank Consulting, Inc. | |
| 4 | * %% | |
| 5 | * Copyright (C) 2011 - 2014 Allanbank Consulting, Inc. | |
| 6 | * %% | |
| 7 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
| 8 | * you may not use this file except in compliance with the License. | |
| 9 | * You may obtain a copy of the License at | |
| 10 | * | |
| 11 | * http://www.apache.org/licenses/LICENSE-2.0 | |
| 12 | * | |
| 13 | * Unless required by applicable law or agreed to in writing, software | |
| 14 | * distributed under the License is distributed on an "AS IS" BASIS, | |
| 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 16 | * See the License for the specific language governing permissions and | |
| 17 | * limitations under the License. | |
| 18 | * #L% | |
| 19 | */ | |
| 20 | ||
| 21 | package com.allanbank.mongodb.bson.io; | |
| 22 | ||
| 23 | import java.io.EOFException; | |
| 24 | import java.io.StreamCorruptedException; | |
| 25 | import java.util.List; | |
| 26 | import java.util.SortedMap; | |
| 27 | ||
| 28 | /** | |
| 29 | * StringDecoderCache provides a cache for decoding strings. | |
| 30 | * <p> | |
| 31 | * This class is thread safe. Thread safety is achieved by maintaining two data | |
| 32 | * structures. The first is a map of seen strings to the number of times the | |
| 33 | * string has been seen. The map is maintained by the base class: | |
| 34 | * {@link AbstractStringCache}. The second structure is a trie of the cached | |
| 35 | * {@code byte[]} to the decoded string. | |
| 36 | * </p> | |
| 37 | * | |
| 38 | * @api.no This class is <b>NOT</b> part of the drivers API. This class may be | |
| 39 | * mutated in incompatible ways between any two releases of the driver. | |
| 40 | * @copyright 2014, Allanbank Consulting, Inc., All Rights Reserved | |
| 41 | */ | |
| 42 | public class StringDecoderCache extends AbstractStringCache { | |
| 43 | ||
| 44 | /** The cached decoded strings in the form of a trie cache. */ | |
| 45 | private TrieCache myTrieCache; | |
| 46 | ||
| 47 | /** | |
| 48 | * Creates a new TrieStringCache. | |
| 49 | */ | |
| 50 | public StringDecoderCache() { | |
| 51 | 3476 | super(); |
| 52 | ||
| 53 | 3476 | myTrieCache = new TrieCache(); |
| 54 | 3476 | } |
| 55 | ||
| 56 | /** | |
| 57 | * Decode a string of a known length. The last byte should be a zero byte | |
| 58 | * and will not be included in the decoded string. | |
| 59 | * | |
| 60 | * @param source | |
| 61 | * The source of the bytes in the string. | |
| 62 | * @param offset | |
| 63 | * The offset of the first byte to decode. | |
| 64 | * @param length | |
| 65 | * The length of the string to decode with a terminal zero byte. | |
| 66 | * @return The decoded string. | |
| 67 | * @throws StreamCorruptedException | |
| 68 | * On the decoding of the string failing. | |
| 69 | * @throws EOFException | |
| 70 | * On the array not containing enough bytes to decoded. | |
| 71 | */ | |
| 72 | public String find(final byte[] source, final int offset, final int length) | |
| 73 | throws StreamCorruptedException, EOFException { | |
| 74 | ||
| 75 | // Check for the easiest value to cache. | |
| 76 | 71642 | if (length <= 1) { |
| 77 | 18 | return ""; |
| 78 | } | |
| 79 | // And for values blowing out the cache. | |
| 80 | 71623 | else if (myMaxCacheLength < length) { |
| 81 | 9 | return null; |
| 82 | } | |
| 83 | // Check for the terminal null. | |
| 84 | 71615 | else if (source[(offset + length) - 1] != 0) { |
| 85 | 2 | throw new StreamCorruptedException( |
| 86 | "Expected a null character at the end of the string."); | |
| 87 | } | |
| 88 | ||
| 89 | 71613 | return myTrieCache.find(source, offset, length); |
| 90 | } | |
| 91 | ||
| 92 | /** | |
| 93 | * Clears the cache. | |
| 94 | */ | |
| 95 | @Override | |
| 96 | protected void clear() { | |
| 97 | 35 | myTrieCache = new TrieCache(); |
| 98 | 35 | super.clear(); |
| 99 | 35 | } |
| 100 | ||
| 101 | /** | |
| 102 | * Rebuilds the cache from the current collection of seen entries. | |
| 103 | */ | |
| 104 | @Override | |
| 105 | protected void rebuildCache() { | |
| 106 | 2416 | final SortedMap<Integer, List<SeenString>> order = buildCacheGroups(); |
| 107 | ||
| 108 | // Rebuild the cache. | |
| 109 | 2416 | final TrieCache cache = new TrieCache(); |
| 110 | 2416 | int count = 0; |
| 111 | 2416 | for (final List<SeenString> seenAtCount : order.values()) { |
| 112 | 2565 | for (final SeenString seen : seenAtCount) { |
| 113 | 5717 | if (count < myMaxCachEntries) { |
| 114 | 5614 | cache.addEntry(seen.getBytes(), seen.getValue()); |
| 115 | 5614 | count += 1; |
| 116 | } | |
| 117 | else { | |
| 118 | 103 | mySeen.remove(seen.getValue()); |
| 119 | } | |
| 120 | 5717 | } |
| 121 | 2565 | } |
| 122 | ||
| 123 | 2416 | myTrieCache = cache; |
| 124 | 2416 | } |
| 125 | ||
| 126 | /** | |
| 127 | * A fast cache for bytes to decoded strings. | |
| 128 | * <p> | |
| 129 | * The trie is designed to be used in a RCU fashion where the trie is | |
| 130 | * constructed as a series of {@link #addEntry(byte[], String)} calls and | |
| 131 | * once the trie is fully populated then is can be queried using | |
| 132 | * {@link #find(byte[], int, int)}. | |
| 133 | * </p> | |
| 134 | * | |
| 135 | * @api.no This class is <b>NOT</b> part of the drivers API. This class may | |
| 136 | * be mutated in incompatible ways between any two releases of the | |
| 137 | * driver. | |
| 138 | * @copyright 2014, Allanbank Consulting, Inc., All Rights Reserved | |
| 139 | */ | |
| 140 | protected static class TrieCache { | |
| 141 | /** The root nodes for the trie. */ | |
| 142 | private final TrieNode[] myRoots; | |
| 143 | ||
| 144 | /** | |
| 145 | * Creates a new TrieCache. | |
| 146 | */ | |
| 147 | 5927 | public TrieCache() { |
| 148 | 5927 | myRoots = new TrieNode[256]; |
| 149 | 5927 | } |
| 150 | ||
| 151 | /** | |
| 152 | * Adds an entry to the trie cache. | |
| 153 | * | |
| 154 | * @param source | |
| 155 | * The source of the bytes in the string. | |
| 156 | * @param value | |
| 157 | * The value to associate with the entry. | |
| 158 | */ | |
| 159 | public void addEntry(final byte[] source, final String value) { | |
| 160 | ||
| 161 | // Minus 1 for the terminal null. | |
| 162 | 5614 | final int last = source.length - 1; |
| 163 | ||
| 164 | 5614 | final int firstByte = source[0] & 0xFF; |
| 165 | 5614 | TrieNode node = myRoots[firstByte]; |
| 166 | 5614 | if (node == null) { |
| 167 | 3064 | node = myRoots[firstByte] = new TrieNode(source[0]); |
| 168 | } | |
| 169 | ||
| 170 | // Walk the Trie to the end node. | |
| 171 | 23032 | for (int i = 1; i < last; ++i) { |
| 172 | 17418 | TrieNode child = node.child(source[i]); |
| 173 | ||
| 174 | 17418 | if (child == null) { |
| 175 | 16685 | child = new TrieNode(source[i]); |
| 176 | 16685 | node.addChild(child); |
| 177 | } | |
| 178 | ||
| 179 | 17418 | node = child; |
| 180 | } | |
| 181 | ||
| 182 | 5614 | node.setDecoded(value); |
| 183 | ||
| 184 | 5614 | } |
| 185 | ||
| 186 | /** | |
| 187 | * Finds the value for the byte string, if known to the cache. Returns | |
| 188 | * null on a cache miss. | |
| 189 | * | |
| 190 | * @param source | |
| 191 | * The source of the bytes in the string. | |
| 192 | * @param offset | |
| 193 | * The offset of the first byte to decode. | |
| 194 | * @param length | |
| 195 | * The length of the string to decode with a terminal zero | |
| 196 | * byte. | |
| 197 | * @return The cached decoded string or null in the case of a cache | |
| 198 | * miss. | |
| 199 | */ | |
| 200 | public String find(final byte[] source, final int offset, | |
| 201 | final int length) { | |
| 202 | ||
| 203 | // Minus 1 for the terminal null. | |
| 204 | 71612 | final int last = offset + (length - 1); |
| 205 | ||
| 206 | 71613 | final int firstByte = source[offset] & 0xFF; |
| 207 | 71612 | TrieNode node = myRoots[firstByte]; |
| 208 | ||
| 209 | // Walk the Trie to the end node. | |
| 210 | 156278 | for (int i = offset + 1; (node != null) && (i < last); ++i) { |
| 211 | 84668 | node = node.child(source[i]); |
| 212 | } | |
| 213 | ||
| 214 | 71611 | return (node != null) ? node.getDecoded() : null; |
| 215 | } | |
| 216 | ||
| 217 | /** | |
| 218 | * Node provides a single node in the trie. | |
| 219 | * <p> | |
| 220 | * {@link #myChildren} is a two dimension array for the children of this | |
| 221 | * node. We use a 2 dimension array to reduce the memory footprint of | |
| 222 | * the trie by taking advantage of the clustering within the encoding of | |
| 223 | * languages. The first dimension of the array we refer to as the | |
| 224 | * {@code zone} and represents the first/high nibble of the value for | |
| 225 | * the child node. The second dimension of the array is the second/low | |
| 226 | * nibble. | |
| 227 | * </p> | |
| 228 | * | |
| 229 | * @api.no This class is <b>NOT</b> part of the drivers API. This class | |
| 230 | * may be mutated in incompatible ways between any two releases | |
| 231 | * of the driver. | |
| 232 | * @copyright 2014, Allanbank Consulting, Inc., All Rights Reserved | |
| 233 | */ | |
| 234 | protected static class TrieNode { | |
| 235 | /** | |
| 236 | * The children of the node. See the class javadoc for a description | |
| 237 | * of the structure. | |
| 238 | */ | |
| 239 | private TrieNode[][] myChildren; | |
| 240 | ||
| 241 | /** The decoded value for the node. May be <code>null</code>. */ | |
| 242 | private String myDecoded; | |
| 243 | ||
| 244 | /** The value for the node. */ | |
| 245 | private final byte myValue; | |
| 246 | ||
| 247 | /** | |
| 248 | * Creates a new Node. | |
| 249 | * | |
| 250 | * @param value | |
| 251 | * The value for the node. | |
| 252 | */ | |
| 253 | 19749 | public TrieNode(final byte value) { |
| 254 | 19749 | myValue = value; |
| 255 | 19749 | myDecoded = null; |
| 256 | ||
| 257 | // No children, yet. | |
| 258 | 19749 | myChildren = null; |
| 259 | 19749 | } |
| 260 | ||
| 261 | /** | |
| 262 | * Adds a child node to this node. | |
| 263 | * | |
| 264 | * @param child | |
| 265 | * The child node to add. | |
| 266 | */ | |
| 267 | public void addChild(final TrieNode child) { | |
| 268 | 16685 | final int value = child.getValue(); |
| 269 | 16685 | final int zone = (value & 0xF0) >> 4; |
| 270 | 16685 | final int index = (value & 0x0F); |
| 271 | ||
| 272 | 16685 | if (myChildren == null) { |
| 273 | 14216 | myChildren = new TrieNode[16][]; |
| 274 | 14216 | myChildren[zone] = new TrieNode[16]; |
| 275 | } | |
| 276 | 2469 | else if (myChildren[zone] == null) { |
| 277 | 64 | myChildren[zone] = new TrieNode[16]; |
| 278 | } | |
| 279 | ||
| 280 | 16685 | myChildren[zone][index] = child; |
| 281 | 16685 | } |
| 282 | ||
| 283 | /** | |
| 284 | * Returns the child node with the specified value. | |
| 285 | * | |
| 286 | * @param value | |
| 287 | * The value for the child to find. | |
| 288 | * @return The child node for the value or null if there is node | |
| 289 | * child with that value. | |
| 290 | */ | |
| 291 | public TrieNode child(final byte value) { | |
| 292 | 102086 | final int zone = (value & 0xF0) >> 4; |
| 293 | 102086 | final int index = (value & 0x0F); |
| 294 | ||
| 295 | 102086 | if ((myChildren != null) && (myChildren[zone] != null)) { |
| 296 | 87335 | return myChildren[zone][index]; |
| 297 | } | |
| 298 | 14751 | return null; |
| 299 | } | |
| 300 | ||
| 301 | /** | |
| 302 | * Returns the node's decoded value. | |
| 303 | * | |
| 304 | * @return The node's decoded value. | |
| 305 | */ | |
| 306 | public String getDecoded() { | |
| 307 | 1036 | return myDecoded; |
| 308 | } | |
| 309 | ||
| 310 | /** | |
| 311 | * Returns the node's value. | |
| 312 | * | |
| 313 | * @return The node's value. | |
| 314 | */ | |
| 315 | public byte getValue() { | |
| 316 | 16685 | return myValue; |
| 317 | } | |
| 318 | ||
| 319 | /** | |
| 320 | * Sets the decoded value for the node. | |
| 321 | * | |
| 322 | * @param decoded | |
| 323 | * The decoded value for the node. | |
| 324 | */ | |
| 325 | public void setDecoded(final String decoded) { | |
| 326 | 5614 | myDecoded = decoded; |
| 327 | 5614 | } |
| 328 | } | |
| 329 | } | |
| 330 | ||
| 331 | } |