1 /*
2 * #%L
3 * AbstractStringCache.java - mongodb-async-driver - Allanbank Consulting, Inc.
4 * %%
5 * Copyright (C) 2011 - 2014 Allanbank Consulting, Inc.
6 * %%
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 * #L%
19 */
20
21 package com.allanbank.mongodb.bson.io;
22
23 import java.io.Serializable;
24 import java.util.Comparator;
25 import java.util.Iterator;
26 import java.util.LinkedList;
27 import java.util.List;
28 import java.util.SortedMap;
29 import java.util.TreeMap;
30 import java.util.concurrent.ConcurrentHashMap;
31 import java.util.concurrent.ConcurrentMap;
32 import java.util.concurrent.atomic.AtomicInteger;
33
34 /**
35 * AbstractStringCache provides the basic functionality for tracking string
36 * usage and triggering rebuilds of the cache over time.
37 * <p>
38 * The basic function of the cache is to maintain two structures. The first
39 * (provided by this class) tracks the usage of strings in the cache via a
40 * 'seen' {@link ConcurrentMap}. As the cache sees more usage it will
41 * periodically trim entries from the map that are not being used often (this is
42 * to limit memory usage). Once the cache thinks it has seen enough usage it
43 * will trigger the building of the runtime cache. This runtime cache does not
44 * require any locking as it is read-only after being constructed.
45 * </p>
46 * <p>
47 * There are two controls on the amount of caching instances of this class will
48 * do:
49 * </p>
50 * <ol>
51 * <li>
52 * {@link #getMaxCacheLength()} limits the length of the encoded bytes this
53 * class will try and retrieve from the cache. Setting this value to zero
54 * disables the cache and limit any memory overhead.</li>
55 * <li>{@link #getMaxCacheEntries()} controls the number of cached entries in
56 * the runtime-cache and how often the accumulated statistics are trimmed. Each
57 * entry represents a single encoded string.</li>
58 * </ol>
59 *
60 *
61 * @api.no This class is <b>NOT</b> part of the drivers API. This class may be
62 * mutated in incompatible ways between any two releases of the driver.
63 * @copyright 2014, Allanbank Consulting, Inc., All Rights Reserved
64 */
65 public abstract class AbstractStringCache {
66
67 /** The default maximum number of entries to keep in the cache. */
68 public static final int DEFAULT_MAX_CACHE_ENTRIES = 24;
69
70 /** The default maximum length byte array to cache. */
71 public static final int DEFAULT_MAX_CACHE_LENGTH = 25;
72
73 /** The maximum value for the multiplier. */
74 protected static final int MAX_MULTIPLIER = 100;
75
76 /**
77 * The maximum length of a string to cache. This can be used to stop a
78 * single long string from pushing useful values out of the cache.
79 */
80 protected int myMaxCacheLength;
81
82 /** The maximum number of entries to have in the cache. */
83 protected int myMaxCachEntries;
84
85 /**
86 * The multiplier for the number of times the cache is used before we
87 * re-load the cache.
88 */
89 protected volatile int myMaxCachEntriesMultiplier;
90
91 /**
92 * The map of seen strings that will be used periodically to build the cache
93 * cache.
94 */
95 protected final ConcurrentMap<String, SeenString> mySeen;
96
97 /**
98 * The number of time the cache has been used since the last re-load of
99 * runtime, read-only cache.
100 */
101 protected final AtomicInteger myUseCount;
102
103 /**
104 * Creates a new AbstractStringCache.
105 */
106 public AbstractStringCache() {
107 super();
108
109 mySeen = new ConcurrentHashMap<String, SeenString>(1024);
110
111 myMaxCacheLength = DEFAULT_MAX_CACHE_LENGTH;
112 myMaxCachEntries = DEFAULT_MAX_CACHE_ENTRIES;
113
114 myMaxCachEntriesMultiplier = 0;
115 myUseCount = new AtomicInteger(0);
116 }
117
118 /**
119 * Returns the maximum node count.
120 *
121 * @return The maximum node count.
122 */
123 public int getMaxCacheEntries() {
124 return myMaxCachEntries;
125 }
126
127 /**
128 * Returns the maximum length of strings to cache. This can be used to stop
129 * a single long string from pushing useful values out of the cache.
130 *
131 * @return The maximum length of strings to cache.
132 */
133 public int getMaxCacheLength() {
134 return myMaxCacheLength;
135 }
136
137 /**
138 * Sets the value of maximum number of cached strings.
139 *
140 * @param maxCacheEntries
141 * The new value for the maximum number of cached strings.
142 */
143 public void setMaxCacheEntries(final int maxCacheEntries) {
144 myMaxCachEntries = maxCacheEntries;
145 }
146
147 /**
148 * Sets the value of maximum length of strings to cache to the new value.
149 * This can be used to stop a single long string from pushing useful values
150 * out of the cache.
151 *
152 * @param maxlength
153 * The new value for the maximum length of strings to cache.
154 */
155 public void setMaxCacheLength(final int maxlength) {
156 myMaxCacheLength = maxlength;
157
158 // The user has turned the cache off. Release all of the memory.
159 if (maxlength <= 0) {
160 clear();
161 }
162 }
163
164 /**
165 * Notification that a string/byte[] have been used.
166 *
167 * @param source
168 * The bytes in the string.
169 * @param offset
170 * The offset of the first byte.
171 * @param length
172 * The length of the bytes with a terminal zero byte.
173 * @param decoded
174 * The decoded string.
175 */
176 public void used(final String decoded, final byte[] source,
177 final int offset, final int length) {
178
179 // Check for the easiest value to cache or entries that are too long.
180 if ((length <= 1) || (myMaxCacheLength < length)) {
181 return;
182 }
183
184 SeenString entry = mySeen.get(decoded);
185 if (entry == null) {
186 entry = new SeenString(source, offset, length, decoded);
187
188 final SeenString existing = mySeen.putIfAbsent(decoded, entry);
189 if (existing != null) {
190 entry = existing;
191 }
192 }
193
194 // Count the use.
195 entry.incrementCount();
196
197 // Rebuild the count after period of use.
198 int use = myUseCount.incrementAndGet();
199 while ((myMaxCachEntries * myMaxCachEntriesMultiplier) < use) {
200 use = tryRebuild(use);
201 }
202
203 // Periodically trim low count entries.
204 if ((use != 0) && ((use % myMaxCachEntries) == 0)) {
205 trimPending();
206 }
207 }
208
209 /**
210 * Builds a map of the seen strings at each count. The order of the map is
211 * reversed so that higher counts are first in the map.
212 *
213 * @return A map of the seen strings at each count.
214 */
215 protected SortedMap<Integer, List<SeenString>> buildCacheGroups() {
216 myMaxCachEntriesMultiplier = Math.min(myMaxCachEntriesMultiplier + 1,
217 MAX_MULTIPLIER);
218
219 // Copy the current entries.
220 final SortedMap<Integer, List<SeenString>> order = new TreeMap<Integer, List<SeenString>>(
221 ReverseIntegerComparator.INSTANCE);
222
223 for (final SeenString seen : mySeen.values()) {
224 final Integer seenCount = Integer.valueOf(seen.reset());
225 List<SeenString> seenAtCount = order.get(seenCount);
226 if (seenAtCount == null) {
227 seenAtCount = new LinkedList<SeenString>();
228 order.put(seenCount, seenAtCount);
229 }
230
231 seenAtCount.add(seen);
232 }
233
234 return order;
235 }
236
237 /**
238 * Clears the cache.
239 */
240 protected void clear() {
241 mySeen.clear();
242 myUseCount.set(0);
243 }
244
245 /**
246 * Rebuilds the cache from the current {@link #mySeen} map. By the end of
247 * the method the caceh should have been rebuild and the {@link #mySeen} map
248 * cleared.
249 */
250 protected abstract void rebuildCache();
251
252 /**
253 * Trims the seen map of any entries that have not accumulated many counts.
254 * This is to avoid the seen map holding too many entries.
255 */
256 private void trimPending() {
257 // Only keep entries that have more than 1% (or more than 1 if 1% is <
258 // 1) entries.
259 final int trimLevel = Math.max(1, (int) (myMaxCachEntries * 0.01));
260 int toRemove = Math.max(0, mySeen.size() - myMaxCachEntries);
261
262 final Iterator<SeenString> iter = mySeen.values().iterator();
263 while (iter.hasNext() && (0 < toRemove)) {
264 final SeenString seen = iter.next();
265 if (seen.getCount() <= trimLevel) {
266 iter.remove();
267 toRemove -= 1;
268 }
269 }
270 }
271
272 /**
273 * Attempts to rebuild the cache.
274 *
275 * @param use
276 * The expected use count.
277 * @return The use count after the attempt.
278 */
279 private int tryRebuild(final int use) {
280 if (myUseCount.compareAndSet(use, 0)) {
281 rebuildCache();
282
283 return 0;
284 }
285 return myUseCount.get();
286 }
287
288 /**
289 * ReverseIntegerComparator provides a {@link Comparator} that considers
290 * higher value integers to be less then lower value integers.
291 *
292 * @copyright 2014, Allanbank Consulting, Inc., All Rights Reserved
293 */
294 public static class ReverseIntegerComparator implements
295 Comparator<Integer>, Serializable {
296
297 /** The single instance of the comparator. */
298 public static final Comparator<Integer> INSTANCE = new ReverseIntegerComparator();
299
300 /** The serialization version for the class. */
301 private static final long serialVersionUID = 7342816815375930353L;
302
303 /**
304 * Creates a new ReverseIntegerComparator. Private since this class is
305 * stateless and we only need a single copy.
306 */
307 private ReverseIntegerComparator() {
308 super();
309 }
310
311 /**
312 * {@inheritDoc}
313 * <p>
314 * Overridden to reverse the comparison of integers.
315 * </p>
316 */
317 @Override
318 public int compare(final Integer o1, final Integer o2) {
319 final int value = o1.compareTo(o2);
320
321 if (value < 0) {
322 return 1;
323 }
324 else if (0 < value) {
325 return -1;
326 }
327 else {
328 return 0;
329 }
330 }
331 }
332 }