View Javadoc
1   /*
2    * #%L
3    * AbstractStringCache.java - mongodb-async-driver - Allanbank Consulting, Inc.
4    * %%
5    * Copyright (C) 2011 - 2014 Allanbank Consulting, Inc.
6    * %%
7    * Licensed under the Apache License, Version 2.0 (the "License");
8    * you may not use this file except in compliance with the License.
9    * You may obtain a copy of the License at
10   * 
11   *      http://www.apache.org/licenses/LICENSE-2.0
12   * 
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   * #L%
19   */
20  
21  package com.allanbank.mongodb.bson.io;
22  
23  import java.io.Serializable;
24  import java.util.Comparator;
25  import java.util.Iterator;
26  import java.util.LinkedList;
27  import java.util.List;
28  import java.util.SortedMap;
29  import java.util.TreeMap;
30  import java.util.concurrent.ConcurrentHashMap;
31  import java.util.concurrent.ConcurrentMap;
32  import java.util.concurrent.atomic.AtomicInteger;
33  
34  /**
35   * AbstractStringCache provides the basic functionality for tracking string
36   * usage and triggering rebuilds of the cache over time.
37   * <p>
38   * The basic function of the cache is to maintain two structures. The first
39   * (provided by this class) tracks the usage of strings in the cache via a
40   * 'seen' {@link ConcurrentMap}. As the cache sees more usage it will
41   * periodically trim entries from the map that are not being used often (this is
42   * to limit memory usage). Once the cache thinks it has seen enough usage it
43   * will trigger the building of the runtime cache. This runtime cache does not
44   * require any locking as it is read-only after being constructed.
45   * </p>
46   * <p>
47   * There are two controls on the amount of caching instances of this class will
48   * do:
49   * </p>
50   * <ol>
51   * <li>
52   * {@link #getMaxCacheLength()} limits the length of the encoded bytes this
53   * class will try and retrieve from the cache. Setting this value to zero
54   * disables the cache and limit any memory overhead.</li>
55   * <li>{@link #getMaxCacheEntries()} controls the number of cached entries in
56   * the runtime-cache and how often the accumulated statistics are trimmed. Each
57   * entry represents a single encoded string.</li>
58   * </ol>
59   * 
60   * 
61   * @api.no This class is <b>NOT</b> part of the drivers API. This class may be
62   *         mutated in incompatible ways between any two releases of the driver.
63   * @copyright 2014, Allanbank Consulting, Inc., All Rights Reserved
64   */
65  public abstract class AbstractStringCache {
66  
67      /** The default maximum number of entries to keep in the cache. */
68      public static final int DEFAULT_MAX_CACHE_ENTRIES = 24;
69  
70      /** The default maximum length byte array to cache. */
71      public static final int DEFAULT_MAX_CACHE_LENGTH = 25;
72  
73      /** The maximum value for the multiplier. */
74      protected static final int MAX_MULTIPLIER = 100;
75  
76      /**
77       * The maximum length of a string to cache. This can be used to stop a
78       * single long string from pushing useful values out of the cache.
79       */
80      protected int myMaxCacheLength;
81  
82      /** The maximum number of entries to have in the cache. */
83      protected int myMaxCachEntries;
84  
85      /**
86       * The multiplier for the number of times the cache is used before we
87       * re-load the cache.
88       */
89      protected volatile int myMaxCachEntriesMultiplier;
90  
91      /**
92       * The map of seen strings that will be used periodically to build the cache
93       * cache.
94       */
95      protected final ConcurrentMap<String, SeenString> mySeen;
96  
97      /**
98       * The number of time the cache has been used since the last re-load of
99       * runtime, read-only cache.
100      */
101     protected final AtomicInteger myUseCount;
102 
103     /**
104      * Creates a new AbstractStringCache.
105      */
106     public AbstractStringCache() {
107         super();
108 
109         mySeen = new ConcurrentHashMap<String, SeenString>(1024);
110 
111         myMaxCacheLength = DEFAULT_MAX_CACHE_LENGTH;
112         myMaxCachEntries = DEFAULT_MAX_CACHE_ENTRIES;
113 
114         myMaxCachEntriesMultiplier = 0;
115         myUseCount = new AtomicInteger(0);
116     }
117 
118     /**
119      * Returns the maximum node count.
120      * 
121      * @return The maximum node count.
122      */
123     public int getMaxCacheEntries() {
124         return myMaxCachEntries;
125     }
126 
127     /**
128      * Returns the maximum length of strings to cache. This can be used to stop
129      * a single long string from pushing useful values out of the cache.
130      * 
131      * @return The maximum length of strings to cache.
132      */
133     public int getMaxCacheLength() {
134         return myMaxCacheLength;
135     }
136 
137     /**
138      * Sets the value of maximum number of cached strings.
139      * 
140      * @param maxCacheEntries
141      *            The new value for the maximum number of cached strings.
142      */
143     public void setMaxCacheEntries(final int maxCacheEntries) {
144         myMaxCachEntries = maxCacheEntries;
145     }
146 
147     /**
148      * Sets the value of maximum length of strings to cache to the new value.
149      * This can be used to stop a single long string from pushing useful values
150      * out of the cache.
151      * 
152      * @param maxlength
153      *            The new value for the maximum length of strings to cache.
154      */
155     public void setMaxCacheLength(final int maxlength) {
156         myMaxCacheLength = maxlength;
157 
158         // The user has turned the cache off. Release all of the memory.
159         if (maxlength <= 0) {
160             clear();
161         }
162     }
163 
164     /**
165      * Notification that a string/byte[] have been used.
166      * 
167      * @param source
168      *            The bytes in the string.
169      * @param offset
170      *            The offset of the first byte.
171      * @param length
172      *            The length of the bytes with a terminal zero byte.
173      * @param decoded
174      *            The decoded string.
175      */
176     public void used(final String decoded, final byte[] source,
177             final int offset, final int length) {
178 
179         // Check for the easiest value to cache or entries that are too long.
180         if ((length <= 1) || (myMaxCacheLength < length)) {
181             return;
182         }
183 
184         SeenString entry = mySeen.get(decoded);
185         if (entry == null) {
186             entry = new SeenString(source, offset, length, decoded);
187 
188             final SeenString existing = mySeen.putIfAbsent(decoded, entry);
189             if (existing != null) {
190                 entry = existing;
191             }
192         }
193 
194         // Count the use.
195         entry.incrementCount();
196 
197         // Rebuild the count after period of use.
198         int use = myUseCount.incrementAndGet();
199         while ((myMaxCachEntries * myMaxCachEntriesMultiplier) < use) {
200             use = tryRebuild(use);
201         }
202 
203         // Periodically trim low count entries.
204         if ((use != 0) && ((use % myMaxCachEntries) == 0)) {
205             trimPending();
206         }
207     }
208 
209     /**
210      * Builds a map of the seen strings at each count. The order of the map is
211      * reversed so that higher counts are first in the map.
212      * 
213      * @return A map of the seen strings at each count.
214      */
215     protected SortedMap<Integer, List<SeenString>> buildCacheGroups() {
216         myMaxCachEntriesMultiplier = Math.min(myMaxCachEntriesMultiplier + 1,
217                 MAX_MULTIPLIER);
218 
219         // Copy the current entries.
220         final SortedMap<Integer, List<SeenString>> order = new TreeMap<Integer, List<SeenString>>(
221                 ReverseIntegerComparator.INSTANCE);
222 
223         for (final SeenString seen : mySeen.values()) {
224             final Integer seenCount = Integer.valueOf(seen.reset());
225             List<SeenString> seenAtCount = order.get(seenCount);
226             if (seenAtCount == null) {
227                 seenAtCount = new LinkedList<SeenString>();
228                 order.put(seenCount, seenAtCount);
229             }
230 
231             seenAtCount.add(seen);
232         }
233 
234         return order;
235     }
236 
237     /**
238      * Clears the cache.
239      */
240     protected void clear() {
241         mySeen.clear();
242         myUseCount.set(0);
243     }
244 
245     /**
246      * Rebuilds the cache from the current {@link #mySeen} map. By the end of
247      * the method the caceh should have been rebuild and the {@link #mySeen} map
248      * cleared.
249      */
250     protected abstract void rebuildCache();
251 
252     /**
253      * Trims the seen map of any entries that have not accumulated many counts.
254      * This is to avoid the seen map holding too many entries.
255      */
256     private void trimPending() {
257         // Only keep entries that have more than 1% (or more than 1 if 1% is <
258         // 1) entries.
259         final int trimLevel = Math.max(1, (int) (myMaxCachEntries * 0.01));
260         int toRemove = Math.max(0, mySeen.size() - myMaxCachEntries);
261 
262         final Iterator<SeenString> iter = mySeen.values().iterator();
263         while (iter.hasNext() && (0 < toRemove)) {
264             final SeenString seen = iter.next();
265             if (seen.getCount() <= trimLevel) {
266                 iter.remove();
267                 toRemove -= 1;
268             }
269         }
270     }
271 
272     /**
273      * Attempts to rebuild the cache.
274      * 
275      * @param use
276      *            The expected use count.
277      * @return The use count after the attempt.
278      */
279     private int tryRebuild(final int use) {
280         if (myUseCount.compareAndSet(use, 0)) {
281             rebuildCache();
282 
283             return 0;
284         }
285         return myUseCount.get();
286     }
287 
288     /**
289      * ReverseIntegerComparator provides a {@link Comparator} that considers
290      * higher value integers to be less then lower value integers.
291      * 
292      * @copyright 2014, Allanbank Consulting, Inc., All Rights Reserved
293      */
294     public static class ReverseIntegerComparator implements
295             Comparator<Integer>, Serializable {
296 
297         /** The single instance of the comparator. */
298         public static final Comparator<Integer> INSTANCE = new ReverseIntegerComparator();
299 
300         /** The serialization version for the class. */
301         private static final long serialVersionUID = 7342816815375930353L;
302 
303         /**
304          * Creates a new ReverseIntegerComparator. Private since this class is
305          * stateless and we only need a single copy.
306          */
307         private ReverseIntegerComparator() {
308             super();
309         }
310 
311         /**
312          * {@inheritDoc}
313          * <p>
314          * Overridden to reverse the comparison of integers.
315          * </p>
316          */
317         @Override
318         public int compare(final Integer o1, final Integer o2) {
319             final int value = o1.compareTo(o2);
320 
321             if (value < 0) {
322                 return 1;
323             }
324             else if (0 < value) {
325                 return -1;
326             }
327             else {
328                 return 0;
329             }
330         }
331     }
332 }