Coverage Report - com.allanbank.mongodb.bson.io.StringEncoder
 
Classes in this File Line Coverage Branch Coverage Complexity
StringEncoder
93%
59/63
85%
29/34
3.111
 
 1  
 /*
 2  
  * #%L
 3  
  * StringEncoder.java - mongodb-async-driver - Allanbank Consulting, Inc.
 4  
  * %%
 5  
  * Copyright (C) 2011 - 2014 Allanbank Consulting, Inc.
 6  
  * %%
 7  
  * Licensed under the Apache License, Version 2.0 (the "License");
 8  
  * you may not use this file except in compliance with the License.
 9  
  * You may obtain a copy of the License at
 10  
  * 
 11  
  *      http://www.apache.org/licenses/LICENSE-2.0
 12  
  * 
 13  
  * Unless required by applicable law or agreed to in writing, software
 14  
  * distributed under the License is distributed on an "AS IS" BASIS,
 15  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 16  
  * See the License for the specific language governing permissions and
 17  
  * limitations under the License.
 18  
  * #L%
 19  
  */
 20  
 
 21  
 package com.allanbank.mongodb.bson.io;
 22  
 
 23  
 import java.io.IOException;
 24  
 import java.io.OutputStream;
 25  
 
 26  
 /**
 27  
  * StringEncoder provides a single location for the string encoding and sizing
 28  
  * logic. This class is backed by a cache of strings to the encoded bytes.
 29  
  * <p>
 30  
  * The cache is controlled via two parameters; see {@link StringEncoderCache}.
 31  
  * 
 32  
  * @api.no This class is <b>NOT</b> part of the drivers API. This class may be
 33  
  *         mutated in incompatible ways between any two releases of the driver.
 34  
  * @copyright 2013, Allanbank Consulting, Inc., All Rights Reserved
 35  
  */
 36  
 public class StringEncoder {
 37  
 
 38  
     /**
 39  
      * Returns the encoded size of the 'C' string: its UTF-8 length plus 1 byte.
 40  
      * 
 41  
      * @param string
 42  
      *            The 'C' string to determine the size of.
 43  
      * @return The number of bytes needed to encode the string as a 'C' string.
 44  
      */
 45  
     public static int computeCStringSize(final String string) {
 46  17
         return utf8Size(string) + 1;
 47  
     }
 48  
 
 49  
     /**
 50  
      * Returns the encoded size of the 'UTF8' string: its UTF-8 length plus 5 bytes.
 51  
      * 
 52  
      * @param string
 53  
      *            The 'UTF8' string to determine the size of.
 54  
      * @return The number of bytes needed to encode the string as a 'UTF8' string.
 55  
      */
 56  
     public static int computeStringSize(final String string) {
 57  21
         return 4 + utf8Size(string) + 1;
 58  
     }
 59  
 
 60  
     /**
 61  
      * Computes the size of the encoded UTF8 String based on the table below.
 62  
      * 
 63  
      * <pre>
 64  
      * #    Code Points      Bytes
 65  
      * 1    U+0000..U+007F   1
 66  
      * 
 67  
      * 2    U+0080..U+07FF   2
 68  
      * 
 69  
      * 3    U+0800..U+0FFF   3
 70  
      *      U+1000..U+FFFF
 71  
      * 
 72  
      * 4   U+10000..U+3FFFF  4
 73  
      *     U+40000..U+FFFFF  4
 74  
      *    U+100000..U+10FFFF 4
 75  
      * </pre>
 76  
      * 
 77  
      * @param string
 78  
      *            The string to determine the length of.
 79  
      * @return The length of the string encoded as UTF8.
 80  
      */
 81  
     public static int utf8Size(final String string) {
 82  2778367
         final int strLength = (string == null) ? 0 : string.length();
 83  
 
 84  2778367
         int length = 0;
 85  
         int codePoint;
 86  10185988
         for (int i = 0; i < strLength; i += Character.charCount(codePoint)) {
 87  7407621
             codePoint = Character.codePointAt(string, i);
 88  7407621
             if (codePoint < 0x80) {
 89  7407543
                 length += 1;
 90  
             }
 91  78
             else if (codePoint < 0x800) {
 92  22
                 length += 2;
 93  
             }
 94  56
             else if (codePoint < 0x10000) {
 95  24
                 length += 3;
 96  
             }
 97  
             else {
 98  32
                 length += 4;
 99  
             }
 100  
         }
 101  
 
 102  2778367
         return length;
 103  
     }
 104  
 
 105  
     /** A private buffer for encoding strings. */
 106  3460
     private final byte[] myBuffer = new byte[1024];
 107  
 
 108  
     /** The cache of strings to bytes. */
 109  
     private final StringEncoderCache myCache;
 110  
 
 111  
     /**
 112  
      * Creates a new StringEncoder.
 113  
      */
 114  
     public StringEncoder() {
 115  2
         this(new StringEncoderCache());
 116  2
     }
 117  
 
 118  
     /**
 119  
      * Creates a new StringEncoder.
 120  
      * 
 121  
      * @param cache
 122  
      *            The cache for the encoder to use.
 123  
      */
 124  3460
     public StringEncoder(final StringEncoderCache cache) {
 125  3460
         myCache = cache;
 126  3460
     }
 127  
 
 128  
     /**
 129  
      * Writes the string as a UTF-8 string. An empty string writes nothing, a
 130  
      * cached encoding is written as-is, and any other string is encoded by
 131  
      * {@link #fastEncode(String, OutputStream)}.
 132  
      * 
 133  
      * @param string
 134  
      *            The string to encode.
 135  
      * @param out
 136  
      *            The stream to write to.
 137  
      * @throws IOException
 138  
      *             On a failure to write the bytes.
 139  
      */
 140  
     public void encode(final String string, final OutputStream out)
 141  
             throws IOException {
 142  
 
 143  116518
         if (!string.isEmpty()) {
 144  114099
             final byte[] encoded = myCache.find(string);
 145  
 
 146  114098
             if (encoded == null) {
 147  
                 // Cache miss - write the bytes straight to the stream.
 148  77347
                 fastEncode(string, out);
 149  
             }
 150  
             else {
 151  36752
                 myCache.used(string, encoded, 0, encoded.length);
 152  36752
                 out.write(encoded);
 153  
             }
 154  
         }
 155  116518
     }
 156  
 
 157  
     /**
 158  
      * Computes the size of the encoded UTF8 String based on the table below.
 159  
      * This method may use a cached copy of the encoded string to determine the
 160  
      * size.
 161  
      * 
 162  
      * <pre>
 163  
      * #    Code Points      Bytes
 164  
      * 1    U+0000..U+007F   1
 165  
      * 
 166  
      * 2    U+0080..U+07FF   2
 167  
      * 
 168  
      * 3    U+0800..U+0FFF   3
 169  
      *      U+1000..U+FFFF
 170  
      * 
 171  
      * 4   U+10000..U+3FFFF  4
 172  
      *     U+40000..U+FFFFF  4
 173  
      *    U+100000..U+10FFFF 4
 174  
      * </pre>
 175  
      * 
 176  
      * @param string
 177  
      *            The string to determine the length of.
 178  
      * @return The length of the string encoded as UTF8.
 179  
      */
 180  
     public int encodeSize(final String string) {
 181  2935
         if (string.isEmpty()) {
 182  1
             return 0;
 183  
         }
 184  
 
 185  2934
         final byte[] cached = myCache.find(string);
 186  2934
         if (cached != null) {
 187  
             // Don't count this as a usage. Just bonus speed.
 188  2450
             return cached.length;
 189  
         }
 190  484
         return utf8Size(string);
 191  
     }
 192  
 
 193  
     /**
 194  
      * Returns the cache value.
 195  
      * 
 196  
      * @return The cache value.
 197  
      * @deprecated The cache {@link StringEncoderCache} should be controlled
 198  
      *             directly. This method will be removed after the 2.1.0
 199  
      *             release.
 200  
      */
 201  
     @Deprecated
 202  
     public StringEncoderCache getCache() {
 203  1
         return myCache;
 204  
     }
 205  
 
 206  
     /**
 207  
      * Encodes the string as UTF-8 into the private buffer and writes it to the
 208  
      * stream, flushing early whenever the buffer is nearly full. The bytes are
 209  
      * offered to the cache only if the whole string fit in the buffer.
 210  
      * 
 211  
      * @param string
 212  
      *            The string to encode.
 213  
      * @param out
 214  
      *            The stream to write to.
 215  
      * @throws IOException
 216  
      *             On a failure to write the bytes.
 217  
      */
 218  
     protected void fastEncode(final String string, final OutputStream out)
 219  
             throws IOException {
 220  
         // 4 = max encoded bytes/code point.
 221  77347
         final int writeUpTo = myBuffer.length - 4;
 222  77347
         final int strLength = string.length();
 223  
 
 224  77347
         boolean bufferHasAllBytes = true;
 225  
 
 226  77347
         int bufferOffset = 0;
 227  
         int codePoint;
 228  416653
         for (int i = 0; i < strLength; i += Character.charCount(codePoint)) {
 229  
 
 230  
             // Check for buffer overflow.
 231  339306
             if (writeUpTo < bufferOffset) {
 232  0
                 bufferHasAllBytes = false;
 233  0
                 if (out != null) {
 234  0
                     out.write(myBuffer, 0, bufferOffset);
 235  
                 }
 236  0
                 bufferOffset = 0;
 237  
             }
 238  
 
 239  339306
             codePoint = Character.codePointAt(string, i);
 240  339306
             if (codePoint < 0x80) {
 241  339273
                 myBuffer[bufferOffset++] = (byte) codePoint;
 242  
             }
 243  33
             else if (codePoint < 0x800) {
 244  10
                 myBuffer[bufferOffset++] = (byte) (0xC0 + ((codePoint >> 6) & 0xFF));
 245  10
                 myBuffer[bufferOffset++] = (byte) (0x80 + ((codePoint >> 0) & 0x3F));
 246  
             }
 247  23
             else if (codePoint < 0x10000) {
 248  11
                 myBuffer[bufferOffset++] = (byte) (0xE0 + ((codePoint >> 12) & 0xFF));
 249  11
                 myBuffer[bufferOffset++] = (byte) (0x80 + ((codePoint >> 6) & 0x3F));
 250  11
                 myBuffer[bufferOffset++] = (byte) (0x80 + ((codePoint >> 0) & 0x3f));
 251  
             }
 252  
             else {
 253  12
                 myBuffer[bufferOffset++] = (byte) (0xF0 + ((codePoint >> 18) & 0xFF));
 254  12
                 myBuffer[bufferOffset++] = (byte) (0x80 + ((codePoint >> 12) & 0x3F));
 255  12
                 myBuffer[bufferOffset++] = (byte) (0x80 + ((codePoint >> 6) & 0x3F));
 256  12
                 myBuffer[bufferOffset++] = (byte) (0x80 + ((codePoint >> 0) & 0x3F));
 257  
             }
 258  
         }
 259  
 
 260  
         // Write out the final results.
 261  77347
         if (out != null) {
 262  77347
             out.write(myBuffer, 0, bufferOffset);
 263  
         }
 264  
 
 265  
         // ... and try and save it in the cache.
 266  77347
         if (bufferHasAllBytes) {
 267  77347
             myCache.used(string, myBuffer, 0, bufferOffset);
 268  
         }
 269  77347
     }
 270  
 }