1 /************************************************************************************ 2 Includes 3 ************************************************************************************/ 4 #include "All.h" 5 #include "BitArray.h" 6 #include "MD5.h" 7 8 /************************************************************************************ 9 Declares 10 ************************************************************************************/ 11 #define BIT_ARRAY_ELEMENTS (4096) // the number of elements in the bit array (4 MB) 12 #define BIT_ARRAY_BYTES (BIT_ARRAY_ELEMENTS * 4) // the number of bytes in the bit array 13 #define BIT_ARRAY_BITS (BIT_ARRAY_BYTES * 8) // the number of bits in the bit array 14 15 #define MAX_ELEMENT_BITS 128 16 #define REFILL_BIT_THRESHOLD (BIT_ARRAY_BITS - MAX_ELEMENT_BITS) 17 18 #define CODE_BITS 32 19 #define TOP_VALUE ((unsigned int) 1 << (CODE_BITS - 1)) 20 #define SHIFT_BITS (CODE_BITS - 9) 21 #define EXTRA_BITS ((CODE_BITS - 2) % 8 + 1) 22 #define BOTTOM_VALUE (TOP_VALUE >> 8) 23 24 /************************************************************************************ 25 Lookup tables 26 ************************************************************************************/ 27 const uint32 K_SUM_MIN_BOUNDARY[32] = {0,32,64,128,256,512,1024,2048,4096,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,16777216,33554432,67108864,134217728,268435456,536870912,1073741824,2147483648UL,0,0,0,0}; 28 29 #define MODEL_ELEMENTS 64 30 #define RANGE_OVERFLOW_TOTAL_WIDTH 65536 31 #define RANGE_OVERFLOW_SHIFT 16 32 33 const uint32 RANGE_TOTAL[64] = {0,19578,36160,48417,56323,60899,63265,64435,64971,65232,65351,65416,65447,65466,65476,65482,65485,65488,65490,65491,65492,65493,65494,65495,65496,65497,65498,65499,65500,65501,65502,65503,65504,65505,65506,65507,65508,65509,65510,65511,65512,65513,65514,65515,65516,65517,65518,65519,65520,65521,65522,65523,65524,65525,65526,65527,65528,65529,65530,65531,65532,65533,65534,65535,}; 34 const uint32 RANGE_WIDTH[64] = {19578,16582,12257,7906,4576,2366,1170,536,261,119,65,31,19,10,6,3,3,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,}; 35 36 #ifdef BUILD_RANGE_TABLE 37 int g_aryOverflows[256] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; 38 int g_nTotalOverflow = 0; 39 #endif 40 41 /************************************************************************************ 42 Constructor 43 ************************************************************************************/ 44 CBitArray::CBitArray(CIO *pIO) 45 { 46 // allocate memory for the bit array 47 m_pBitArray = new uint32 [BIT_ARRAY_ELEMENTS]; 48 memset(m_pBitArray, 0, BIT_ARRAY_BYTES); 49 50 // initialize other variables 51 m_nCurrentBitIndex = 0; 52 m_pIO = pIO; 53 } 54 55 /************************************************************************************ 56 Destructor 57 ************************************************************************************/ 58 CBitArray::~CBitArray() 59 { 60 // free the bit array 61 SAFE_ARRAY_DELETE(m_pBitArray) 62 #ifdef BUILD_RANGE_TABLE 63 OutputRangeTable(); 64 #endif 65 } 66 67 /************************************************************************************ 68 Output the bit array via the CIO (typically saves to disk) 69 ************************************************************************************/ 70 int CBitArray::OutputBitArray(BOOL bFinalize) 71 { 72 // write the entire file to disk 73 unsigned int nBytesWritten = 0; 74 unsigned int nBytesToWrite = 0; 75 // unsigned int nRetVal = 0; 76 77 if (bFinalize) 78 { 79 nBytesToWrite = ((m_nCurrentBitIndex >> 5) * 4) + 4; 80 81 m_MD5.AddData(m_pBitArray, nBytesToWrite); 82 83 RETURN_ON_ERROR(m_pIO->Write(m_pBitArray, nBytesToWrite, &nBytesWritten)) 84 85 // reset the bit pointer 86 m_nCurrentBitIndex = 0; 87 } 88 else 89 { 90 nBytesToWrite = (m_nCurrentBitIndex >> 5) * 4; 91 92 m_MD5.AddData(m_pBitArray, nBytesToWrite); 93 94 RETURN_ON_ERROR(m_pIO->Write(m_pBitArray, nBytesToWrite, &nBytesWritten)) 95 96 // move the last value to the front of the bit array 97 m_pBitArray[0] = m_pBitArray[m_nCurrentBitIndex >> 5]; 98 m_nCurrentBitIndex = (m_nCurrentBitIndex & 31); 99 100 // zero the rest of the memory (may not need the +1 because of frame byte alignment) 101 memset(&m_pBitArray[1], 0, min(nBytesToWrite + 1, BIT_ARRAY_BYTES - 1)); 102 } 103 104 // return a success 105 return ERROR_SUCCESS; 106 } 107 108 /************************************************************************************ 109 Range coding macros -- ugly, but outperform inline's (every cycle counts here) 110 ************************************************************************************/ 111 #define PUTC(VALUE) m_pBitArray[m_nCurrentBitIndex >> 5] |= ((VALUE) & 0xFF) << (24 - (m_nCurrentBitIndex & 31)); m_nCurrentBitIndex += 8; 112 #define PUTC_NOCAP(VALUE) m_pBitArray[m_nCurrentBitIndex >> 5] |= (VALUE) << (24 - (m_nCurrentBitIndex & 31)); m_nCurrentBitIndex += 8; 113 114 #define NORMALIZE_RANGE_CODER \ 115 while (m_RangeCoderInfo.range <= BOTTOM_VALUE) \ 116 { \ 117 if (m_RangeCoderInfo.low < (0xFF << SHIFT_BITS)) \ 118 { \ 119 PUTC(m_RangeCoderInfo.buffer); \ 120 for ( ; m_RangeCoderInfo.help; m_RangeCoderInfo.help--) { PUTC_NOCAP(0xFF); } \ 121 m_RangeCoderInfo.buffer = (m_RangeCoderInfo.low >> SHIFT_BITS); \ 122 } \ 123 else if (m_RangeCoderInfo.low & TOP_VALUE) \ 124 { \ 125 PUTC(m_RangeCoderInfo.buffer + 1); \ 126 m_nCurrentBitIndex += (m_RangeCoderInfo.help * 8); \ 127 m_RangeCoderInfo.help = 0; \ 128 m_RangeCoderInfo.buffer = (m_RangeCoderInfo.low >> SHIFT_BITS); \ 129 } \ 130 else \ 131 { \ 132 m_RangeCoderInfo.help++; \ 133 } \ 134 \ 135 m_RangeCoderInfo.low = (m_RangeCoderInfo.low << 8) & (TOP_VALUE - 1); \ 136 m_RangeCoderInfo.range <<= 8; \ 137 } 138 139 #define ENCODE_FAST(RANGE_WIDTH, RANGE_TOTAL, SHIFT) \ 140 NORMALIZE_RANGE_CODER \ 141 const int nTemp = m_RangeCoderInfo.range >> (SHIFT); \ 142 m_RangeCoderInfo.range = nTemp * (RANGE_WIDTH); \ 143 m_RangeCoderInfo.low += nTemp * (RANGE_TOTAL); 144 145 #define ENCODE_DIRECT(VALUE, SHIFT) \ 146 NORMALIZE_RANGE_CODER \ 147 m_RangeCoderInfo.range = m_RangeCoderInfo.range >> (SHIFT); \ 148 m_RangeCoderInfo.low += m_RangeCoderInfo.range * (VALUE); 149 150 /************************************************************************************ 151 Directly encode bits to the bitstream 152 ************************************************************************************/ 153 int CBitArray::EncodeBits(unsigned int nValue, int nBits) 154 { 155 // make sure there is room for the data 156 // this is a little slower than ensuring a huge block to start with, but it's safer 157 if (m_nCurrentBitIndex > REFILL_BIT_THRESHOLD) 158 { 159 RETURN_ON_ERROR(OutputBitArray()) 160 } 161 162 ENCODE_DIRECT(nValue, nBits); 163 return 0; 164 } 165 166 /************************************************************************************ 167 Encodes an unsigned int to the bit array (no rice coding) 168 ************************************************************************************/ 169 int CBitArray::EncodeUnsignedLong(unsigned int n) 170 { 171 // make sure there are at least 8 bytes in the buffer 172 if (m_nCurrentBitIndex > (BIT_ARRAY_BYTES - 8)) 173 { 174 RETURN_ON_ERROR(OutputBitArray()) 175 } 176 177 // encode the value 178 uint32 nBitArrayIndex = m_nCurrentBitIndex >> 5; 179 int nBitIndex = m_nCurrentBitIndex & 31; 180 181 if (nBitIndex == 0) 182 { 183 m_pBitArray[nBitArrayIndex] = n; 184 } 185 else 186 { 187 m_pBitArray[nBitArrayIndex] |= n >> nBitIndex; 188 m_pBitArray[nBitArrayIndex + 1] = n << (32 - nBitIndex); 189 } 190 191 m_nCurrentBitIndex += 32; 192 193 return 0; 194 } 195 196 /************************************************************************************ 197 Advance to a byte boundary (for frame alignment) 198 ************************************************************************************/ 199 void CBitArray::AdvanceToByteBoundary() 200 { 201 while (m_nCurrentBitIndex % 8) 202 m_nCurrentBitIndex++; 203 } 204 205 /************************************************************************************ 206 Encode a value 207 ************************************************************************************/ 208 int CBitArray::EncodeValue(int nEncode, BIT_ARRAY_STATE & BitArrayState) 209 { 210 // make sure there is room for the data 211 // this is a little slower than ensuring a huge block to start with, but it's safer 212 if (m_nCurrentBitIndex > REFILL_BIT_THRESHOLD) 213 { 214 RETURN_ON_ERROR(OutputBitArray()) 215 } 216 217 // convert to unsigned 218 nEncode = (nEncode > 0) ? nEncode * 2 - 1 : -nEncode * 2; 219 220 int nOriginalKSum = BitArrayState.nKSum; 221 222 // get the working k 223 // int nTempK = (BitArrayState.k) ? BitArrayState.k - 1 : 0; 224 225 // update nKSum 226 BitArrayState.nKSum += ((nEncode + 1) / 2) - ((BitArrayState.nKSum + 16) >> 5); 227 228 // update k 229 if (BitArrayState.nKSum < K_SUM_MIN_BOUNDARY[BitArrayState.k]) 230 BitArrayState.k--; 231 else if (BitArrayState.nKSum >= K_SUM_MIN_BOUNDARY[BitArrayState.k + 1]) 232 BitArrayState.k++; 233 234 // figure the pivot value 235 int nPivotValue = max(nOriginalKSum / 32, 1); 236 int nOverflow = nEncode / nPivotValue; 237 int nBase = nEncode - (nOverflow * nPivotValue); 238 239 // store the overflow 240 if (nOverflow < (MODEL_ELEMENTS - 1)) 241 { 242 ENCODE_FAST(RANGE_WIDTH[nOverflow], RANGE_TOTAL[nOverflow], RANGE_OVERFLOW_SHIFT); 243 244 #ifdef BUILD_RANGE_TABLE 245 g_aryOverflows[nOverflow]++; 246 g_nTotalOverflow++; 247 #endif 248 } 249 else 250 { 251 // store the "special" overflow (tells that perfect k is encoded next) 252 ENCODE_FAST(RANGE_WIDTH[MODEL_ELEMENTS - 1], RANGE_TOTAL[MODEL_ELEMENTS - 1], RANGE_OVERFLOW_SHIFT); 253 254 #ifdef BUILD_RANGE_TABLE 255 g_aryOverflows[MODEL_ELEMENTS - 1]++; 256 g_nTotalOverflow++; 257 #endif 258 259 // code the overflow using straight bits 260 ENCODE_DIRECT((nOverflow >> 16) & 0xFFFF, 16); 261 ENCODE_DIRECT(nOverflow & 0xFFFF, 16); 262 } 263 264 // code the base 265 { 266 if (nPivotValue >= (1 << 16)) 267 { 268 int nPivotValueBits = 0; 269 while ((nPivotValue >> nPivotValueBits) > 0) { nPivotValueBits++; } 270 int nSplitFactor = 1 << (nPivotValueBits - 16); 271 272 // we know that base is smaller than pivot coming into this 273 // however, after we divide both by an integer, they could be the same 274 // we account by adding one to the pivot, but this hurts compression 275 // by (1 / nSplitFactor) -- therefore we maximize the split factor 276 // that gets one added to it 277 278 // encode the pivot as two pieces 279 int nPivotValueA = (nPivotValue / nSplitFactor) + 1; 280 int nPivotValueB = nSplitFactor; 281 282 int nBaseA = nBase / nSplitFactor; 283 int nBaseB = nBase % nSplitFactor; 284 285 { 286 NORMALIZE_RANGE_CODER 287 const int nTemp = m_RangeCoderInfo.range / nPivotValueA; 288 m_RangeCoderInfo.range = nTemp; 289 m_RangeCoderInfo.low += nTemp * nBaseA; 290 } 291 292 { 293 NORMALIZE_RANGE_CODER 294 const int nTemp = m_RangeCoderInfo.range / nPivotValueB; 295 m_RangeCoderInfo.range = nTemp; 296 m_RangeCoderInfo.low += nTemp * nBaseB; 297 } 298 } 299 else 300 { 301 302 NORMALIZE_RANGE_CODER 303 const int nTemp = m_RangeCoderInfo.range / nPivotValue; 304 m_RangeCoderInfo.range = nTemp; 305 m_RangeCoderInfo.low += nTemp * nBase; 306 } 307 } 308 309 return 0; 310 } 311 312 /************************************************************************************ 313 Flush 314 ************************************************************************************/ 315 void CBitArray::FlushBitArray() 316 { 317 // advance to a byte boundary (for alignment) 318 AdvanceToByteBoundary(); 319 320 // the range coder 321 m_RangeCoderInfo.low = 0; // full code range 322 m_RangeCoderInfo.range = TOP_VALUE; 323 m_RangeCoderInfo.buffer = 0; 324 m_RangeCoderInfo.help = 0; // no bytes to follow 325 } 326 327 void CBitArray::FlushState(BIT_ARRAY_STATE & BitArrayState) 328 { 329 // k and ksum 330 BitArrayState.k = 10; 331 BitArrayState.nKSum = (1 << BitArrayState.k) * 16; 332 } 333 334 /************************************************************************************ 335 Finalize 336 ************************************************************************************/ 337 void CBitArray::Finalize() 338 { 339 NORMALIZE_RANGE_CODER 340 341 unsigned int nTemp = (m_RangeCoderInfo.low >> SHIFT_BITS) + 1; 342 343 if (nTemp > 0xFF) // we have a carry 344 { 345 PUTC(m_RangeCoderInfo.buffer + 1); 346 for ( ; m_RangeCoderInfo.help; m_RangeCoderInfo.help--) 347 { 348 PUTC(0); 349 } 350 } 351 else // no carry 352 { 353 PUTC(m_RangeCoderInfo.buffer); 354 for ( ; m_RangeCoderInfo.help; m_RangeCoderInfo.help--) 355 { 356 PUTC(((unsigned char) 0xFF)); 357 } 358 } 359 360 // we must output these bytes so the decoder can properly work at the end of the stream 361 PUTC(nTemp & 0xFF); 362 PUTC(0); 363 PUTC(0); 364 PUTC(0); 365 } 366 367 /************************************************************************************ 368 Build a range table (for development / debugging) 369 ************************************************************************************/ 370 #ifdef BUILD_RANGE_TABLE 371 void CBitArray::OutputRangeTable() 372 { 373 int z; 374 375 if (g_nTotalOverflow == 0) return; 376 377 int nTotal = 0; 378 int aryWidth[256]; ZeroMemory(aryWidth, 256 * 4); 379 for (z = 0; z < MODEL_ELEMENTS; z++) 380 { 381 aryWidth[z] = int(((float(g_aryOverflows[z]) * float(65536)) + (g_nTotalOverflow / 2)) / float(g_nTotalOverflow)); 382 if (aryWidth[z] == 0) aryWidth[z] = 1; 383 nTotal += aryWidth[z]; 384 } 385 386 z = 0; 387 while (nTotal > 65536) 388 { 389 if (aryWidth[z] != 1) 390 { 391 aryWidth[z]--; 392 nTotal--; 393 } 394 z++; 395 if (z == MODEL_ELEMENTS) z = 0; 396 } 397 398 z = 0; 399 while (nTotal < 65536) 400 { 401 aryWidth[z++]++; 402 nTotal++; 403 if (z == MODEL_ELEMENTS) z = 0; 404 } 405 406 int aryTotal[256]; ZeroMemory(aryTotal, 256 * 4); 407 for (z = 0; z < MODEL_ELEMENTS; z++) 408 { 409 for (int q = 0; q < z; q++) 410 { 411 aryTotal[z] += aryWidth[q]; 412 } 413 } 414 415 TCHAR buf[1024]; 416 _stprintf(buf, _T("const uint32 RANGE_TOTAL[%d] = {"), MODEL_ELEMENTS); 417 ODS(buf); 418 for (z = 0; z < MODEL_ELEMENTS; z++) 419 { 420 _stprintf(buf, _T("%d,"), aryTotal[z]); 421 OutputDebugString(buf); 422 } 423 ODS(_T("};\n")); 424 425 _stprintf(buf, _T("const uint32 RANGE_WIDTH[%d] = {"), MODEL_ELEMENTS); 426 ODS(buf); 427 for (z = 0; z < MODEL_ELEMENTS; z++) 428 { 429 _stprintf(buf, _T("%d,"), aryWidth[z]); 430 OutputDebugString(buf); 431 } 432 ODS(_T("};\n\n")); 433 } 434 #endif // #ifdef BUILD_RANGE_TABLE 435