1 // MD5Checksum.h: interface for the MD5Checksum class. 2 // 3 ////////////////////////////////////////////////////////////////////// 4 5 #ifndef MD5CHECKSUM_HEADER 6 #define MD5CHECKSUM_HEADER 7 #include "DefineHeader.h" 8 9 /**************************************************************************************** 10 This software is derived from the RSA Data Security, Inc. MD5 Message-Digest Algorithm. 11 Incorporation of this statement is a condition of use; please see the RSA 12 Data Security Inc copyright notice below:- 13 14 Copyright (C) 1990-2, RSA Data Security, Inc. Created 1990. All 15 rights reserved. 16 17 RSA Data Security, Inc. makes no representations concerning either 18 the merchantability of this software or the suitability of this 19 software for any particular purpose. It is provided "as is" 20 without express or implied warranty of any kind. 21 22 These notices must be retained in any copies of any part of this 23 documentation and/or software. 24 25 Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All 26 rights reserved. 27 License to copy and use this software is granted provided that it 28 is identified as the "RSA Data Security, Inc. MD5 Message-Digest 29 Algorithm" in all material mentioning or referencing this software 30 or this function. 31 License is also granted to make and use derivative works provided 32 that such works are identified as "derived from the RSA Data 33 Security, Inc. MD5 Message-Digest Algorithm" in all material 34 mentioning or referencing the derived work. 35 RSA Data Security, Inc. makes no representations concerning either 36 the merchantability of this software or the suitability of this 37 software for any particular purpose. It is provided "as is" 38 without express or implied warranty of any kind. 39 40 These notices must be retained in any copies of any part of this 41 documentation and/or software. 42 *****************************************************************************************/ 43 44 /**************************************************************************************** 45 This implementation of the RSA MD5 Algorithm was written by Langfine Ltd. 46 47 Langfine Ltd makes no representations concerning either 48 the merchantability of this software or the suitability of this 49 software for any particular purpose. It is provided "as is" 50 without express or implied warranty of any kind. 51 52 In addition to the above, Langfine make no warrant or assurances regarding the 53 accuracy of this implementation of the MD5 checksum algorithm nor any assurances regarding 54 its suitability for any purposes. 55 56 This implementation may be used freely provided that Langfine is credited 57 in a copyright or similar notices (eg, RSA MD5 Algorithm implemented by Langfine 58 Ltd.) and provided that the RSA Data Security notices are complied with. 59 60 Langfine may be contacted at mail@langfine.com 61 */ 62 63 /***************************************************************************************** 64 CLASS: CMD5Checksum 65 DESCRIPTION: Implements the "RSA Data Security, Inc. MD5 Message-Digest Algorithm". 66 NOTES: Calculates the RSA MD5 checksum for a file or congiguous array of data. 67 68 Below are extracts from a memo on The MD5 Message-Digest Algorithm by R. Rivest of MIT 69 Laboratory for Computer Science and RSA Data Security, Inc., April 1992. 70 71 1. Executive Summary 72 This document describes the MD5 message-digest algorithm. The 73 algorithm takes as input a message of arbitrary length and produces 74 as output a 128-bit "fingerprint" or "message digest" of the input. 75 It is conjectured that it is computationally infeasible to produce 76 two messages having the same message digest, or to produce any 77 message having a given prespecified target message digest. The MD5 78 algorithm is intended for digital signature applications, where a 79 large file must be "compressed" in a secure manner before being 80 encrypted with a private (secret) key under a public-key cryptosystem 81 such as RSA. 82 83 The MD5 algorithm is designed to be quite fast on 32-bit machines. In 84 addition, the MD5 algorithm does not require any large substitution 85 tables; the algorithm can be coded quite compactly. 86 The MD5 algorithm is an extension of the MD4 message-digest algorithm 87 1,2]. MD5 is slightly slower than MD4, but is more "conservative" in 88 design. MD5 was designed because it was felt that MD4 was perhaps 89 being adopted for use more quickly than justified by the existing 90 critical review; because MD4 was designed to be exceptionally fast, 91 it is "at the edge" in terms of risking successful cryptanalytic 92 attack. MD5 backs off a bit, giving up a little in speed for a much 93 greater likelihood of ultimate security. It incorporates some 94 suggestions made by various reviewers, and contains additional 95 optimizations. The MD5 algorithm is being placed in the public domain 96 for review and possible adoption as a standard. 97 98 99 2. Terminology and Notation 100 In this document a "word" is a 32-bit quantity and a "byte" is an 101 eight-bit quantity. A sequence of bits can be interpreted in a 102 natural manner as a sequence of bytes, where each consecutive group 103 of eight bits is interpreted as a byte with the high-order (most 104 significant) bit of each byte listed first. Similarly, a sequence of 105 bytes can be interpreted as a sequence of 32-bit words, where each 106 consecutive group of four bytes is interpreted as a word with the 107 low-order (least significant) byte given first. 108 Let x_i denote "x sub i". If the subscript is an expression, we 109 surround it in braces, as in x_{i+1}. Similarly, we use ^ for 110 superscripts (exponentiation), so that x^i denotes x to the i-th power. 111 Let the symbol "+" denote addition of words (i.e., modulo-2^32 112 addition). Let X <<< s denote the 32-bit value obtained by circularly 113 shifting (rotating) X left by s bit positions. Let not(X) denote the 114 bit-wise complement of X, and let X v Y denote the bit-wise OR of X 115 and Y. Let X xor Y denote the bit-wise XOR of X and Y, and let XY 116 denote the bit-wise AND of X and Y. 117 118 119 3. MD5 Algorithm Description 120 We begin by supposing that we have a b-bit message as input, and that 121 we wish to find its message digest. Here b is an arbitrary 122 nonnegative integer; b may be zero, it need not be a multiple of 123 eight, and it may be arbitrarily large. We imagine the bits of the 124 message written down as follows: m_0 m_1 ... m_{b-1} 125 The following five steps are performed to compute the message digest 126 of the message. 127 128 3.1 Step 1. Append Padding Bits 129 The message is "padded" (extended) so that its length (in bits) is 130 congruent to 448, modulo 512. That is, the message is extended so 131 that it is just 64 bits shy of being a multiple of 512 bits long. 132 Padding is always performed, even if the length of the message is 133 already congruent to 448, modulo 512. 134 Padding is performed as follows: a single "1" bit is appended to the 135 message, and then "0" bits are appended so that the length in bits of 136 the padded message becomes congruent to 448, modulo 512. In all, at 137 least one bit and at most 512 bits are appended. 138 139 3.2 Step 2. Append Length 140 A 64-bit representation of b (the length of the message before the 141 padding bits were added) is appended to the result of the previous 142 step. In the unlikely event that b is greater than 2^64, then only 143 the low-order 64 bits of b are used. (These bits are appended as two 144 32-bit words and appended low-order word first in accordance with the 145 previous conventions.) 146 At this point the resulting message (after padding with bits and with 147 b) has a length that is an exact multiple of 512 bits. Equivalently, 148 this message has a length that is an exact multiple of 16 (32-bit) 149 words. Let M[0 ... N-1] denote the words of the resulting message, 150 where N is a multiple of 16. 151 152 3.3 Step 3. Initialize MD Buffer 153 A four-word buffer (A,B,C,D) is used to compute the message digest. 154 Here each of A, B, C, D is a 32-bit register. These registers are 155 initialized to the following values in hexadecimal, low-order bytes first): 156 word A: 01 23 45 67 word B: 89 ab cd ef 157 word C: fe dc ba 98 word D: 76 54 32 10 158 159 3.4 Step 4. Process Message in 16-Word Blocks 160 We first define four auxiliary functions that each take as input 161 three 32-bit words and produce as output one 32-bit word. 162 F(X,Y,Z) = XY v not(X) Z G(X,Y,Z) = XZ v Y not(Z) 163 H(X,Y,Z) = X xor Y xor Z I(X,Y,Z) = Y xor (X v not(Z)) 164 In each bit position F acts as a conditional: if X then Y else Z. 165 The function F could have been defined using + instead of v since XY 166 and not(X)Z will never have 1's in the same bit position.) It is 167 interesting to note that if the bits of X, Y, and Z are independent 168 and unbiased, the each bit of F(X,Y,Z) will be independent and unbiased. 169 The functions G, H, and I are similar to the function F, in that they 170 act in "bitwise parallel" to produce their output from the bits of X, 171 Y, and Z, in such a manner that if the corresponding bits of X, Y, 172 and Z are independent and unbiased, then each bit of G(X,Y,Z), 173 H(X,Y,Z), and I(X,Y,Z) will be independent and unbiased. Note that 174 the function H is the bit-wise "xor" or "parity" function of its inputs. 175 This step uses a 64-element table T[1 ... 64] constructed from the 176 sine function. Let T[i] denote the i-th element of the table, which 177 is equal to the integer part of 4294967296 times abs(sin(i)), where i 178 is in radians. The elements of the table are given in the appendix. 179 Do the following: 180 181 //Process each 16-word block. 182 For i = 0 to N/16-1 do // Copy block i into X. 183 For j = 0 to 15 do 184 Set X[j] to M[i*16+j]. 185 end //of loop on j 186 187 // Save A as AA, B as BB, C as CC, and D as DD. 188 AA = A BB = B 189 CC = C DD = D 190 191 // Round 1. 192 // Let [abcd k s i] denote the operation 193 // a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). 194 // Do the following 16 operations. 195 [ABCD 0 7 1] [DABC 1 12 2] [CDAB 2 17 3] [BCDA 3 22 4] 196 [ABCD 4 7 5] [DABC 5 12 6] [CDAB 6 17 7] [BCDA 7 22 8] 197 [ABCD 8 7 9] [DABC 9 12 10] [CDAB 10 17 11] [BCDA 11 22 12] 198 [ABCD 12 7 13] [DABC 13 12 14] [CDAB 14 17 15] [BCDA 15 22 16] 199 200 // Round 2. 201 // Let [abcd k s i] denote the operation 202 // a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). 203 // Do the following 16 operations. 204 [ABCD 1 5 17] [DABC 6 9 18] [CDAB 11 14 19] [BCDA 0 20 20] 205 [ABCD 5 5 21] [DABC 10 9 22] [CDAB 15 14 23] [BCDA 4 20 24] 206 [ABCD 9 5 25] [DABC 14 9 26] [CDAB 3 14 27] [BCDA 8 20 28] 207 [ABCD 13 5 29] [DABC 2 9 30] [CDAB 7 14 31] [BCDA 12 20 32] 208 209 // Round 3. 210 // Let [abcd k s t] denote the operation 211 // a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s). 212 // Do the following 16 operations. 213 [ABCD 5 4 33] [DABC 8 11 34] [CDAB 11 16 35] [BCDA 14 23 36] 214 [ABCD 1 4 37] [DABC 4 11 38] [CDAB 7 16 39] [BCDA 10 23 40] 215 [ABCD 13 4 41] [DABC 0 11 42] [CDAB 3 16 43] [BCDA 6 23 44] 216 [ABCD 9 4 45] [DABC 12 11 46] [CDAB 15 16 47] [BCDA 2 23 48] 217 218 // Round 4. 219 // Let [abcd k s t] denote the operation 220 // a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). 221 // Do the following 16 operations. 222 [ABCD 0 6 49] [DABC 7 10 50] [CDAB 14 15 51] [BCDA 5 21 52] 223 [ABCD 12 6 53] [DABC 3 10 54] [CDAB 10 15 55] [BCDA 1 21 56] 224 [ABCD 8 6 57] [DABC 15 10 58] [CDAB 6 15 59] [BCDA 13 21 60] 225 [ABCD 4 6 61] [DABC 11 10 62] [CDAB 2 15 63] [BCDA 9 21 64] 226 227 // Then perform the following additions. (That is increment each 228 // of the four registers by the value it had before this block 229 // was started.) 230 A = A + AA B = B + BB C = C + CC D = D + DD 231 232 end // of loop on i 233 234 3.5 Step 5. Output 235 The message digest produced as output is A, B, C, D. That is, we 236 begin with the low-order byte of A, and end with the high-order byte of D. 237 This completes the description of MD5. 238 239 Summary 240 The MD5 message-digest algorithm is simple to implement, and provides 241 a "fingerprint" or message digest of a message of arbitrary length. 242 It is conjectured that the difficulty of coming up with two messages 243 having the same message digest is on the order of 2^64 operations, 244 and that the difficulty of coming up with any message having a given 245 message digest is on the order of 2^128 operations. The MD5 algorithm 246 has been carefully scrutinized for weaknesses. It is, however, a 247 relatively new algorithm and further security analysis is of course 248 justified, as is the case with any new proposal of this sort. 249 250 251 5. Differences Between MD4 and MD5 252 The following are the differences between MD4 and MD5: 253 1. A fourth round has been added. 254 2. Each step now has a unique additive constant. 255 3. The function g in round 2 was changed from (XY v XZ v YZ) to 256 (XZ v Y not(Z)) to make g less symmetric. 257 4. Each step now adds in the result of the previous step. This 258 promotes a faster "avalanche effect". 259 5. The order in which input words are accessed in rounds 2 and 260 3 is changed, to make these patterns less like each other. 261 6. The shift amounts in each round have been approximately 262 optimized, to yield a faster "avalanche effect." The shifts in 263 different rounds are distinct. 264 265 References 266 [1] Rivest, R., "The MD4 Message Digest Algorithm", RFC 1320, MIT and 267 RSA Data Security, Inc., April 1992. 268 [2] Rivest, R., "The MD4 message digest algorithm", in A.J. Menezes 269 and S.A. Vanstone, editors, Advances in Cryptology - CRYPTO '90 270 Proceedings, pages 303-311, Springer-Verlag, 1991. 271 [3] CCITT Recommendation X.509 (1988), "The Directory - 272 Authentication Framework."APPENDIX A - Reference Implementation 273 274 275 The level of security discussed in this memo is considered to be 276 sufficient for implementing very high security hybrid digital- 277 signature schemes based on MD5 and a public-key cryptosystem. 278 Author's Address 279 Ronald L. Rivest Massachusetts Institute of Technology 280 Laboratory for Computer Science NE43-324 545 Technology Square 281 Cambridge, MA 02139-1986 Phone: (617) 253-5880 282 EMail: rivest@theory.lcs.mit.edu 283 284 285 *****************************************************************************************/ 286 class CMD5Checksum 287 { 288 public: 289 //interface functions for the RSA MD5 calculation 290 static tstring GetMD5(BYTE *pBuf, UINT nLength); 291 static tstring GetMD5(FILE *File, long long nLength); 292 static tstring GetMD5(const tstring &strFilePath, long long nLength); 293 static unsigned char *_GetMD5(BYTE *pBuf, UINT nLength); 294 static unsigned char *_GetMD5(FILE *File, long long nLength); 295 static unsigned char *_GetMD5(const tstring &strFilePath, long long nLength); 296 protected: 297 //constructor/destructor 298 CMD5Checksum(); ~CMD5Checksum()299 virtual ~CMD5Checksum() 300 { 301 if (m_pBuffer) 302 { 303 delete []m_pBuffer; 304 m_pBuffer = NULL; 305 } 306 }; 307 308 //RSA MD5 implementation 309 void Transform(BYTE Block[64]); 310 void Update(BYTE *Input, UINT nInputLen); 311 tstring Final(); 312 unsigned char *FinalChar(); 313 inline DWORD RotateLeft(DWORD x, int n); 314 inline void FF(DWORD &A, DWORD B, DWORD C, DWORD D, DWORD X, DWORD S, DWORD T); 315 inline void GG(DWORD &A, DWORD B, DWORD C, DWORD D, DWORD X, DWORD S, DWORD T); 316 inline void HH(DWORD &A, DWORD B, DWORD C, DWORD D, DWORD X, DWORD S, DWORD T); 317 inline void II(DWORD &A, DWORD B, DWORD C, DWORD D, DWORD X, DWORD S, DWORD T); 318 319 //utility functions 320 void DWordToByte(BYTE *Output, DWORD *Input, UINT nLength); 321 void ByteToDWord(DWORD *Output, BYTE *Input, UINT nLength); 322 323 private: 324 PBYTE m_pBuffer; 325 BYTE m_lpszBuffer[64]; //input buffer 326 UINT m_nCount[2]; //number of bits, modulo 2^64 (lsb first) 327 UINT m_lMD5[4]; //MD5 checksum 328 }; 329 330 #endif // !defined(AFX_MD5CHECKSUM_H__2BC7928E_4C15_11D3_B2EE_A4A60E20D2C3__INCLUDED_) 331 332 333 334 335 336 337 338 339