Implementation of MD5 in C

2019-09-03 20:38发布

问题:

I want to implement an MD5 hashing function in a C project, and I want to make my own, because one thing I am afraid of is using someone else's code (mainly because I have trouble understanding such codes). So, I headed straight to the Wiki page for the pseudocode: http://en.wikipedia.org/wiki/MD5 I decided to leave the padding and breaking into 512 bit chunks stuff for later and just start with an MD5 hash of an empty string. Properly padded, it should (I think) look like this:

unsigned int message[16] = {0x80000000, 0x00000000, 0x00000000, 0x00000000,//binary: 100000000000000000000000...
                            0x00000000, 0x00000000, 0x00000000, 0x00000000,// ...0000000000000000000000000000...
                            0x00000000, 0x00000000, 0x00000000, 0x00000000,// ...0000000000000000000000000000...
                            0x00000000, 0x00000000, 0x00000000, 0x00000000};//...0000000000000000000000000000000

And here is how I reproduced Wiki's main loop (the one that deals with just one 512-bit chunk) in C:

unsigned int a0, b0, c0, d0, 
             A, B, C, D,
             i, F, g, bufD;

//empty string appended with 1 bit and zeroes till it is 512 bytes long as 16 32-bit words
unsigned int message[16] = {0x80000000, 0x00000000, 0x00000000, 0x00000000,//binary: 100000000000000000000000...
                            0x00000000, 0x00000000, 0x00000000, 0x00000000,// ...0000000000000000000000000000...
                            0x00000000, 0x00000000, 0x00000000, 0x00000000,// ...0000000000000000000000000000...
                            0x00000000, 0x00000000, 0x00000000, 0x00000000};//...0000000000000000000000000000000

unsigned int shiftAmounts[64] = {7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
                                 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20,
                                 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
                                 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21};

unsigned int partsOfSines[64] = {0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
                                 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
                                 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
                                 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
                                 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
                                 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
                                 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
                                 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
                                 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
                                 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
                                 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
                                 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
                                 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
                                 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
                                 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
                                 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391};

a0 = 0x67452301;
b0 = 0xefcdab89;
c0 = 0x98badcfe;
d0 = 0x10325476;

//gotta put this into a loop for each 512-bit chunk
A = a0;
B = b0;
C = c0;
D = d0;

for (i=0; i<64; i++){
    if (i < 16){
        F = (B & C) | (~B & D);
        g = i;
    } else if (i >= 16 && i < 32){
        F = (D & B) | (~D & C);
        g = (5*i + 1) % 16;
    } else if (i >= 32 && i < 48){
        F = B ^ C ^ D;
        g = (3*i + 5) % 16;
    } else if (i >= 48){
        F = C ^ (B | ~D);
        g = (7*i) % 16;
    }
    bufD = D;
    D = C;
    C = B;
    B += leftRotate((A + F + partsOfSines[i] + message[g]), shiftAmounts[i]);
    A = bufD;
}
a0 += A;
b0 += B;
c0 += C;
d0 += D;
//end future loop

printf ("a0=%u, b0=%u, c0=%u, d0=%u", a0, b0, c0, d0);

The leftRotate function, also modified from the pseudocode on Wikipedia:

unsigned int leftRotate(unsigned int x, int n){
    return ((x) << n) | ((x) >> (32 - n));
}

This outputs the following:

a0=578518856, b0=3524428790, c0=1003076545, d0=1531243034

Converting these decimal values to hex, I get the following:

a0 = 578518856 ->  227B7F48
b0 = 3524428790 -> D21283F6
c0 = 1003076545 -> 3BC9BBC1
d0 = 1531243034 -> 5B44EA1A

Which isn't even a bit similar to the actual MD5 digest of an empty string (which is d41d8cd98f00b204e9800998ecf8427e). So, my question is, where am I going wrong?

回答1:

#include <stdio.h>

//empty string appended with 1 bit and zeroes till it is 512 bytes long as 16 32-bit words
unsigned int message[16] = {0x00000080, 0x00000000, 0x00000000, 0x00000000,
                            0x00000000, 0x00000000, 0x00000000, 0x00000000,
                            0x00000000, 0x00000000, 0x00000000, 0x00000000,
                            0x00000000, 0x00000000, 0x00000000, 0x00000000};

unsigned int shiftAmounts[64] = {7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
                                 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20,
                                 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
                                 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21};

unsigned int partsOfSines[64] = {0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
                                 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
                                 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
                                 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
                                 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
                                 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
                                 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
                                 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
                                 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
                                 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
                                 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
                                 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
                                 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
                                 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
                                 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
                                 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391};

unsigned leftRotate(unsigned x, int n) {
    return (x << n) | (x >> (32 - n));
}

void printReverseEndian(unsigned n) {
  printf("%02x%02x%02x%02x", n & 0xff, (n >> 8) & 0xff, (n >> 16) & 0xff, n >> 24);
}

int main() {

  unsigned int a0, b0, c0, d0, 
               A, B, C, D,
               i, F, g, bufD;

  a0 = 0x67452301;
  b0 = 0xefcdab89;
  c0 = 0x98badcfe;
  d0 = 0x10325476;

  A = a0;
  B = b0;
  C = c0;
  D = d0;

  for (i=0; i<64; i++){
    if (i < 16){
        F = (B & C) | (~B & D);
        g = i;
    } else if (i < 32){
        F = (D & B) | (~D & C);
        g = (5*i + 1) % 16;
    } else if (i < 48){
        F = B ^ C ^ D;
        g = (3*i + 5) % 16;
    } else {
        F = C ^ (B | ~D);
        g = (7*i) % 16;
    }
    bufD = D;
    D = C;
    C = B;
    B += leftRotate((A + F + partsOfSines[i] + message[g]), shiftAmounts[i]);
    A = bufD;
  }
  a0 += A;
  b0 += B;
  c0 += C;
  d0 += D;


  printReverseEndian(a0);
  printReverseEndian(b0);
  printReverseEndian(c0);
  printReverseEndian(d0);

  return 0;
}


回答2:

Might be a problem of target architecture (endianness, size of types, ...)
My answer comes from bits of http://bytes.com/topic/c/answers/855645-simple-md5-hash-different-output-different-os

Edit:
The problem is because the bit of padding you added isn't on the correct byte considering little-endian storage in memory. Moreover, you must print the resulting md5 hash as it is stored in memory (in LE).

#include <stdio.h>

unsigned int leftRotate(unsigned int x, unsigned int n){
    return (x << n) | (x >> (32 - n));
}

int main(){
    unsigned int a0, b0, c0, d0, 
                 A, B, C, D,
                 i, F, g, bufD;

    //empty string appended with 1 bit and zeroes till it is 512 bytes long as 16 32-bit words
    unsigned int message[16] = {0x00000080, 0x00000000, 0x00000000, 0x00000000,//binary: 100000000000000000000000...
                                0x00000000, 0x00000000, 0x00000000, 0x00000000,// ...0000000000000000000000000000...
                                0x00000000, 0x00000000, 0x00000000, 0x00000000,// ...0000000000000000000000000000...
                                0x00000000, 0x00000000, 0x00000000, 0x00000000};//...0000000000000000000000000000000

    unsigned int shiftAmounts[64] = {7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
                                     5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20,
                                     4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
                                     6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21};

    unsigned int partsOfSines[64] = {0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
                                     0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
                                     0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
                                     0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
                                     0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
                                     0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
                                     0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
                                     0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
                                     0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
                                     0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
                                     0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
                                     0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
                                     0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
                                     0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
                                     0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
                                     0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391};

    a0 = 0x67452301;
    b0 = 0xefcdab89;
    c0 = 0x98badcfe;
    d0 = 0x10325476;

    //gotta put this into a loop for each 512-bit chunk
    A = a0;
    B = b0;
    C = c0;
    D = d0;

    for (i=0; i<64; i++){
        if (i < 16){
            F = (B & C) | (~B & D);
            g = i;
        } else if (i >= 16 && i < 32){
            F = (D & B) | (~D & C);
            g = (5*i + 1) % 16;
        } else if (i >= 32 && i < 48){
            F = B ^ C ^ D;
            g = (3*i + 5) % 16;
        } else if (i >= 48){
            F = C ^ (B | ~D);
            g = (7*i) % 16;
        }
        bufD = D;
        D = C;
        C = B;
        B += leftRotate((A + F + partsOfSines[i] + message[g]), shiftAmounts[i]);
        A = bufD;
    }
    a0 += A;
    b0 += B;
    c0 += C;
    d0 += D;
    //end future loop

    printf("%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", a0&0xff, (a0>>8)&0xff, (a0>>16)&0xff, (a0>>24)&0xff, b0&0xff, (b0>>8)&0xff, (b0>>16)&0xff, (b0>>24)&0xff, c0&0xff, (c0>>8)&0xff, (c0>>16)&0xff, (c0>>24)&0xff, d0&0xff, (d0>>8)&0xff, (d0>>16)&0xff, (d0>>24)&0xff);
}

Edit2:
Ahhhh, too late :/



标签: c hash md5