Justin Ruggles | 1 Jul 2006 05:45
Picon
Favicon

Re: [PATCH] flacenc - lpc and options

Michael Niedermayer wrote:
> [...]
> a cleaned up md5.c is attached, iam not claiming it to be faster, probably
> its not but that is a gcc issue, if gcc would unroll the main loop it would
> be quite fast as everything could be optimized away
> 
> [...]

In my standalone encoder, I was using a modified version of a pretty
good public domain MD5 implementation.  I have added a couple more
changes based on your code.  This is not a proposal for final inclusion,
just another source for ideas.  It seems to run about 5X faster than
what you posted, but like you said, that could just be a gcc issue.

-Justin

/**
 * This is an implementation of the RSA Data Security, Inc.
 * MD5 Message-Digest Algorithm.
 *
 * Written by Solar Designer <solar at openwall.com> in 2001, and placed
 * in the public domain.
 *
 * Modified in 2006 by Justin Ruggles for use with FFmpeg.
 * [LGPL goes here]
 */

//#include "common.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>

#include "md5.h"

/**
 * The basic MD5 functions.
 *
 * F is optimized compared to its RFC 1321 definition just like in Colin
 * Plumb's implementation.
 */
#define F(x, y, z)  ((z) ^ ((x) & ((y) ^ (z))))
#define G(x, y, z)  ((y) ^ ((z) & ((x) ^ (y))))
#define H(x, y, z)  ((x) ^ (y) ^ (z))
#define I(x, y, z)  ((y) ^ ((x) | ~(z)))

/**
 * The MD5 transformation for all four rounds.
 */
#define STEP(f, a, b, c, d, x, t, s) \
    (a) += f((b), (c), (d)) + (x) + (t); \
    (a) = (((a) << (s)) | (((a) & 0xFFFFFFFF) >> (32 - (s)))); \
    (a) += (b);

/**
 * This processes one or more 64-byte data blocks, but does NOT update
 * the bit counters.  There are no alignment requirements.
 */
static void *body(MD5Context *ctx, const void *data, uint32_t len)
{
    int i;
    uint8_t *ptr;
    uint32_t a, b, c, d;
    uint32_t saved_a, saved_b, saved_c, saved_d;

    ptr = (uint8_t *)data;

    a = ctx->abcd[0];
    b = ctx->abcd[1];
    c = ctx->abcd[2];
    d = ctx->abcd[3];

    do {
        saved_a = a;
        saved_b = b;
        saved_c = c;
        saved_d = d;

        memcpy(ctx->block, ptr, 64);

#ifdef WORDS_BIGENDIAN
        for(i=0; i<16; i++) {
            ctx->block[i]= bswap_32(ctx->block[i]);
        }
#endif

        /* Round 1 */
        STEP(F, a, b, c, d, ctx->block[ 0], 0xD76AA478,  7)
        STEP(F, d, a, b, c, ctx->block[ 1], 0xE8C7B756, 12)
        STEP(F, c, d, a, b, ctx->block[ 2], 0x242070DB, 17)
        STEP(F, b, c, d, a, ctx->block[ 3], 0xC1BDCEEE, 22)
        STEP(F, a, b, c, d, ctx->block[ 4], 0xF57C0FAF,  7)
        STEP(F, d, a, b, c, ctx->block[ 5], 0x4787C62A, 12)
        STEP(F, c, d, a, b, ctx->block[ 6], 0xA8304613, 17)
        STEP(F, b, c, d, a, ctx->block[ 7], 0xFD469501, 22)
        STEP(F, a, b, c, d, ctx->block[ 8], 0x698098D8,  7)
        STEP(F, d, a, b, c, ctx->block[ 9], 0x8B44F7AF, 12)
        STEP(F, c, d, a, b, ctx->block[10], 0xFFFF5BB1, 17)
        STEP(F, b, c, d, a, ctx->block[11], 0x895CD7BE, 22)
        STEP(F, a, b, c, d, ctx->block[12], 0x6B901122,  7)
        STEP(F, d, a, b, c, ctx->block[13], 0xFD987193, 12)
        STEP(F, c, d, a, b, ctx->block[14], 0xA679438E, 17)
        STEP(F, b, c, d, a, ctx->block[15], 0x49B40821, 22)

        /* Round 2 */
        STEP(G, a, b, c, d, ctx->block[ 1], 0xF61E2562,  5)
        STEP(G, d, a, b, c, ctx->block[ 6], 0xC040B340,  9)
        STEP(G, c, d, a, b, ctx->block[11], 0x265E5A51, 14)
        STEP(G, b, c, d, a, ctx->block[ 0], 0xE9B6C7AA, 20)
        STEP(G, a, b, c, d, ctx->block[ 5], 0xD62F105D,  5)
        STEP(G, d, a, b, c, ctx->block[10], 0x02441453,  9)
        STEP(G, c, d, a, b, ctx->block[15], 0xD8A1E681, 14)
        STEP(G, b, c, d, a, ctx->block[ 4], 0xE7D3FBC8, 20)
        STEP(G, a, b, c, d, ctx->block[ 9], 0x21E1CDE6,  5)
        STEP(G, d, a, b, c, ctx->block[14], 0xC33707D6,  9)
        STEP(G, c, d, a, b, ctx->block[ 3], 0xF4D50D87, 14)
        STEP(G, b, c, d, a, ctx->block[ 8], 0x455A14ED, 20)
        STEP(G, a, b, c, d, ctx->block[13], 0xA9E3E905,  5)
        STEP(G, d, a, b, c, ctx->block[ 2], 0xFCEFA3F8,  9)
        STEP(G, c, d, a, b, ctx->block[ 7], 0x676F02D9, 14)
        STEP(G, b, c, d, a, ctx->block[12], 0x8D2A4C8A, 20)

        /* Round 3 */
        STEP(H, a, b, c, d, ctx->block[ 5], 0xFFFA3942,  4)
        STEP(H, d, a, b, c, ctx->block[ 8], 0x8771F681, 11)
        STEP(H, c, d, a, b, ctx->block[11], 0x6D9D6122, 16)
        STEP(H, b, c, d, a, ctx->block[14], 0xFDE5380C, 23)
        STEP(H, a, b, c, d, ctx->block[ 1], 0xA4BEEA44,  4)
        STEP(H, d, a, b, c, ctx->block[ 4], 0x4BDECFA9, 11)
        STEP(H, c, d, a, b, ctx->block[ 7], 0xF6BB4B60, 16)
        STEP(H, b, c, d, a, ctx->block[10], 0xBEBFBC70, 23)
        STEP(H, a, b, c, d, ctx->block[13], 0x289B7EC6,  4)
        STEP(H, d, a, b, c, ctx->block[ 0], 0xEAA127FA, 11)
        STEP(H, c, d, a, b, ctx->block[ 3], 0xD4EF3085, 16)
        STEP(H, b, c, d, a, ctx->block[ 6], 0x04881D05, 23)
        STEP(H, a, b, c, d, ctx->block[ 9], 0xD9D4D039,  4)
        STEP(H, d, a, b, c, ctx->block[12], 0xE6DB99E5, 11)
        STEP(H, c, d, a, b, ctx->block[15], 0x1FA27CF8, 16)
        STEP(H, b, c, d, a, ctx->block[ 2], 0xC4AC5665, 23)

        /* Round 4 */
        STEP(I, a, b, c, d, ctx->block[ 0], 0xF4292244,  6)
        STEP(I, d, a, b, c, ctx->block[ 7], 0x432AFF97, 10)
        STEP(I, c, d, a, b, ctx->block[14], 0xAB9423A7, 15)
        STEP(I, b, c, d, a, ctx->block[ 5], 0xFC93A039, 21)
        STEP(I, a, b, c, d, ctx->block[12], 0x655B59C3,  6)
        STEP(I, d, a, b, c, ctx->block[ 3], 0x8F0CCC92, 10)
        STEP(I, c, d, a, b, ctx->block[10], 0xFFEFF47D, 15)
        STEP(I, b, c, d, a, ctx->block[ 1], 0x85845DD1, 21)
        STEP(I, a, b, c, d, ctx->block[ 8], 0x6FA87E4F,  6)
        STEP(I, d, a, b, c, ctx->block[15], 0xFE2CE6E0, 10)
        STEP(I, c, d, a, b, ctx->block[ 6], 0xA3014314, 15)
        STEP(I, b, c, d, a, ctx->block[13], 0x4E0811A1, 21)
        STEP(I, a, b, c, d, ctx->block[ 4], 0xF7537E82,  6)
        STEP(I, d, a, b, c, ctx->block[11], 0xBD3AF235, 10)
        STEP(I, c, d, a, b, ctx->block[ 2], 0x2AD7D2BB, 15)
        STEP(I, b, c, d, a, ctx->block[ 9], 0xEB86D391, 21)

        a += saved_a;
        b += saved_b;
        c += saved_c;
        d += saved_d;

        ptr += 64;
    } while(len -= 64);

    ctx->abcd[0] = a;
    ctx->abcd[1] = b;
    ctx->abcd[2] = c;
    ctx->abcd[3] = d;

    return ptr;
}

void av_md5_init(MD5Context *ctx)
{
    ctx->abcd[0] = 0x67452301;
    ctx->abcd[1] = 0xEFCDAB89;
    ctx->abcd[2] = 0x98BADCFE;
    ctx->abcd[3] = 0x10325476;
    ctx->lo = 0;
    ctx->hi = 0;
}

void av_md5_update(MD5Context *ctx, const void *data, uint32_t len)
{
    uint32_t saved_lo;
    uint32_t used, free;

    saved_lo = ctx->lo;
    if((ctx->lo = (saved_lo + len) & 0x1FFFFFFF) < saved_lo)
        ctx->hi++;
    ctx->hi += len >> 29;

    used = saved_lo & 0x3F;

    if(used) {
        free = 64 - used;
        if(len < free) {
            memcpy(&ctx->buffer[used], data, len);
            return;
        }
        memcpy(&ctx->buffer[used], data, free);
        data = (uint8_t *)data + free;
        len -= free;
        body(ctx, ctx->buffer, 64);
    }

    if(len >= 64) {
        data = body(ctx, data, len & ~(uint32_t)0x3F);
        len &= 0x3F;
    }

    memcpy(ctx->buffer, data, len);
}

void av_md5_final(MD5Context *ctx, uint8_t *result)
{
    int i;
    uint32_t used, free;

    used = ctx->lo & 0x3F;

    ctx->buffer[used++] = 0x80;

    free = 64 - used;

    if(free < 8) {
        memset(&ctx->buffer[used], 0, free);
        body(ctx, ctx->buffer, 64);
        used = 0;
        free = 64;
    }

    memset(&ctx->buffer[used], 0, free-8);

    ctx->lo <<= 3;
    ctx->buffer[56] = ctx->lo;
    ctx->buffer[57] = ctx->lo >> 8;
    ctx->buffer[58] = ctx->lo >> 16;
    ctx->buffer[59] = ctx->lo >> 24;
    ctx->buffer[60] = ctx->hi;
    ctx->buffer[61] = ctx->hi >> 8;
    ctx->buffer[62] = ctx->hi >> 16;
    ctx->buffer[63] = ctx->hi >> 24;

    body(ctx, ctx->buffer, 64);

#define le2me_32(a) a
    for(i=0; i<4; i++) {
        ((uint32_t*)result)[i]= le2me_32(ctx->abcd[i]);
    }

    memset(ctx, 0, sizeof(*ctx));
}

void av_md5_sum(uint8_t *result, const void *data, uint32_t len)
{
    MD5Context ctx;

    av_md5_init(&ctx);
    av_md5_update(&ctx, data, len);
    av_md5_final(&ctx, result);
}

#ifdef TEST
#include <stdio.h>

static void md5_print( uint8_t digest[16])
{
    int i;
    for(i=0; i<16; i++) {
        printf("%02x", digest[i]);
    }
}

#define BUFSIZE 8192

int main(int argc, char **argv)
{
    MD5Context ctx;
    int nr;
    uint8_t buffer[BUFSIZE];
    uint8_t digest[16];

    av_md5_init(&ctx);
    nr = fread(buffer, 1, BUFSIZE, stdin);
    while(nr > 0) {
        av_md5_update(&ctx, buffer, nr);
        nr = fread(buffer, 1, BUFSIZE, stdin);
    }
    av_md5_final(&ctx, digest);

    md5_print(digest);
    printf("\n");
}
#endif
/**
 * This is an implementation of the RSA Data Security, Inc.
 * MD5 Message-Digest Algorithm.
 *
 * Written by Solar Designer <solar at openwall.com> in 2001, and placed
 * in the public domain.
 *
 * Modified in 2006 by Justin Ruggles for use with FFmpeg.
 * [LGPL goes here]
 */

#ifndef MD5_H
#define MD5_H

#include <inttypes.h>

typedef struct MD5Context {
    uint32_t lo, hi;
    uint32_t abcd[4];
    uint8_t buffer[64];
    uint32_t block[16];
} MD5Context;

extern void av_md5_init(MD5Context *ctx);

extern void av_md5_update(MD5Context *ctx, const void *data, uint32_t len);

extern void av_md5_final(MD5Context *ctx, uint8_t *result);

extern void av_md5_sum(uint8_t *result, const void *data, uint32_t len);

#endif /* MD5_H */
_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel <at> mplayerhq.hu
http://lists.mplayerhq.hu/mailman/listinfo/ffmpeg-devel

Gmane