1 Jul 2006 05:45
Re: [PATCH] flacenc - lpc and options
Justin Ruggles <jruggle <at> earthlink.net>
2006-07-01 03:45:48 GMT
2006-07-01 03:45:48 GMT
Michael Niedermayer wrote: > [...] > a cleaned up md5.c is attached, iam not claiming it to be faster, probably > its not but that is a gcc issue, if gcc would unroll the main loop it would > be quite fast as everything could be optimized away > > [...] In my standalone encoder, I was using a modified version of a pretty good public domain MD5 implementation. I have added a couple more changes based on your code. This is not a proposal for final inclusion, just another source for ideas. It seems to run about 5X faster than what you posted, but like you said, that could just be a gcc issue. -Justin
/**
* This is an implementation of the RSA Data Security, Inc.
* MD5 Message-Digest Algorithm.
*
* Written by Solar Designer <solar at openwall.com> in 2001, and placed
* in the public domain.
*
* Modified in 2006 by Justin Ruggles for use with FFmpeg.
* [LGPL goes here]
*/
//#include "common.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
#include "md5.h"
/**
* The basic MD5 functions.
*
* F is optimized compared to its RFC 1321 definition just like in Colin
* Plumb's implementation.
*/
#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y))))
#define H(x, y, z) ((x) ^ (y) ^ (z))
#define I(x, y, z) ((y) ^ ((x) | ~(z)))
/**
* The MD5 transformation for all four rounds.
*/
#define STEP(f, a, b, c, d, x, t, s) \
(a) += f((b), (c), (d)) + (x) + (t); \
(a) = (((a) << (s)) | (((a) & 0xFFFFFFFF) >> (32 - (s)))); \
(a) += (b);
/**
* This processes one or more 64-byte data blocks, but does NOT update
* the bit counters. There are no alignment requirements.
*/
static void *body(MD5Context *ctx, const void *data, uint32_t len)
{
int i;
uint8_t *ptr;
uint32_t a, b, c, d;
uint32_t saved_a, saved_b, saved_c, saved_d;
ptr = (uint8_t *)data;
a = ctx->abcd[0];
b = ctx->abcd[1];
c = ctx->abcd[2];
d = ctx->abcd[3];
do {
saved_a = a;
saved_b = b;
saved_c = c;
saved_d = d;
memcpy(ctx->block, ptr, 64);
#ifdef WORDS_BIGENDIAN
for(i=0; i<16; i++) {
ctx->block[i]= bswap_32(ctx->block[i]);
}
#endif
/* Round 1 */
STEP(F, a, b, c, d, ctx->block[ 0], 0xD76AA478, 7)
STEP(F, d, a, b, c, ctx->block[ 1], 0xE8C7B756, 12)
STEP(F, c, d, a, b, ctx->block[ 2], 0x242070DB, 17)
STEP(F, b, c, d, a, ctx->block[ 3], 0xC1BDCEEE, 22)
STEP(F, a, b, c, d, ctx->block[ 4], 0xF57C0FAF, 7)
STEP(F, d, a, b, c, ctx->block[ 5], 0x4787C62A, 12)
STEP(F, c, d, a, b, ctx->block[ 6], 0xA8304613, 17)
STEP(F, b, c, d, a, ctx->block[ 7], 0xFD469501, 22)
STEP(F, a, b, c, d, ctx->block[ 8], 0x698098D8, 7)
STEP(F, d, a, b, c, ctx->block[ 9], 0x8B44F7AF, 12)
STEP(F, c, d, a, b, ctx->block[10], 0xFFFF5BB1, 17)
STEP(F, b, c, d, a, ctx->block[11], 0x895CD7BE, 22)
STEP(F, a, b, c, d, ctx->block[12], 0x6B901122, 7)
STEP(F, d, a, b, c, ctx->block[13], 0xFD987193, 12)
STEP(F, c, d, a, b, ctx->block[14], 0xA679438E, 17)
STEP(F, b, c, d, a, ctx->block[15], 0x49B40821, 22)
/* Round 2 */
STEP(G, a, b, c, d, ctx->block[ 1], 0xF61E2562, 5)
STEP(G, d, a, b, c, ctx->block[ 6], 0xC040B340, 9)
STEP(G, c, d, a, b, ctx->block[11], 0x265E5A51, 14)
STEP(G, b, c, d, a, ctx->block[ 0], 0xE9B6C7AA, 20)
STEP(G, a, b, c, d, ctx->block[ 5], 0xD62F105D, 5)
STEP(G, d, a, b, c, ctx->block[10], 0x02441453, 9)
STEP(G, c, d, a, b, ctx->block[15], 0xD8A1E681, 14)
STEP(G, b, c, d, a, ctx->block[ 4], 0xE7D3FBC8, 20)
STEP(G, a, b, c, d, ctx->block[ 9], 0x21E1CDE6, 5)
STEP(G, d, a, b, c, ctx->block[14], 0xC33707D6, 9)
STEP(G, c, d, a, b, ctx->block[ 3], 0xF4D50D87, 14)
STEP(G, b, c, d, a, ctx->block[ 8], 0x455A14ED, 20)
STEP(G, a, b, c, d, ctx->block[13], 0xA9E3E905, 5)
STEP(G, d, a, b, c, ctx->block[ 2], 0xFCEFA3F8, 9)
STEP(G, c, d, a, b, ctx->block[ 7], 0x676F02D9, 14)
STEP(G, b, c, d, a, ctx->block[12], 0x8D2A4C8A, 20)
/* Round 3 */
STEP(H, a, b, c, d, ctx->block[ 5], 0xFFFA3942, 4)
STEP(H, d, a, b, c, ctx->block[ 8], 0x8771F681, 11)
STEP(H, c, d, a, b, ctx->block[11], 0x6D9D6122, 16)
STEP(H, b, c, d, a, ctx->block[14], 0xFDE5380C, 23)
STEP(H, a, b, c, d, ctx->block[ 1], 0xA4BEEA44, 4)
STEP(H, d, a, b, c, ctx->block[ 4], 0x4BDECFA9, 11)
STEP(H, c, d, a, b, ctx->block[ 7], 0xF6BB4B60, 16)
STEP(H, b, c, d, a, ctx->block[10], 0xBEBFBC70, 23)
STEP(H, a, b, c, d, ctx->block[13], 0x289B7EC6, 4)
STEP(H, d, a, b, c, ctx->block[ 0], 0xEAA127FA, 11)
STEP(H, c, d, a, b, ctx->block[ 3], 0xD4EF3085, 16)
STEP(H, b, c, d, a, ctx->block[ 6], 0x04881D05, 23)
STEP(H, a, b, c, d, ctx->block[ 9], 0xD9D4D039, 4)
STEP(H, d, a, b, c, ctx->block[12], 0xE6DB99E5, 11)
STEP(H, c, d, a, b, ctx->block[15], 0x1FA27CF8, 16)
STEP(H, b, c, d, a, ctx->block[ 2], 0xC4AC5665, 23)
/* Round 4 */
STEP(I, a, b, c, d, ctx->block[ 0], 0xF4292244, 6)
STEP(I, d, a, b, c, ctx->block[ 7], 0x432AFF97, 10)
STEP(I, c, d, a, b, ctx->block[14], 0xAB9423A7, 15)
STEP(I, b, c, d, a, ctx->block[ 5], 0xFC93A039, 21)
STEP(I, a, b, c, d, ctx->block[12], 0x655B59C3, 6)
STEP(I, d, a, b, c, ctx->block[ 3], 0x8F0CCC92, 10)
STEP(I, c, d, a, b, ctx->block[10], 0xFFEFF47D, 15)
STEP(I, b, c, d, a, ctx->block[ 1], 0x85845DD1, 21)
STEP(I, a, b, c, d, ctx->block[ 8], 0x6FA87E4F, 6)
STEP(I, d, a, b, c, ctx->block[15], 0xFE2CE6E0, 10)
STEP(I, c, d, a, b, ctx->block[ 6], 0xA3014314, 15)
STEP(I, b, c, d, a, ctx->block[13], 0x4E0811A1, 21)
STEP(I, a, b, c, d, ctx->block[ 4], 0xF7537E82, 6)
STEP(I, d, a, b, c, ctx->block[11], 0xBD3AF235, 10)
STEP(I, c, d, a, b, ctx->block[ 2], 0x2AD7D2BB, 15)
STEP(I, b, c, d, a, ctx->block[ 9], 0xEB86D391, 21)
a += saved_a;
b += saved_b;
c += saved_c;
d += saved_d;
ptr += 64;
} while(len -= 64);
ctx->abcd[0] = a;
ctx->abcd[1] = b;
ctx->abcd[2] = c;
ctx->abcd[3] = d;
return ptr;
}
void av_md5_init(MD5Context *ctx)
{
ctx->abcd[0] = 0x67452301;
ctx->abcd[1] = 0xEFCDAB89;
ctx->abcd[2] = 0x98BADCFE;
ctx->abcd[3] = 0x10325476;
ctx->lo = 0;
ctx->hi = 0;
}
void av_md5_update(MD5Context *ctx, const void *data, uint32_t len)
{
uint32_t saved_lo;
uint32_t used, free;
saved_lo = ctx->lo;
if((ctx->lo = (saved_lo + len) & 0x1FFFFFFF) < saved_lo)
ctx->hi++;
ctx->hi += len >> 29;
used = saved_lo & 0x3F;
if(used) {
free = 64 - used;
if(len < free) {
memcpy(&ctx->buffer[used], data, len);
return;
}
memcpy(&ctx->buffer[used], data, free);
data = (uint8_t *)data + free;
len -= free;
body(ctx, ctx->buffer, 64);
}
if(len >= 64) {
data = body(ctx, data, len & ~(uint32_t)0x3F);
len &= 0x3F;
}
memcpy(ctx->buffer, data, len);
}
void av_md5_final(MD5Context *ctx, uint8_t *result)
{
int i;
uint32_t used, free;
used = ctx->lo & 0x3F;
ctx->buffer[used++] = 0x80;
free = 64 - used;
if(free < 8) {
memset(&ctx->buffer[used], 0, free);
body(ctx, ctx->buffer, 64);
used = 0;
free = 64;
}
memset(&ctx->buffer[used], 0, free-8);
ctx->lo <<= 3;
ctx->buffer[56] = ctx->lo;
ctx->buffer[57] = ctx->lo >> 8;
ctx->buffer[58] = ctx->lo >> 16;
ctx->buffer[59] = ctx->lo >> 24;
ctx->buffer[60] = ctx->hi;
ctx->buffer[61] = ctx->hi >> 8;
ctx->buffer[62] = ctx->hi >> 16;
ctx->buffer[63] = ctx->hi >> 24;
body(ctx, ctx->buffer, 64);
#define le2me_32(a) a
for(i=0; i<4; i++) {
((uint32_t*)result)[i]= le2me_32(ctx->abcd[i]);
}
memset(ctx, 0, sizeof(*ctx));
}
void av_md5_sum(uint8_t *result, const void *data, uint32_t len)
{
MD5Context ctx;
av_md5_init(&ctx);
av_md5_update(&ctx, data, len);
av_md5_final(&ctx, result);
}
#ifdef TEST
#include <stdio.h>
static void md5_print( uint8_t digest[16])
{
int i;
for(i=0; i<16; i++) {
printf("%02x", digest[i]);
}
}
#define BUFSIZE 8192
int main(int argc, char **argv)
{
MD5Context ctx;
int nr;
uint8_t buffer[BUFSIZE];
uint8_t digest[16];
av_md5_init(&ctx);
nr = fread(buffer, 1, BUFSIZE, stdin);
while(nr > 0) {
av_md5_update(&ctx, buffer, nr);
nr = fread(buffer, 1, BUFSIZE, stdin);
}
av_md5_final(&ctx, digest);
md5_print(digest);
printf("\n");
}
#endif
/**
* This is an implementation of the RSA Data Security, Inc.
* MD5 Message-Digest Algorithm.
*
* Written by Solar Designer <solar at openwall.com> in 2001, and placed
* in the public domain.
*
* Modified in 2006 by Justin Ruggles for use with FFmpeg.
* [LGPL goes here]
*/
#ifndef MD5_H
#define MD5_H
#include <inttypes.h>
typedef struct MD5Context {
uint32_t lo, hi;
uint32_t abcd[4];
uint8_t buffer[64];
uint32_t block[16];
} MD5Context;
extern void av_md5_init(MD5Context *ctx);
extern void av_md5_update(MD5Context *ctx, const void *data, uint32_t len);
extern void av_md5_final(MD5Context *ctx, uint8_t *result);
extern void av_md5_sum(uint8_t *result, const void *data, uint32_t len);
#endif /* MD5_H */
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel <at> mplayerhq.hu http://lists.mplayerhq.hu/mailman/listinfo/ffmpeg-devel
RSS Feed