From c262649291e711c084c5d8fc3fd0eee175f155ff Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Wed, 3 Oct 2012 13:48:27 +0100 Subject: [PATCH 1/3] build: add rules to generate preprocessed source files This is useful for debugging. Dependencies for these files are not generated due to limitations in many compilers. Signed-off-by: Mans Rullgard --- Makefile | 3 +++ configure | 1 + library.mak | 11 +++++++++-- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 5e1dae66aa..91acc4c0b3 100644 --- a/Makefile +++ b/Makefile @@ -45,6 +45,9 @@ COMPILE_S = $(call COMPILE,AS) %.o: %.S $(COMPILE_S) +%.i: %.c + $(CC) $(CCFLAGS) $(CC_E) $< + %.h.c: $(Q)echo '#include "$*.h"' >$@ diff --git a/configure b/configure index 890ff93b3d..0883035d07 100755 --- a/configure +++ b/configure @@ -3756,6 +3756,7 @@ ASFLAGS=$ASFLAGS AS_C=$AS_C AS_O=$AS_O CC_C=$CC_C +CC_E=$CC_E CC_O=$CC_O LD_O=$LD_O LD_LIB=$LD_LIB diff --git a/library.mak b/library.mak index 33ec37f1cd..9309c42e5d 100644 --- a/library.mak +++ b/library.mak @@ -17,12 +17,19 @@ $(SUBDIR)%-test.o: $(SUBDIR)%-test.c $(SUBDIR)%-test.o: $(SUBDIR)%.c $(COMPILE_C) +$(SUBDIR)%-test.i: $(SUBDIR)%-test.c + $(CC) $(CCFLAGS) $(CC_E) $< + +$(SUBDIR)%-test.i: $(SUBDIR)%.c + $(CC) $(CCFLAGS) $(CC_E) $< + $(SUBDIR)x86/%.o: $(SUBDIR)x86/%.asm $(DEPYASM) $(YASMFLAGS) -I $( $(@:.o=.d) $(YASM) $(YASMFLAGS) -I $( Date: Sat, 4 Aug 2012 02:57:53 +0100 Subject: [PATCH 2/3] mpegvideo: simplify dxy calculation in hpel_motion() Signed-off-by: Mans Rullgard --- libavcodec/mpegvideo_motion.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/libavcodec/mpegvideo_motion.c b/libavcodec/mpegvideo_motion.c index 9168793183..4ea31ad252 100644 --- a/libavcodec/mpegvideo_motion.c +++ b/libavcodec/mpegvideo_motion.c @@ -177,20 +177,19 @@ static inline int hpel_motion(MpegEncContext *s, op_pixels_func *pix_op, int motion_x, int motion_y) { - int dxy; + int dxy = 0; int emu=0; - dxy = ((motion_y & 1) << 1) | (motion_x & 1); src_x += motion_x >> 1; src_y += motion_y >> 1; /* WARNING: do no forget half pels */ src_x = av_clip(src_x, -16, s->width); //FIXME unneeded for emu? - if (src_x == s->width) - dxy &= ~1; + if (src_x != s->width) + dxy |= motion_x & 1; src_y = av_clip(src_y, -16, s->height); - if (src_y == s->height) - dxy &= ~2; + if (src_y != s->height) + dxy |= (motion_y & 1) << 1; src += src_y * s->linesize + src_x; if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){ From 4a606c830ae664013cea33800094d4d0f4ec62da Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Fri, 26 Oct 2012 14:42:23 +0100 Subject: [PATCH 3/3] av_memcpy_backptr: optimise some special cases - Add special cases for offsets of 2, 3, or 4 bytes. This means the offset is always >4 in the generic case, allowing 32-bit copies to be used there. - Don't use memcpy() for sizes less than 16 bytes. Signed-off-by: Mans Rullgard --- libavutil/mem.c | 120 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 103 insertions(+), 17 deletions(-) diff --git a/libavutil/mem.c b/libavutil/mem.c index feba3163b0..b6c0b29319 100644 --- a/libavutil/mem.c +++ b/libavutil/mem.c @@ -180,29 +180,93 @@ char *av_strdup(const char *s) return ptr; } +static void fill16(uint8_t *dst, int len) +{ + uint32_t v = AV_RN16(dst - 2); + + v |= v << 16; + + while (len >= 4) { + AV_WN32(dst, v); + dst += 4; + len -= 4; + } + + while (len--) { + *dst = dst[-2]; + dst++; + } +} + +static void fill24(uint8_t *dst, int len) +{ +#if HAVE_BIGENDIAN + uint32_t v = AV_RB24(dst - 3); + uint32_t a = v << 8 | v >> 16; + uint32_t b = v << 16 | v >> 8; + uint32_t c = v << 24 | v; +#else + uint32_t v = AV_RL24(dst - 3); + uint32_t a = v | v << 24; + uint32_t b = v >> 8 | v << 16; + uint32_t c = v >> 16 | v << 8; +#endif + + while (len >= 12) { + AV_WN32(dst, a); + AV_WN32(dst + 4, b); + AV_WN32(dst + 8, c); + dst += 12; + len -= 12; + } + + if (len >= 4) { + AV_WN32(dst, a); + dst += 4; + len -= 4; + } + + if (len >= 4) { + AV_WN32(dst, b); + dst += 4; + len -= 4; + } + + while (len--) { + *dst = dst[-3]; + dst++; + } +} + +static void fill32(uint8_t *dst, int len) +{ + uint32_t v = AV_RN32(dst - 4); + + while (len >= 4) { + AV_WN32(dst, v); + dst += 4; + len -= 4; + } + + while (len--) { + *dst = dst[-4]; + dst++; + } +} + void av_memcpy_backptr(uint8_t *dst, int back, int cnt) { const uint8_t *src = &dst[-back]; if (back == 1) { memset(dst, *src, cnt); + } else if (back == 2) { + fill16(dst, cnt); + } else if (back == 3) { + fill24(dst, cnt); + } else if (back == 4) { + fill32(dst, cnt); } else { - if (cnt >= 4) { - AV_COPY16U(dst, src); - AV_COPY16U(dst + 2, src + 2); - src += 4; - dst += 4; - cnt -= 4; - } - if (cnt >= 8) { - AV_COPY16U(dst, src); - AV_COPY16U(dst + 2, src + 2); - AV_COPY16U(dst + 4, src + 4); - AV_COPY16U(dst + 6, src + 6); - src += 8; - dst += 8; - cnt -= 8; - } - if (cnt > 0) { + if (cnt >= 16) { int blocklen = back; while (cnt > blocklen) { memcpy(dst, src, blocklen); @@ -211,6 +275,28 @@ void av_memcpy_backptr(uint8_t *dst, int back, int cnt) blocklen <<= 1; } memcpy(dst, src, cnt); + return; } + if (cnt >= 8) { + AV_COPY32U(dst, src); + AV_COPY32U(dst + 4, src + 4); + src += 8; + dst += 8; + cnt -= 8; + } + if (cnt >= 4) { + AV_COPY32U(dst, src); + src += 4; + dst += 4; + cnt -= 4; + } + if (cnt >= 2) { + AV_COPY16U(dst, src); + src += 2; + dst += 2; + cnt -= 2; + } + if (cnt) + *dst = *src; } }