From 9b498148ca27fb59994522301da08afafb7d14fd Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Sat, 21 Jul 2012 12:59:49 +0200
Subject: [PATCH 01/20] rtmp: Factorize the code by adding handle_server_bw
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/rtmpproto.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c
index e20aacbc6c..87bed0ee99 100644
--- a/libavformat/rtmpproto.c
+++ b/libavformat/rtmpproto.c
@@ -880,6 +880,21 @@ static int rtmp_handshake(URLContext *s, RTMPContext *rt)
     return 0;
 }
 
+static int handle_server_bw(URLContext *s, RTMPPacket *pkt)
+{
+    RTMPContext *rt = s->priv_data;
+
+    rt->server_bw = AV_RB32(pkt->data);
+    if (rt->server_bw <= 0) {
+        av_log(s, AV_LOG_ERROR, "Incorrect server bandwidth %d\n",
+               rt->server_bw);
+        return AVERROR(EINVAL);
+    }
+    av_log(s, AV_LOG_DEBUG, "Server bandwidth = %d\n", rt->server_bw);
+
+    return 0;
+}
+
 /**
  * Parse received packet and possibly perform some action depending on
  * the packet contents.
@@ -931,12 +946,8 @@ static int rtmp_parse_result(URLContext *s, RTMPContext *rt, RTMPPacket *pkt)
         rt->client_report_size = AV_RB32(pkt->data) >> 1;
         break;
     case RTMP_PT_SERVER_BW:
-        rt->server_bw = AV_RB32(pkt->data);
-        if (rt->server_bw <= 0) {
-            av_log(s, AV_LOG_ERROR, "Incorrect server bandwidth %d\n", rt->server_bw);
-            return AVERROR(EINVAL);
-        }
-        av_log(s, AV_LOG_DEBUG, "Server bandwidth = %d\n", rt->server_bw);
+        if ((ret = handle_server_bw(s, pkt)) < 0)
+            return ret;
         break;
     case RTMP_PT_INVOKE:
         //TODO: check for the messages sent for wrong state?

From 912ecc9a19c3074d023ceba4815c0d0bde3697e0 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Sat, 21 Jul 2012 12:59:50 +0200
Subject: [PATCH 02/20] rtmp: Factorize the code by adding handle_client_bw
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/rtmpproto.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c
index 87bed0ee99..15361570c2 100644
--- a/libavformat/rtmpproto.c
+++ b/libavformat/rtmpproto.c
@@ -880,6 +880,22 @@ static int rtmp_handshake(URLContext *s, RTMPContext *rt)
     return 0;
 }
 
+static int handle_client_bw(URLContext *s, RTMPPacket *pkt)
+{
+    RTMPContext *rt = s->priv_data;
+
+    if (pkt->data_size < 4) {
+        av_log(s, AV_LOG_ERROR,
+               "Client bandwidth report packet is less than 4 bytes long (%d)\n",
+               pkt->data_size);
+        return -1;
+    }
+    av_log(s, AV_LOG_DEBUG, "Client bandwidth = %d\n", AV_RB32(pkt->data));
+    rt->client_report_size = AV_RB32(pkt->data) >> 1;
+
+    return 0;
+}
+
 static int handle_server_bw(URLContext *s, RTMPPacket *pkt)
 {
     RTMPContext *rt = s->priv_data;
@@ -936,14 +952,8 @@ static int rtmp_parse_result(URLContext *s, RTMPContext *rt, RTMPPacket *pkt)
                 return ret;
         break;
     case RTMP_PT_CLIENT_BW:
-        if (pkt->data_size < 4) {
-            av_log(s, AV_LOG_ERROR,
-                   "Client bandwidth report packet is less than 4 bytes long (%d)\n",
-                   pkt->data_size);
-            return -1;
-        }
-        av_log(s, AV_LOG_DEBUG, "Client bandwidth = %d\n", AV_RB32(pkt->data));
-        rt->client_report_size = AV_RB32(pkt->data) >> 1;
+        if ((ret = handle_client_bw(s, pkt)) < 0)
+            return ret;
         break;
     case RTMP_PT_SERVER_BW:
         if ((ret = handle_server_bw(s, pkt)) < 0)

From 0ffd5161c4f8610fa0133c50bfc19beab761f5c1 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Sat, 21 Jul 2012 12:59:51 +0200
Subject: [PATCH 03/20] rtmp: Factorize the code by adding handle_ping
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/rtmpproto.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c
index 15361570c2..31fa28bf8d 100644
--- a/libavformat/rtmpproto.c
+++ b/libavformat/rtmpproto.c
@@ -880,6 +880,20 @@ static int rtmp_handshake(URLContext *s, RTMPContext *rt)
     return 0;
 }
 
+static int handle_ping(URLContext *s, RTMPPacket *pkt)
+{
+    RTMPContext *rt = s->priv_data;
+    int t, ret;
+
+    t = AV_RB16(pkt->data);
+    if (t == 6) {
+        if ((ret = gen_pong(s, rt, pkt)) < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
 static int handle_client_bw(URLContext *s, RTMPPacket *pkt)
 {
     RTMPContext *rt = s->priv_data;
@@ -946,10 +960,8 @@ static int rtmp_parse_result(URLContext *s, RTMPContext *rt, RTMPPacket *pkt)
         av_log(s, AV_LOG_DEBUG, "New chunk size = %d\n", rt->chunk_size);
         break;
     case RTMP_PT_PING:
-        t = AV_RB16(pkt->data);
-        if (t == 6)
-            if ((ret = gen_pong(s, rt, pkt)) < 0)
-                return ret;
+        if ((ret = handle_ping(s, pkt)) < 0)
+            return ret;
         break;
     case RTMP_PT_CLIENT_BW:
         if ((ret = handle_client_bw(s, pkt)) < 0)

From 7be2a7d8ff43fba8d64320c7b778da9f942a9c1b Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Sat, 21 Jul 2012 12:59:52 +0200
Subject: [PATCH 04/20] rtmp: Factorize the code by adding handle_chunk_size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/rtmpproto.c | 45 +++++++++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 15 deletions(-)

diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c
index 31fa28bf8d..871c24eba0 100644
--- a/libavformat/rtmpproto.c
+++ b/libavformat/rtmpproto.c
@@ -880,6 +880,34 @@ static int rtmp_handshake(URLContext *s, RTMPContext *rt)
     return 0;
 }
 
+static int handle_chunk_size(URLContext *s, RTMPPacket *pkt)
+{
+    RTMPContext *rt = s->priv_data;
+    int ret;
+
+    if (pkt->data_size != 4) {
+        av_log(s, AV_LOG_ERROR,
+               "Chunk size change packet is not 4 bytes long (%d)\n",
+               pkt->data_size);
+        return -1;
+    }
+
+    if (!rt->is_input) {
+        if ((ret = ff_rtmp_packet_write(rt->stream, pkt, rt->chunk_size,
+                                        rt->prev_pkt[1])) < 0)
+            return ret;
+    }
+
+    rt->chunk_size = AV_RB32(pkt->data);
+    if (rt->chunk_size <= 0) {
+        av_log(s, AV_LOG_ERROR, "Incorrect chunk size %d\n", rt->chunk_size);
+        return -1;
+    }
+    av_log(s, AV_LOG_DEBUG, "New chunk size = %d\n", rt->chunk_size);
+
+    return 0;
+}
+
 static int handle_ping(URLContext *s, RTMPPacket *pkt)
 {
     RTMPContext *rt = s->priv_data;
@@ -943,21 +971,8 @@ static int rtmp_parse_result(URLContext *s, RTMPContext *rt, RTMPPacket *pkt)
 
     switch (pkt->type) {
     case RTMP_PT_CHUNK_SIZE:
-        if (pkt->data_size != 4) {
-            av_log(s, AV_LOG_ERROR,
-                   "Chunk size change packet is not 4 bytes long (%d)\n", pkt->data_size);
-            return -1;
-        }
-        if (!rt->is_input)
-            if ((ret = ff_rtmp_packet_write(rt->stream, pkt, rt->chunk_size,
-                                            rt->prev_pkt[1])) < 0)
-                return ret;
-        rt->chunk_size = AV_RB32(pkt->data);
-        if (rt->chunk_size <= 0) {
-            av_log(s, AV_LOG_ERROR, "Incorrect chunk size %d\n", rt->chunk_size);
-            return -1;
-        }
-        av_log(s, AV_LOG_DEBUG, "New chunk size = %d\n", rt->chunk_size);
+        if ((ret = handle_chunk_size(s, pkt)) < 0)
+            return ret;
         break;
     case RTMP_PT_PING:
         if ((ret = handle_ping(s, pkt)) < 0)

From 6d1c9945dd0a539aa9017697ac7572fde3dc5b83 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Sat, 21 Jul 2012 12:59:58 +0200
Subject: [PATCH 05/20] rtmp: Factorize the code by adding handle_invoke
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/rtmpproto.c | 143 +++++++++++++++++++++-------------------
 1 file changed, 77 insertions(+), 66 deletions(-)

diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c
index 871c24eba0..31d1f69e94 100644
--- a/libavformat/rtmpproto.c
+++ b/libavformat/rtmpproto.c
@@ -953,50 +953,23 @@ static int handle_server_bw(URLContext *s, RTMPPacket *pkt)
     return 0;
 }
 
-/**
- * Parse received packet and possibly perform some action depending on
- * the packet contents.
- * @return 0 for no errors, negative values for serious errors which prevent
- *         further communications, positive values for uncritical errors
- */
-static int rtmp_parse_result(URLContext *s, RTMPContext *rt, RTMPPacket *pkt)
+static int handle_invoke(URLContext *s, RTMPPacket *pkt)
 {
+    RTMPContext *rt = s->priv_data;
     int i, t;
     const uint8_t *data_end = pkt->data + pkt->data_size;
     int ret;
 
-#ifdef DEBUG
-    ff_rtmp_packet_dump(s, pkt);
-#endif
+    //TODO: check for the messages sent for wrong state?
+    if (!memcmp(pkt->data, "\002\000\006_error", 9)) {
+        uint8_t tmpstr[256];
 
-    switch (pkt->type) {
-    case RTMP_PT_CHUNK_SIZE:
-        if ((ret = handle_chunk_size(s, pkt)) < 0)
-            return ret;
-        break;
-    case RTMP_PT_PING:
-        if ((ret = handle_ping(s, pkt)) < 0)
-            return ret;
-        break;
-    case RTMP_PT_CLIENT_BW:
-        if ((ret = handle_client_bw(s, pkt)) < 0)
-            return ret;
-        break;
-    case RTMP_PT_SERVER_BW:
-        if ((ret = handle_server_bw(s, pkt)) < 0)
-            return ret;
-        break;
-    case RTMP_PT_INVOKE:
-        //TODO: check for the messages sent for wrong state?
-        if (!memcmp(pkt->data, "\002\000\006_error", 9)) {
-            uint8_t tmpstr[256];
-
-            if (!ff_amf_get_field_value(pkt->data + 9, data_end,
-                                        "description", tmpstr, sizeof(tmpstr)))
-                av_log(s, AV_LOG_ERROR, "Server error: %s\n",tmpstr);
-            return -1;
-        } else if (!memcmp(pkt->data, "\002\000\007_result", 10)) {
-            switch (rt->state) {
+        if (!ff_amf_get_field_value(pkt->data + 9, data_end,
+                                    "description", tmpstr, sizeof(tmpstr)))
+            av_log(s, AV_LOG_ERROR, "Server error: %s\n",tmpstr);
+        return -1;
+    } else if (!memcmp(pkt->data, "\002\000\007_result", 10)) {
+        switch (rt->state) {
             case STATE_HANDSHAKED:
                 if (!rt->is_input) {
                     if ((ret = gen_release_stream(s, rt)) < 0)
@@ -1044,35 +1017,73 @@ static int rtmp_parse_result(URLContext *s, RTMPContext *rt, RTMPPacket *pkt)
                 }
                 rt->state = STATE_READY;
                 break;
-            }
-        } else if (!memcmp(pkt->data, "\002\000\010onStatus", 11)) {
-            const uint8_t* ptr = pkt->data + 11;
-            uint8_t tmpstr[256];
-
-            for (i = 0; i < 2; i++) {
-                t = ff_amf_tag_size(ptr, data_end);
-                if (t < 0)
-                    return 1;
-                ptr += t;
-            }
-            t = ff_amf_get_field_value(ptr, data_end,
-                                       "level", tmpstr, sizeof(tmpstr));
-            if (!t && !strcmp(tmpstr, "error")) {
-                if (!ff_amf_get_field_value(ptr, data_end,
-                                            "description", tmpstr, sizeof(tmpstr)))
-                    av_log(s, AV_LOG_ERROR, "Server error: %s\n",tmpstr);
-                return -1;
-            }
-            t = ff_amf_get_field_value(ptr, data_end,
-                                       "code", tmpstr, sizeof(tmpstr));
-            if (!t && !strcmp(tmpstr, "NetStream.Play.Start")) rt->state = STATE_PLAYING;
-            if (!t && !strcmp(tmpstr, "NetStream.Play.Stop")) rt->state = STATE_STOPPED;
-            if (!t && !strcmp(tmpstr, "NetStream.Play.UnpublishNotify")) rt->state = STATE_STOPPED;
-            if (!t && !strcmp(tmpstr, "NetStream.Publish.Start")) rt->state = STATE_PUBLISHING;
-        } else if (!memcmp(pkt->data, "\002\000\010onBWDone", 11)) {
-            if ((ret = gen_check_bw(s, rt)) < 0)
-                return ret;
         }
+    } else if (!memcmp(pkt->data, "\002\000\010onStatus", 11)) {
+        const uint8_t* ptr = pkt->data + 11;
+        uint8_t tmpstr[256];
+
+        for (i = 0; i < 2; i++) {
+            t = ff_amf_tag_size(ptr, data_end);
+            if (t < 0)
+                return 1;
+            ptr += t;
+        }
+        t = ff_amf_get_field_value(ptr, data_end,
+                                   "level", tmpstr, sizeof(tmpstr));
+        if (!t && !strcmp(tmpstr, "error")) {
+            if (!ff_amf_get_field_value(ptr, data_end,
+                                        "description", tmpstr, sizeof(tmpstr)))
+                av_log(s, AV_LOG_ERROR, "Server error: %s\n",tmpstr);
+            return -1;
+        }
+        t = ff_amf_get_field_value(ptr, data_end,
+                "code", tmpstr, sizeof(tmpstr));
+        if (!t && !strcmp(tmpstr, "NetStream.Play.Start")) rt->state = STATE_PLAYING;
+        if (!t && !strcmp(tmpstr, "NetStream.Play.Stop")) rt->state = STATE_STOPPED;
+        if (!t && !strcmp(tmpstr, "NetStream.Play.UnpublishNotify")) rt->state = STATE_STOPPED;
+        if (!t && !strcmp(tmpstr, "NetStream.Publish.Start")) rt->state = STATE_PUBLISHING;
+    } else if (!memcmp(pkt->data, "\002\000\010onBWDone", 11)) {
+        if ((ret = gen_check_bw(s, rt)) < 0)
+            return ret;
+    }
+
+    return 0;
+}
+
+/**
+ * Parse received packet and possibly perform some action depending on
+ * the packet contents.
+ * @return 0 for no errors, negative values for serious errors which prevent
+ *         further communications, positive values for uncritical errors
+ */
+static int rtmp_parse_result(URLContext *s, RTMPContext *rt, RTMPPacket *pkt)
+{
+    int ret;
+
+#ifdef DEBUG
+    ff_rtmp_packet_dump(s, pkt);
+#endif
+
+    switch (pkt->type) {
+    case RTMP_PT_CHUNK_SIZE:
+        if ((ret = handle_chunk_size(s, pkt)) < 0)
+            return ret;
+        break;
+    case RTMP_PT_PING:
+        if ((ret = handle_ping(s, pkt)) < 0)
+            return ret;
+        break;
+    case RTMP_PT_CLIENT_BW:
+        if ((ret = handle_client_bw(s, pkt)) < 0)
+            return ret;
+        break;
+    case RTMP_PT_SERVER_BW:
+        if ((ret = handle_server_bw(s, pkt)) < 0)
+            return ret;
+        break;
+    case RTMP_PT_INVOKE:
+        if ((ret = handle_invoke(s, pkt)) < 0)
+            return ret;
         break;
     case RTMP_PT_VIDEO:
     case RTMP_PT_AUDIO:

From 845e92fd6abf749a960354becdc5a9bc498f1f96 Mon Sep 17 00:00:00 2001
From: Yang Wang <yang.y.wang@intel.com>
Date: Tue, 24 Jul 2012 00:51:10 +0200
Subject: [PATCH 06/20] dsputil_mmx: fix incorrect assembly code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In ff_put_pixels_clamped_mmx(), there are two assembly code blocks.
In the first block (in the unrolled loop), the instructions
"movq 8%3, %%mm1 \n\t", and so forth, have problems.

It is clear from the above instruction what the programmer wants: a load from
p + 8. But this assembly code doesn’t guarantee that. It only works if the
compiler puts p in a register to produce an instruction like this:
"movq 8(%edi), %mm1". During compiler optimization, it is possible that the
compiler will be able to constant-propagate into p. Suppose p = &x[10000].
Then operand 3 can become 10000(%edi), where %edi holds &x, and the instruction
becomes "movq 810000(%edi), %mm1". That is, the literal "8" is textually
prepended to the displacement, so the load uses an offset of 810000 instead of
adding 8 to the existing offset.

This will cause a segmentation fault.

This error was fixed in the second block of the assembly code, but not in
the unrolled loop.

How to reproduce:
    This error is exposed when building with the Intel C++ Compiler with
    IPO+PGO optimization enabled. The crash occurs when decoding an MJPEG
    video.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
---
 libavcodec/x86/dsputil_mmx.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 5eb4a242c0..522a5658b7 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -245,14 +245,14 @@ void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels,
     pix = pixels;
     /* unrolled loop */
     __asm__ volatile (
-        "movq        %3, %%mm0          \n\t"
-        "movq       8%3, %%mm1          \n\t"
-        "movq      16%3, %%mm2          \n\t"
-        "movq      24%3, %%mm3          \n\t"
-        "movq      32%3, %%mm4          \n\t"
-        "movq      40%3, %%mm5          \n\t"
-        "movq      48%3, %%mm6          \n\t"
-        "movq      56%3, %%mm7          \n\t"
+        "movq      (%3), %%mm0          \n\t"
+        "movq     8(%3), %%mm1          \n\t"
+        "movq    16(%3), %%mm2          \n\t"
+        "movq    24(%3), %%mm3          \n\t"
+        "movq    32(%3), %%mm4          \n\t"
+        "movq    40(%3), %%mm5          \n\t"
+        "movq    48(%3), %%mm6          \n\t"
+        "movq    56(%3), %%mm7          \n\t"
         "packuswb %%mm1, %%mm0          \n\t"
         "packuswb %%mm3, %%mm2          \n\t"
         "packuswb %%mm5, %%mm4          \n\t"
@@ -262,7 +262,7 @@ void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels,
         "movq     %%mm4, (%0, %1, 2)    \n\t"
         "movq     %%mm6, (%0, %2)       \n\t"
         :: "r"(pix), "r"((x86_reg)line_size), "r"((x86_reg)line_size * 3),
-           "m"(*p)
+           "r"(p)
         : "memory");
     pix += line_size * 4;
     p   += 32;

From 79195ce56500a137c7d3152d83dc27d848086405 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 22 Jul 2012 21:14:20 +0000
Subject: [PATCH 07/20] x86/dsputil: put inline asm under HAVE_INLINE_ASM.

This allows compiling with compilers that don't support gcc-style
inline assembly.

Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
---
 libavcodec/dct-test.c            |  2 +-
 libavcodec/x86/dsputil_mmx.c     | 69 ++++++++++++++++++++++++--------
 libavcodec/x86/h264_qpel_mmx.c   |  4 +-
 libavcodec/x86/idct_mmx.c        |  4 ++
 libavcodec/x86/idct_mmx_xvid.c   |  4 ++
 libavcodec/x86/idct_sse2_xvid.c  |  4 ++
 libavcodec/x86/rv40dsp_init.c    |  2 +
 libavcodec/x86/simple_idct_mmx.c |  4 ++
 8 files changed, 74 insertions(+), 19 deletions(-)

diff --git a/libavcodec/dct-test.c b/libavcodec/dct-test.c
index ceff448ae7..5046544500 100644
--- a/libavcodec/dct-test.c
+++ b/libavcodec/dct-test.c
@@ -108,7 +108,7 @@ static const struct algo idct_tab[] = {
     { "INT",            ff_j_rev_dct,          MMX_PERM },
     { "SIMPLE-C",       ff_simple_idct_8,      NO_PERM  },
 
-#if HAVE_MMX
+#if HAVE_MMX && HAVE_INLINE_ASM
 #if CONFIG_GPL
     { "LIBMPEG2-MMX",   ff_mmx_idct,           MMX_PERM,  AV_CPU_FLAG_MMX,  1 },
     { "LIBMPEG2-MMX2",  ff_mmxext_idct,        MMX_PERM,  AV_CPU_FLAG_MMX2, 1 },
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 522a5658b7..e91ede531e 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -84,6 +84,8 @@ DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_FE)   = { 0xFEFEFEFEFEFEFEFEULL, 0xFEF
 DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 };
 DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
 
+#if HAVE_INLINE_ASM
+
 #define JUMPALIGN()     __asm__ volatile (".p2align 3"::)
 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%"#regd", %%"#regd ::)
 
@@ -1836,6 +1838,8 @@ void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src, int stride)
   avg_pixels16_xy2_mmx(dst, src, stride, 16);
 }
 
+#endif /* HAVE_INLINE_ASM */
+
 #if HAVE_YASM
 typedef void emu_edge_core_func(uint8_t *buf, const uint8_t *src,
                                 x86_reg linesize, x86_reg start_y,
@@ -1904,6 +1908,8 @@ static av_noinline void emulated_edge_mc_sse(uint8_t *buf, const uint8_t *src,
 }
 #endif /* HAVE_YASM */
 
+#if HAVE_INLINE_ASM
+
 typedef void emulated_edge_mc_func(uint8_t *dst, const uint8_t *src,
                                    int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h);
@@ -2073,6 +2079,8 @@ PREFETCH(prefetch_mmx2,  prefetcht0)
 PREFETCH(prefetch_3dnow, prefetch)
 #undef PREFETCH
 
+#endif /* HAVE_INLINE_ASM */
+
 #include "h264_qpel_mmx.c"
 
 void ff_put_h264_chroma_mc8_mmx_rnd  (uint8_t *dst, uint8_t *src,
@@ -2118,6 +2126,8 @@ CHROMA_MC(avg, 8, 10, sse2)
 CHROMA_MC(put, 8, 10, avx)
 CHROMA_MC(avg, 8, 10, avx)
 
+#if HAVE_INLINE_ASM
+
 /* CAVS-specific */
 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride)
 {
@@ -2476,6 +2486,8 @@ static void vector_clipf_sse(float *dst, const float *src,
     );
 }
 
+#endif /* HAVE_INLINE_ASM */
+
 int32_t ff_scalarproduct_int16_mmx2(const int16_t *v1, const int16_t *v2,
                                     int order);
 int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2,
@@ -2588,6 +2600,7 @@ static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
 {
     const int high_bit_depth = avctx->bits_per_raw_sample > 8;
 
+#if HAVE_INLINE_ASM
     c->put_pixels_clamped        = ff_put_pixels_clamped_mmx;
     c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
     c->add_pixels_clamped        = ff_add_pixels_clamped_mmx;
@@ -2610,10 +2623,6 @@ static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
 #if ARCH_X86_32 || !HAVE_YASM
     c->gmc = gmc_mmx;
 #endif
-#if ARCH_X86_32 && HAVE_YASM
-    if (!high_bit_depth)
-        c->emulated_edge_mc = emulated_edge_mc_mmx;
-#endif
 
     c->add_bytes = add_bytes_mmx;
 
@@ -2621,8 +2630,14 @@ static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
         c->h263_v_loop_filter = h263_v_loop_filter_mmx;
         c->h263_h_loop_filter = h263_h_loop_filter_mmx;
     }
+#endif /* HAVE_INLINE_ASM */
 
 #if HAVE_YASM
+#if ARCH_X86_32
+    if (!high_bit_depth)
+        c->emulated_edge_mc = emulated_edge_mc_mmx;
+#endif
+
     if (!high_bit_depth && CONFIG_H264CHROMA) {
         c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_mmx_rnd;
         c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx;
@@ -2639,6 +2654,7 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
     const int bit_depth      = avctx->bits_per_raw_sample;
     const int high_bit_depth = bit_depth > 8;
 
+#if HAVE_INLINE_ASM
     c->prefetch = prefetch_mmx2;
 
     if (!high_bit_depth) {
@@ -2674,22 +2690,27 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
         c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmx2;
         c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2;
     }
+#endif /* HAVE_INLINE_ASM */
 
     if (CONFIG_H264QPEL) {
+#if HAVE_INLINE_ASM
         SET_QPEL_FUNCS(put_qpel,        0, 16, mmx2, );
         SET_QPEL_FUNCS(put_qpel,        1,  8, mmx2, );
         SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2, );
         SET_QPEL_FUNCS(put_no_rnd_qpel, 1,  8, mmx2, );
         SET_QPEL_FUNCS(avg_qpel,        0, 16, mmx2, );
         SET_QPEL_FUNCS(avg_qpel,        1,  8, mmx2, );
+#endif /* HAVE_INLINE_ASM */
 
         if (!high_bit_depth) {
+#if HAVE_INLINE_ASM
             SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2, );
             SET_QPEL_FUNCS(put_h264_qpel, 1,  8, mmx2, );
             SET_QPEL_FUNCS(put_h264_qpel, 2,  4, mmx2, );
             SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2, );
             SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, mmx2, );
             SET_QPEL_FUNCS(avg_h264_qpel, 2,  4, mmx2, );
+#endif /* HAVE_INLINE_ASM */
         } else if (bit_depth == 10) {
 #if HAVE_YASM
 #if !ARCH_X86_64
@@ -2703,10 +2724,12 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
 #endif
         }
 
+#if HAVE_INLINE_ASM
         SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2, );
         SET_QPEL_FUNCS(put_2tap_qpel, 1,  8, mmx2, );
         SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2, );
         SET_QPEL_FUNCS(avg_2tap_qpel, 1,  8, mmx2, );
+#endif /* HAVE_INLINE_ASM */
     }
 
 #if HAVE_YASM
@@ -2741,6 +2764,7 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
 {
     const int high_bit_depth = avctx->bits_per_raw_sample > 8;
 
+#if HAVE_INLINE_ASM
     c->prefetch = prefetch_3dnow;
 
     if (!high_bit_depth) {
@@ -2798,25 +2822,26 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
         SET_QPEL_FUNCS(avg_2tap_qpel, 1,  8, 3dnow, );
     }
 
-#if HAVE_YASM
-    if (!high_bit_depth && CONFIG_H264CHROMA) {
-        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_3dnow_rnd;
-        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow;
-    }
-#endif
-
     c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
 
 #if HAVE_7REGS
     if (mm_flags & AV_CPU_FLAG_CMOV)
         c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov;
 #endif
+#endif /* HAVE_INLINE_ASM */
+
+#if HAVE_YASM
+    if (!high_bit_depth && CONFIG_H264CHROMA) {
+        c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_3dnow_rnd;
+        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow;
+    }
+#endif
 }
 
 static void dsputil_init_3dnow2(DSPContext *c, AVCodecContext *avctx,
                                 int mm_flags)
 {
-#if HAVE_6REGS
+#if HAVE_6REGS && HAVE_INLINE_ASM
     c->vector_fmul_window  = vector_fmul_window_3dnow2;
 #endif
 }
@@ -2825,6 +2850,7 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
 {
     const int high_bit_depth = avctx->bits_per_raw_sample > 8;
 
+#if HAVE_INLINE_ASM
     if (!high_bit_depth) {
         if (!(CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)) {
             /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */
@@ -2835,31 +2861,35 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
 
     c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
     c->ac3_downmix             = ac3_downmix_sse;
-#if HAVE_YASM
-    c->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
-    c->vector_fmul_add     = ff_vector_fmul_add_sse;
-#endif
 
 #if HAVE_6REGS
     c->vector_fmul_window = vector_fmul_window_sse;
 #endif
 
     c->vector_clipf = vector_clipf_sse;
+#endif /* HAVE_INLINE_ASM */
 
 #if HAVE_YASM
+    c->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
+    c->vector_fmul_add     = ff_vector_fmul_add_sse;
+
     c->scalarproduct_float          = ff_scalarproduct_float_sse;
     c->butterflies_float_interleave = ff_butterflies_float_interleave_sse;
 
     if (!high_bit_depth)
         c->emulated_edge_mc = emulated_edge_mc_sse;
+#if HAVE_INLINE_ASM
     c->gmc = gmc_sse;
 #endif
+#endif
 }
 
 static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
                               int mm_flags)
 {
     const int bit_depth      = avctx->bits_per_raw_sample;
+
+#if HAVE_INLINE_ASM
     const int high_bit_depth = bit_depth > 8;
 
     if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
@@ -2887,6 +2917,7 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
         H264_QPEL_FUNCS(3, 2, sse2);
         H264_QPEL_FUNCS(3, 3, sse2);
     }
+#endif /* HAVE_INLINE_ASM */
 
 #if HAVE_YASM
     if (bit_depth == 10) {
@@ -2928,6 +2959,7 @@ static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
     const int high_bit_depth = avctx->bits_per_raw_sample > 8;
     const int bit_depth      = avctx->bits_per_raw_sample;
 
+#if HAVE_INLINE_ASM
     if (!high_bit_depth && CONFIG_H264QPEL) {
         H264_QPEL_FUNCS(1, 0, ssse3);
         H264_QPEL_FUNCS(1, 1, ssse3);
@@ -2942,8 +2974,9 @@ static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
         H264_QPEL_FUNCS(3, 2, ssse3);
         H264_QPEL_FUNCS(3, 3, ssse3);
     }
+#endif /* HAVE_INLINE_ASM */
 #if HAVE_YASM
-    else if (bit_depth == 10 && CONFIG_H264QPEL) {
+    if (bit_depth == 10 && CONFIG_H264QPEL) {
         H264_QPEL_FUNCS_10(1, 0, ssse3_cache64);
         H264_QPEL_FUNCS_10(2, 0, ssse3_cache64);
         H264_QPEL_FUNCS_10(3, 0, ssse3_cache64);
@@ -3007,6 +3040,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
     int mm_flags = av_get_cpu_flags();
 
     if (mm_flags & AV_CPU_FLAG_MMX) {
+#if HAVE_INLINE_ASM
         const int idct_algo = avctx->idct_algo;
 
         if (avctx->bits_per_raw_sample <= 8) {
@@ -3047,6 +3081,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
                 }
             }
         }
+#endif /* HAVE_INLINE_ASM */
 
         dsputil_init_mmx(c, avctx, mm_flags);
     }
diff --git a/libavcodec/x86/h264_qpel_mmx.c b/libavcodec/x86/h264_qpel_mmx.c
index 85ae07e9f4..fc1635de8b 100644
--- a/libavcodec/x86/h264_qpel_mmx.c
+++ b/libavcodec/x86/h264_qpel_mmx.c
@@ -21,6 +21,8 @@
 
 #include "dsputil_mmx.h"
 
+#if HAVE_INLINE_ASM
+
 /***********************************/
 /* motion compensation */
 
@@ -1191,7 +1193,7 @@ H264_MC_816(H264_MC_H, ssse3)
 H264_MC_816(H264_MC_HV, ssse3)
 #endif
 
-
+#endif /* HAVE_INLINE_ASM */
 
 //10bit
 #define LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT) \
diff --git a/libavcodec/x86/idct_mmx.c b/libavcodec/x86/idct_mmx.c
index f199941f55..2408ab26ad 100644
--- a/libavcodec/x86/idct_mmx.c
+++ b/libavcodec/x86/idct_mmx.c
@@ -25,6 +25,8 @@
 #include "libavutil/x86_cpu.h"
 #include "dsputil_mmx.h"
 
+#if HAVE_INLINE_ASM
+
 #define ROW_SHIFT 11
 #define COL_SHIFT 6
 
@@ -626,3 +628,5 @@ declare_idct (ff_mmxext_idct, mmxext_table,
 
 declare_idct (ff_mmx_idct, mmx_table,
               mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)
+
+#endif /* HAVE_INLINE_ASM */
diff --git a/libavcodec/x86/idct_mmx_xvid.c b/libavcodec/x86/idct_mmx_xvid.c
index 139798e44b..e4c778c398 100644
--- a/libavcodec/x86/idct_mmx_xvid.c
+++ b/libavcodec/x86/idct_mmx_xvid.c
@@ -43,6 +43,8 @@
 #include "libavcodec/avcodec.h"
 #include "idct_xvid.h"
 
+#if HAVE_INLINE_ASM
+
 //=============================================================================
 // Macros and other preprocessor constants
 //=============================================================================
@@ -523,3 +525,5 @@ __asm__ volatile(
     DCT_8_INV_COL(8(%0), 8(%0))
     :: "r"(block), "r"(rounder_0), "r"(tab_i_04_xmm), "r"(tg_1_16));
 }
+
+#endif /* HAVE_INLINE_ASM */
diff --git a/libavcodec/x86/idct_sse2_xvid.c b/libavcodec/x86/idct_sse2_xvid.c
index 968b400c6d..8249e97ccf 100644
--- a/libavcodec/x86/idct_sse2_xvid.c
+++ b/libavcodec/x86/idct_sse2_xvid.c
@@ -43,6 +43,8 @@
 #include "idct_xvid.h"
 #include "dsputil_mmx.h"
 
+#if HAVE_INLINE_ASM
+
 /**
  * @file
  * @brief SSE2 idct compatible with xvidmmx
@@ -401,3 +403,5 @@ void ff_idct_xvid_sse2_add(uint8_t *dest, int line_size, short *block)
     ff_idct_xvid_sse2(block);
     ff_add_pixels_clamped_mmx(block, dest, line_size);
 }
+
+#endif /* HAVE_INLINE_ASM */
diff --git a/libavcodec/x86/rv40dsp_init.c b/libavcodec/x86/rv40dsp_init.c
index e429cc3359..99ba0d5737 100644
--- a/libavcodec/x86/rv40dsp_init.c
+++ b/libavcodec/x86/rv40dsp_init.c
@@ -190,10 +190,12 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp)
     if (mm_flags & AV_CPU_FLAG_MMX) {
         c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_mmx;
         c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_mmx;
+#if HAVE_INLINE_ASM
         c->put_pixels_tab[0][15] = ff_put_rv40_qpel16_mc33_mmx;
         c->put_pixels_tab[1][15] = ff_put_rv40_qpel8_mc33_mmx;
         c->avg_pixels_tab[0][15] = ff_avg_rv40_qpel16_mc33_mmx;
         c->avg_pixels_tab[1][15] = ff_avg_rv40_qpel8_mc33_mmx;
+#endif /* HAVE_INLINE_ASM */
 #if ARCH_X86_32
         QPEL_MC_SET(put_, _mmx)
 #endif
diff --git a/libavcodec/x86/simple_idct_mmx.c b/libavcodec/x86/simple_idct_mmx.c
index dc285cf728..f455eb8974 100644
--- a/libavcodec/x86/simple_idct_mmx.c
+++ b/libavcodec/x86/simple_idct_mmx.c
@@ -23,6 +23,8 @@
 #include "libavcodec/simple_idct.h"
 #include "dsputil_mmx.h"
 
+#if HAVE_INLINE_ASM
+
 /*
 23170.475006
 22725.260826
@@ -1161,3 +1163,5 @@ void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block)
     idct(block);
     ff_add_pixels_clamped_mmx(block, dest, line_size);
 }
+
+#endif /* HAVE_INLINE_ASM */

From a1878a88a1dc3e3b0abaee910a18f0a6a30b0805 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Sun, 22 Jul 2012 20:38:56 +0000
Subject: [PATCH 08/20] vp3: don't use calls to inline asm in yasm code.

Mixing yasm and inline asm is a bad idea, since if either yasm or inline
asm is not supported by your toolchain, all of the asm stops working.
Thus, better to use either one or the other alone.

Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
---
 libavcodec/x86/vp3dsp.asm | 124 +++++++++++++++++++++++++-------------
 1 file changed, 81 insertions(+), 43 deletions(-)

diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm
index 58fa1f7b27..af2f60c6ae 100644
--- a/libavcodec/x86/vp3dsp.asm
+++ b/libavcodec/x86/vp3dsp.asm
@@ -38,13 +38,11 @@ cextern pb_1
 cextern pb_3
 cextern pb_7
 cextern pb_1F
+cextern pb_80
 cextern pb_81
 
 cextern pw_8
 
-cextern put_signed_pixels_clamped_mmx
-cextern add_pixels_clamped_mmx
-
 SECTION .text
 
 ; this is off by one or two for some cases when filter_limit is greater than 63
@@ -523,56 +521,96 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4
         PUT_BLOCK 0, 1, 2, 3, 4, 5, 6, 7
 %endmacro
 
-%macro vp3_idct_funcs 3
-cglobal vp3_idct_put_%1, 3, %3, %2
+%macro vp3_idct_funcs 1
+cglobal vp3_idct_put_%1, 3, 4, 9
     VP3_IDCT_%1   r2
-%if ARCH_X86_64
-    mov           r3, r2
-    mov           r2, r1
-    mov           r1, r0
-    mov           r0, r3
-%else
-    mov          r0m, r2
-    mov          r1m, r0
-    mov          r2m, r1
-%endif
-%if WIN64
-    call put_signed_pixels_clamped_mmx
-    RET
-%else
-    jmp put_signed_pixels_clamped_mmx
-%endif
 
-cglobal vp3_idct_add_%1, 3, %3, %2
-    VP3_IDCT_%1   r2
-%if ARCH_X86_64
-    mov           r3, r2
-    mov           r2, r1
-    mov           r1, r0
-    mov           r0, r3
+    movsxdifnidn  r1, r1d
+    mova          m4, [pb_80] ; bias added to every byte after the signed pack
+    lea           r3, [r1*3] ; r3 = 3*r1, offset of the fourth row
+%assign %%i 0
+%rep 16/mmsize
+    mova          m0, [r2+mmsize*0+%%i]
+    mova          m1, [r2+mmsize*2+%%i]
+    mova          m2, [r2+mmsize*4+%%i]
+    mova          m3, [r2+mmsize*6+%%i]
+    packsswb      m0, [r2+mmsize*1+%%i]
+    packsswb      m1, [r2+mmsize*3+%%i]
+    packsswb      m2, [r2+mmsize*5+%%i]
+    packsswb      m3, [r2+mmsize*7+%%i]
+    paddb         m0, m4
+    paddb         m1, m4
+    paddb         m2, m4
+    paddb         m3, m4
+    movq   [r0     ], m0
+%if mmsize == 8
+    movq   [r0+r1  ], m1
+    movq   [r0+r1*2], m2
+    movq   [r0+r3  ], m3
 %else
-    mov          r0m, r2
-    mov          r1m, r0
-    mov          r2m, r1
+    movhps [r0+r1  ], m0
+    movq   [r0+r1*2], m1
+    movhps [r0+r3  ], m1
 %endif
-%if WIN64
-    call add_pixels_clamped_mmx
+%if %%i == 0
+    lea           r0, [r0+r1*4] ; r0 += 4*r1: continue with the next four rows
+%endif
+%if mmsize == 16
+    movq   [r0     ], m2
+    movhps [r0+r1  ], m2
+    movq   [r0+r1*2], m3
+    movhps [r0+r3  ], m3
+%endif
+%assign %%i %%i+64
+%endrep
     RET
-%else
-    jmp add_pixels_clamped_mmx
+
+cglobal vp3_idct_add_%1, 3, 4, 9
+    VP3_IDCT_%1   r2
+    ; add the 16-bit values at r2 to the 8x8 bytes at r0 (row step r1), saturating
+    mov           r3, 4 ; loop counter: 4 iterations, two rows each
+    pxor          m4, m4 ; m4 = 0, used to zero-extend bytes to words
+    movsxdifnidn  r1, r1d
+.loop:
+    movq          m0, [r0]
+    movq          m1, [r0+r1]
+%if mmsize == 8
+    mova          m2, m0
+    mova          m3, m1
 %endif
+    punpcklbw     m0, m4
+    punpcklbw     m1, m4
+%if mmsize == 8
+    punpckhbw     m2, m4
+    punpckhbw     m3, m4
+%endif
+    paddsw        m0, [r2+ 0]
+    paddsw        m1, [r2+16]
+%if mmsize == 8
+    paddsw        m2, [r2+ 8]
+    paddsw        m3, [r2+24]
+    packuswb      m0, m2
+    packuswb      m1, m3
+%else ; mmsize == 16
+    packuswb      m0, m1
+%endif
+    movq     [r0   ], m0
+%if mmsize == 8
+    movq     [r0+r1], m1
+%else ; mmsize == 16
+    movhps   [r0+r1], m0
+%endif
+    lea           r0, [r0+r1*2] ; next pair of rows
+    add           r2, 32 ; 32 bytes = two rows of eight 16-bit values
+    dec           r3
+    jg .loop
+    RET
 %endmacro
 
-%if ARCH_X86_64
-%define REGS 4
-%else
-%define REGS 3
-%endif
 INIT_MMX
-vp3_idct_funcs mmx,  0, REGS
+vp3_idct_funcs mmx
 INIT_XMM
-vp3_idct_funcs sse2, 9, REGS
-%undef REGS
+vp3_idct_funcs sse2
 
 %macro DC_ADD 0
     movq          m2, [r0     ]

From 48098788c240f678222d45db79aff38f494f3df5 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Tue, 24 Jul 2012 21:23:12 +0000
Subject: [PATCH 09/20] vp8: Replace x*155/100 by x*101581>>16.

Idea stolen from webp (by Pascal Massimino) - because it's Cool.

Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
---
 libavcodec/vp8.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index d0e2a0cccb..a662f8a2df 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -249,12 +249,13 @@ static void get_quants(VP8Context *s)
         } else
             base_qi = yac_qi;
 
-        s->qmat[i].luma_qmul[0]    =       vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
-        s->qmat[i].luma_qmul[1]    =       vp8_ac_qlookup[av_clip_uintp2(base_qi             , 7)];
-        s->qmat[i].luma_dc_qmul[0] =   2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
-        s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
-        s->qmat[i].chroma_qmul[0]  =       vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
-        s->qmat[i].chroma_qmul[1]  =       vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
+        s->qmat[i].luma_qmul[0]    =           vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
+        s->qmat[i].luma_qmul[1]    =           vp8_ac_qlookup[av_clip_uintp2(base_qi             , 7)];
+        s->qmat[i].luma_dc_qmul[0] =       2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
+        /* (x * 101581) >> 16 equals x * 155 / 100 for all 0 <= x < 16384 */
+        s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
+        s->qmat[i].chroma_qmul[0]  =           vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
+        s->qmat[i].chroma_qmul[1]  =           vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
 
         s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
         s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);

From 0dadf9d1e96c82a5e56489e9e8151916da968079 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Wed, 25 Jul 2012 14:49:40 -0400
Subject: [PATCH 10/20] lavr: x86: add missing vzeroupper in
 ff_mix_1_to_2_fltp_flt()

---
 libavresample/x86/audio_mix.asm | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libavresample/x86/audio_mix.asm b/libavresample/x86/audio_mix.asm
index 4b0434dd6d..58a4ded8c6 100644
--- a/libavresample/x86/audio_mix.asm
+++ b/libavresample/x86/audio_mix.asm
@@ -175,7 +175,12 @@ cglobal mix_1_to_2_fltp_flt, 3,5,4, src0, matrix0, len, src1, matrix1
     add       src0q, mmsize
     sub        lend, mmsize/4
     jg .loop
+%if mmsize == 32
+    vzeroupper
+    RET
+%else
     REP_RET
+%endif
 %endmacro
 
 INIT_XMM sse

From e7ea6883bfaabaad20ac3d369c8293e693e3c22b Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Wed, 25 Jul 2012 20:51:08 +0200
Subject: [PATCH 11/20] rtmp: Return proper error codes in handle_chunk_size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/rtmpproto.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c
index 31d1f69e94..5aa6a540c5 100644
--- a/libavformat/rtmpproto.c
+++ b/libavformat/rtmpproto.c
@@ -889,7 +889,7 @@ static int handle_chunk_size(URLContext *s, RTMPPacket *pkt)
         av_log(s, AV_LOG_ERROR,
                "Chunk size change packet is not 4 bytes long (%d)\n",
                pkt->data_size);
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
 
     if (!rt->is_input) {
@@ -901,7 +901,7 @@ static int handle_chunk_size(URLContext *s, RTMPPacket *pkt)
     rt->chunk_size = AV_RB32(pkt->data);
     if (rt->chunk_size <= 0) {
         av_log(s, AV_LOG_ERROR, "Incorrect chunk size %d\n", rt->chunk_size);
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
     av_log(s, AV_LOG_DEBUG, "New chunk size = %d\n", rt->chunk_size);
 

From 088a82bb33d64b95e71a9a7f06ca19cc61ffa8d4 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Wed, 25 Jul 2012 20:51:09 +0200
Subject: [PATCH 12/20] rtmp: Return proper error code in handle_client_bw
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/rtmpproto.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c
index 5aa6a540c5..be61074b16 100644
--- a/libavformat/rtmpproto.c
+++ b/libavformat/rtmpproto.c
@@ -930,7 +930,7 @@ static int handle_client_bw(URLContext *s, RTMPPacket *pkt)
         av_log(s, AV_LOG_ERROR,
                "Client bandwidth report packet is less than 4 bytes long (%d)\n",
                pkt->data_size);
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
     av_log(s, AV_LOG_DEBUG, "Client bandwidth = %d\n", AV_RB32(pkt->data));
     rt->client_report_size = AV_RB32(pkt->data) >> 1;

From be8f949219c03e6bf93d8d563ef33b8ad4faa1e9 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Wed, 25 Jul 2012 20:51:10 +0200
Subject: [PATCH 13/20] rtmp: Return proper error code in handle_server_bw
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/rtmpproto.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c
index be61074b16..5ac60d21a0 100644
--- a/libavformat/rtmpproto.c
+++ b/libavformat/rtmpproto.c
@@ -946,7 +946,7 @@ static int handle_server_bw(URLContext *s, RTMPPacket *pkt)
     if (rt->server_bw <= 0) {
         av_log(s, AV_LOG_ERROR, "Incorrect server bandwidth %d\n",
                rt->server_bw);
-        return AVERROR(EINVAL);
+        return AVERROR_INVALIDDATA;
     }
     av_log(s, AV_LOG_DEBUG, "Server bandwidth = %d\n", rt->server_bw);
 

From abf77a247baa19357bbf41d6460d4f65a8ff07f2 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Wed, 25 Jul 2012 20:51:11 +0200
Subject: [PATCH 14/20] rtmp: Return an error when the client bandwidth is
 incorrect
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/rtmpproto.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c
index 5ac60d21a0..a2efe3882f 100644
--- a/libavformat/rtmpproto.c
+++ b/libavformat/rtmpproto.c
@@ -932,8 +932,16 @@ static int handle_client_bw(URLContext *s, RTMPPacket *pkt)
                pkt->data_size);
         return AVERROR_INVALIDDATA;
     }
-    av_log(s, AV_LOG_DEBUG, "Client bandwidth = %d\n", AV_RB32(pkt->data));
-    rt->client_report_size = AV_RB32(pkt->data) >> 1;
+
+    rt->client_report_size = AV_RB32(pkt->data);
+    if (rt->client_report_size <= 0) {
+        av_log(s, AV_LOG_ERROR, "Incorrect client bandwidth %d\n",
+                rt->client_report_size);
+        return AVERROR_INVALIDDATA;
+
+    }
+    av_log(s, AV_LOG_DEBUG, "Client bandwidth = %d\n", rt->client_report_size);
+    rt->client_report_size >>= 1;
 
     return 0;
 }

From 5423e908c9f5f12f599f6c9625ac9539be671695 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Antti=20Sepp=C3=A4l=C3=A4?=
 <a.seppala+libav-devel@gmail.com>
Date: Wed, 25 Jul 2012 12:43:39 +0300
Subject: [PATCH 15/20] Support urlencoded http authentication credentials
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It should be possible to specify usernames in http requests containing
urlencoded characters. This patch adds support for decoding the auth
strings.

Signed-off-by: Antti Seppälä <a.seppala@gmail.com>
Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/Makefile    | 11 +++---
 libavformat/httpauth.c  | 25 ++++++++----
 libavformat/urldecode.c | 87 +++++++++++++++++++++++++++++++++++++++++
 libavformat/urldecode.h | 35 +++++++++++++++++
 libavformat/version.h   |  2 +-
 5 files changed, 147 insertions(+), 13 deletions(-)
 create mode 100644 libavformat/urldecode.c
 create mode 100644 libavformat/urldecode.h

diff --git a/libavformat/Makefile b/libavformat/Makefile
index 2eb3e9c8de..ffb234f3d4 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -277,9 +277,10 @@ OBJS-$(CONFIG_RTPDEC)                    += rdt.o         \
                                             rtpdec_svq3.o \
                                             rtpdec_vp8.o  \
                                             rtpdec_xiph.o
-OBJS-$(CONFIG_RTSP_DEMUXER)              += rtsp.o rtspdec.o httpauth.o
+OBJS-$(CONFIG_RTSP_DEMUXER)              += rtsp.o rtspdec.o httpauth.o \
+                                            urldecode.o
 OBJS-$(CONFIG_RTSP_MUXER)                += rtsp.o rtspenc.o httpauth.o \
-                                            rtpenc_chain.o
+                                            rtpenc_chain.o urldecode.o
 OBJS-$(CONFIG_SAP_DEMUXER)               += sapdec.o
 OBJS-$(CONFIG_SAP_MUXER)                 += sapenc.o rtpenc_chain.o
 OBJS-$(CONFIG_SDP_DEMUXER)               += rtsp.o
@@ -346,9 +347,9 @@ OBJS-$(CONFIG_FFRTMPHTTP_PROTOCOL)       += rtmphttp.o
 OBJS-$(CONFIG_FILE_PROTOCOL)             += file.o
 OBJS-$(CONFIG_GOPHER_PROTOCOL)           += gopher.o
 OBJS-$(CONFIG_HLS_PROTOCOL)              += hlsproto.o
-OBJS-$(CONFIG_HTTP_PROTOCOL)             += http.o httpauth.o
-OBJS-$(CONFIG_HTTPPROXY_PROTOCOL)        += http.o httpauth.o
-OBJS-$(CONFIG_HTTPS_PROTOCOL)            += http.o httpauth.o
+OBJS-$(CONFIG_HTTP_PROTOCOL)             += http.o httpauth.o urldecode.o
+OBJS-$(CONFIG_HTTPPROXY_PROTOCOL)        += http.o httpauth.o urldecode.o
+OBJS-$(CONFIG_HTTPS_PROTOCOL)            += http.o httpauth.o urldecode.o
 OBJS-$(CONFIG_MMSH_PROTOCOL)             += mmsh.o mms.o asf.o
 OBJS-$(CONFIG_MMST_PROTOCOL)             += mmst.o mms.o asf.o
 OBJS-$(CONFIG_MD5_PROTOCOL)              += md5proto.o
diff --git a/libavformat/httpauth.c b/libavformat/httpauth.c
index c1cf019eda..4ec8ac2599 100644
--- a/libavformat/httpauth.c
+++ b/libavformat/httpauth.c
@@ -25,6 +25,7 @@
 #include "internal.h"
 #include "libavutil/random_seed.h"
 #include "libavutil/md5.h"
+#include "urldecode.h"
 #include "avformat.h"
 #include <ctype.h>
 
@@ -251,18 +252,28 @@ char *ff_http_auth_create_response(HTTPAuthState *state, const char *auth,
         return NULL;
 
     if (state->auth_type == HTTP_AUTH_BASIC) {
-        int auth_b64_len = AV_BASE64_SIZE(strlen(auth));
-        int len = auth_b64_len + 30;
-        char *ptr;
-        authstr = av_malloc(len);
-        if (!authstr)
+        int auth_b64_len, len;
+        char *ptr, *decoded_auth = ff_urldecode(auth);
+
+        if (!decoded_auth)
             return NULL;
+
+        auth_b64_len = AV_BASE64_SIZE(strlen(decoded_auth));
+        len = auth_b64_len + 30;
+
+        authstr = av_malloc(len);
+        if (!authstr) {
+            av_free(decoded_auth);
+            return NULL;
+        }
+
         snprintf(authstr, len, "Authorization: Basic ");
         ptr = authstr + strlen(authstr);
-        av_base64_encode(ptr, auth_b64_len, auth, strlen(auth));
+        av_base64_encode(ptr, auth_b64_len, decoded_auth, strlen(decoded_auth));
         av_strlcat(ptr, "\r\n", len - (ptr - authstr));
+        av_free(decoded_auth);
     } else if (state->auth_type == HTTP_AUTH_DIGEST) {
-        char *username = av_strdup(auth), *password;
+        char *username = ff_urldecode(auth), *password;
 
         if (!username)
             return NULL;
diff --git a/libavformat/urldecode.c b/libavformat/urldecode.c
new file mode 100644
index 0000000000..32460da4f9
--- /dev/null
+++ b/libavformat/urldecode.c
@@ -0,0 +1,87 @@
+/*
+ * Simple URL decoding function
+ * Copyright (c) 2012 Antti Seppälä
+ *
+ * References:
+ *  RFC 3986: Uniform Resource Identifier (URI): Generic Syntax
+ *       T. Berners-Lee et al. The Internet Society, 2005
+ *
+ * based on http://www.icosaedro.it/apache/urldecode.c
+ *          from Umberto Salsi (salsi@icosaedro.it)
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <ctype.h>
+#include <string.h>
+
+#include "libavutil/mem.h"
+#include "libavutil/avstring.h"
+#include "urldecode.h"
+
+/* Decode %XX escapes and '+' into a new string; NULL on NULL input or OOM. */
+char *ff_urldecode(const char *url)
+{
+    int s = 0, d = 0, url_len = 0;
+    char *dest = NULL;
+
+    if (!url)
+        return NULL;
+
+    /* decoding never grows the string, so strlen(url) + 1 bytes suffice */
+    url_len = strlen(url) + 1;
+    dest = av_malloc(url_len);
+    if (!dest)
+        return NULL;
+
+    /* the loop also visits the terminating NUL and copies it to dest */
+    while (s < url_len) {
+        char c = url[s++];
+
+        /* an escape needs two more characters before the terminator */
+        if (c == '%' && s + 2 < url_len) {
+            char c2 = url[s++];
+            char c3 = url[s++];
+            /* cast: <ctype.h> functions are undefined for negative char values */
+            if (isxdigit((unsigned char)c2) && isxdigit((unsigned char)c3)) {
+                c2 = av_tolower(c2);
+                c3 = av_tolower(c3);
+
+                if (c2 <= '9')
+                    c2 = c2 - '0';
+                else
+                    c2 = c2 - 'a' + 10;
+
+                if (c3 <= '9')
+                    c3 = c3 - '0';
+                else
+                    c3 = c3 - 'a' + 10;
+                dest[d++] = 16 * c2 + c3;
+            } else { /* "%zz" or another invalid escape: copy it verbatim */
+                dest[d++] = c;
+                dest[d++] = c2;
+                dest[d++] = c3;
+            }
+        } else if (c == '+') {
+            dest[d++] = ' ';
+        } else {
+            dest[d++] = c;
+        }
+    }
+
+    return dest;
+}
diff --git a/libavformat/urldecode.h b/libavformat/urldecode.h
new file mode 100644
index 0000000000..b43f319c9e
--- /dev/null
+++ b/libavformat/urldecode.h
@@ -0,0 +1,35 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFORMAT_URLDECODE_H
+#define AVFORMAT_URLDECODE_H
+
+/**
+ * Decode a URL from its percent-encoded form back into its normal
+ * representation; '+' is decoded to a space. The result is returned in a
+ * newly allocated string. The input does not have to be encoded; a URL
+ * without escapes is simply duplicated (apart from '+' becoming ' ').
+ *
+ * @param url a string to be decoded.
+ * @return new string with the URL decoded or NULL if decoding failed.
+ * Note that the returned string should be explicitly freed when not
+ * used anymore.
+ */
+char *ff_urldecode(const char *url);
+
+#endif /* AVFORMAT_URLDECODE_H */
diff --git a/libavformat/version.h b/libavformat/version.h
index bc6cae664b..de6da526c3 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -31,7 +31,7 @@
 
 #define LIBAVFORMAT_VERSION_MAJOR 54
 #define LIBAVFORMAT_VERSION_MINOR 12
-#define LIBAVFORMAT_VERSION_MICRO  0
+#define LIBAVFORMAT_VERSION_MICRO  1
 
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
                                                LIBAVFORMAT_VERSION_MINOR, \

From 08fc1ad151fcaac67f4550c224254352c14e4e10 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Wed, 25 Jul 2012 08:42:25 +0200
Subject: [PATCH 16/20] vf_overlay: prevent premature freeing of cur_buf

Signed-off-by: Anton Khirnov <anton@khirnov.net>
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
---
 libavfilter/vf_overlay.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c
index 14304c1918..5f34895058 100644
--- a/libavfilter/vf_overlay.c
+++ b/libavfilter/vf_overlay.c
@@ -238,6 +238,7 @@ static int start_frame_overlay(AVFilterLink *inlink, AVFilterBufferRef *inpicref
     AVFilterContext *ctx = inlink->dst;
     OverlayContext *over = ctx->priv;
 
+    inlink->cur_buf  = NULL;
     over->overpicref = inpicref;
     over->overpicref->pts = av_rescale_q(inpicref->pts, ctx->inputs[OVERLAY]->time_base,
                                          ctx->outputs[0]->time_base);

From f431315a866da9600e3eaa99fc54da1f554f170c Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Wed, 25 Jul 2012 08:42:26 +0200
Subject: [PATCH 17/20] vf_overlay: ensure the overlay frame does not get
 leaked.

Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
---
 libavfilter/vf_overlay.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c
index 5f34895058..5a8da27a16 100644
--- a/libavfilter/vf_overlay.c
+++ b/libavfilter/vf_overlay.c
@@ -239,6 +239,7 @@ static int start_frame_overlay(AVFilterLink *inlink, AVFilterBufferRef *inpicref
     OverlayContext *over = ctx->priv;
 
     inlink->cur_buf  = NULL;
+    avfilter_unref_bufferp(&over->overpicref);
     over->overpicref = inpicref;
     over->overpicref->pts = av_rescale_q(inpicref->pts, ctx->inputs[OVERLAY]->time_base,
                                          ctx->outputs[0]->time_base);

From b5c8aa745ead1b9a561ced99179291bb94048c84 Mon Sep 17 00:00:00 2001
From: Anton Khirnov <anton@khirnov.net>
Date: Wed, 25 Jul 2012 08:42:27 +0200
Subject: [PATCH 18/20] vf_yadif: unset cur_buf on the input link.

The buffer is stored internally, so this prevents it from being unreffed
automatically.

Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
---
 libavfilter/vf_yadif.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavfilter/vf_yadif.c b/libavfilter/vf_yadif.c
index c6d78a5ae9..a2b7337224 100644
--- a/libavfilter/vf_yadif.c
+++ b/libavfilter/vf_yadif.c
@@ -221,6 +221,7 @@ static int start_frame(AVFilterLink *link, AVFilterBufferRef *picref)
     yadif->prev = yadif->cur;
     yadif->cur  = yadif->next;
     yadif->next = picref;
+    link->cur_buf = NULL;
 
     if (!yadif->cur)
         return 0;

From 92566540c3ce554b656b4d99b9bcd7870d8a41e0 Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <cehoyos@ag.or.at>
Date: Mon, 23 Jul 2012 18:45:17 -0400
Subject: [PATCH 19/20] Fix typo in v410 decoder.

Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
Signed-off-by: Kostya Shishkov <kostya.shishkov@gmail.com>
---
 libavcodec/v410dec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/v410dec.c b/libavcodec/v410dec.c
index d7660ee4fb..93545ab102 100644
--- a/libavcodec/v410dec.c
+++ b/libavcodec/v410dec.c
@@ -30,10 +30,10 @@ static av_cold int v410_decode_init(AVCodecContext *avctx)
 
     if (avctx->width & 1) {
         if (avctx->err_recognition & AV_EF_EXPLODE) {
-            av_log(avctx, AV_LOG_ERROR, "v410 requires width to be even, continuing anyway.\n");
+            av_log(avctx, AV_LOG_ERROR, "v410 requires width to be even.\n");
             return AVERROR_INVALIDDATA;
         } else {
-            av_log(avctx, AV_LOG_WARNING, "v410 requires width to be even.\n");
+            av_log(avctx, AV_LOG_WARNING, "v410 requires width to be even, continuing anyway.\n");
         }
     }
 

From 44dc9c6af0377faf2a99889d1f949e32a1102e84 Mon Sep 17 00:00:00 2001
From: Michael Bradshaw <mbradshaw@sorensonmedia.com>
Date: Tue, 24 Jul 2012 19:43:06 +0200
Subject: [PATCH 20/20] libopenjpeg: support YUV and deep RGB pixel formats

Based on FFmpeg version from
commit 3275981207e30e140cffaea334ac390f1a04266a

Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
---
 libavcodec/libopenjpegdec.c | 297 +++++++++++++++++++++++++++++-------
 1 file changed, 246 insertions(+), 51 deletions(-)

diff --git a/libavcodec/libopenjpegdec.c b/libavcodec/libopenjpegdec.c
index 6696ee5269..c84a9aca03 100644
--- a/libavcodec/libopenjpegdec.c
+++ b/libavcodec/libopenjpegdec.c
@@ -27,8 +27,9 @@
 #define  OPJ_STATIC
 #include <openjpeg.h>
 
-#include "libavutil/imgutils.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/pixfmt.h"
 #include "libavutil/opt.h"
 #include "avcodec.h"
 #include "thread.h"
@@ -36,6 +37,32 @@
 #define JP2_SIG_TYPE    0x6A502020
 #define JP2_SIG_VALUE   0x0D0A870A
 
+// pix_fmts with lower bpp have to be listed before similar pix_fmts with
+// higher bpp: the guesser returns the first entry deep enough for the image.
+#define RGB_PIXEL_FORMATS  PIX_FMT_RGB24, PIX_FMT_RGBA,  \
+                           PIX_FMT_RGB48
+
+#define GRAY_PIXEL_FORMATS PIX_FMT_GRAY8, PIX_FMT_Y400A, \
+                           PIX_FMT_GRAY16
+
+#define YUV_PIXEL_FORMATS  PIX_FMT_YUV410P,   PIX_FMT_YUV411P,   \
+                           PIX_FMT_YUVA420P, \
+                           PIX_FMT_YUV420P,   PIX_FMT_YUV422P,   \
+                           PIX_FMT_YUV440P,   PIX_FMT_YUV444P,   \
+                           PIX_FMT_YUV420P9,  PIX_FMT_YUV422P9,  \
+                           PIX_FMT_YUV444P9, \
+                           PIX_FMT_YUV420P10, PIX_FMT_YUV422P10, \
+                           PIX_FMT_YUV444P10, \
+                           PIX_FMT_YUV420P16, PIX_FMT_YUV422P16, \
+                           PIX_FMT_YUV444P16
+/* Candidate lists searched in order by libopenjpeg_guess_pix_fmt(). */
+static const enum PixelFormat rgb_pix_fmts[]  = {RGB_PIXEL_FORMATS};
+static const enum PixelFormat gray_pix_fmts[] = {GRAY_PIXEL_FORMATS};
+static const enum PixelFormat yuv_pix_fmts[]  = {YUV_PIXEL_FORMATS};
+static const enum PixelFormat any_pix_fmts[]  = {RGB_PIXEL_FORMATS,
+                                                 GRAY_PIXEL_FORMATS,
+                                                 YUV_PIXEL_FORMATS};
+
 typedef struct {
     AVClass *class;
     opj_dparameters_t dec_params;
@@ -44,14 +71,165 @@ typedef struct {
     int lowqual;
 } LibOpenJPEGContext;
 
-static int check_image_attributes(opj_image_t *image)
+static int libopenjpeg_matches_pix_fmt(const opj_image_t *img,
+                                       enum PixelFormat pix_fmt)
 {
-    return image->comps[0].dx   == image->comps[1].dx   &&
-           image->comps[1].dx   == image->comps[2].dx   &&
-           image->comps[0].dy   == image->comps[1].dy   &&
-           image->comps[1].dy   == image->comps[2].dy   &&
-           image->comps[0].prec == image->comps[1].prec &&
-           image->comps[1].prec == image->comps[2].prec;
+    AVPixFmtDescriptor des = av_pix_fmt_descriptors[pix_fmt];
+    int match = 1;
+    /* Returns 1 when pix_fmt can represent img, 0 otherwise. */
+    if (des.nb_components != img->numcomps) {
+        return 0;
+    }
+    /* Cases fall through on purpose: components N-1 down to 0 are all checked. */
+    switch (des.nb_components) {
+    case 4: /* component 3: deep enough and not subsampled */
+        match = match &&
+            des.comp[3].depth_minus1 + 1 >= img->comps[3].prec &&
+            1 == img->comps[3].dx &&
+            1 == img->comps[3].dy;
+    case 3: /* component 2: deep enough, subsampled like the format's chroma */
+        match = match &&
+            des.comp[2].depth_minus1 + 1 >= img->comps[2].prec &&
+            1 << des.log2_chroma_w == img->comps[2].dx &&
+            1 << des.log2_chroma_h == img->comps[2].dy;
+    case 2: /* component 1: same checks as component 2 */
+        match = match &&
+            des.comp[1].depth_minus1 + 1 >= img->comps[1].prec &&
+            1 << des.log2_chroma_w == img->comps[1].dx &&
+            1 << des.log2_chroma_h == img->comps[1].dy;
+    case 1: /* component 0: deep enough and not subsampled */
+        match = match &&
+            des.comp[0].depth_minus1 + 1 >= img->comps[0].prec &&
+            1 == img->comps[0].dx &&
+            1 == img->comps[0].dy;
+    default:
+        break;
+    }
+
+    return match;
+}
+
+static enum PixelFormat libopenjpeg_guess_pix_fmt(const opj_image_t *image)
+{
+    int index;
+    const enum PixelFormat *possible_fmts = NULL;
+    int possible_fmts_nb = 0;
+    /* Choose the candidate list by colour space; unknown spaces try all lists. */
+    switch (image->color_space) {
+    case CLRSPC_SRGB:
+        possible_fmts = rgb_pix_fmts;
+        possible_fmts_nb = FF_ARRAY_ELEMS(rgb_pix_fmts);
+        break;
+    case CLRSPC_GRAY:
+        possible_fmts = gray_pix_fmts;
+        possible_fmts_nb = FF_ARRAY_ELEMS(gray_pix_fmts);
+        break;
+    case CLRSPC_SYCC:
+        possible_fmts = yuv_pix_fmts;
+        possible_fmts_nb = FF_ARRAY_ELEMS(yuv_pix_fmts);
+        break;
+    default:
+        possible_fmts = any_pix_fmts;
+        possible_fmts_nb = FF_ARRAY_ELEMS(any_pix_fmts);
+        break;
+    }
+    /* Return the first candidate that matches, or PIX_FMT_NONE if none does. */
+    for (index = 0; index < possible_fmts_nb; ++index) {
+        if (libopenjpeg_matches_pix_fmt(image, possible_fmts[index])) {
+            return possible_fmts[index];
+        }
+    }
+
+    return PIX_FMT_NONE;
+}
+
+static inline int libopenjpeg_ispacked(enum PixelFormat pix_fmt)
+{
+    int i, component_plane;
+    /* Returns 1 if every component of pix_fmt is stored in the same plane. */
+    if (pix_fmt == PIX_FMT_GRAY16)
+        return 0; /* GRAY16 is always reported as not packed */
+    /* Otherwise compare each component's plane with that of component 0. */
+    component_plane = av_pix_fmt_descriptors[pix_fmt].comp[0].plane;
+    for (i = 1; i < av_pix_fmt_descriptors[pix_fmt].nb_components; i++) {
+        if (component_plane != av_pix_fmt_descriptors[pix_fmt].comp[i].plane)
+            return 0;
+    }
+    return 1;
+}
+
+static void libopenjpeg_copy_to_packed8(AVFrame *picture, opj_image_t *image)
+{
+    uint8_t *img_ptr;
+    int index, x, y, c;
+    /* Interleave all components into data[0], one byte per sample. */
+    for (y = 0; y < picture->height; y++) {
+        index = y*picture->width;
+        img_ptr = picture->data[0] + y*picture->linesize[0];
+        for (x = 0; x < picture->width; x++, index++) {
+            for (c = 0; c < image->numcomps; c++) {
+                *img_ptr++ = image->comps[c].data[index];
+            }
+        }
+    }
+}
+
+static void libopenjpeg_copy_to_packed16(AVFrame *picture, opj_image_t *image)
+{
+    uint16_t *img_ptr;
+    int index, x, y, c;
+    int adjust[4];
+    /* Left shift per component: 16 - prec, clamped to the range 0..8. */
+    for (x = 0; x < image->numcomps; x++)
+        adjust[x] = FFMAX(FFMIN(16 - image->comps[x].prec, 8), 0);
+    /* Interleave all components into data[0] as shifted 16-bit samples. */
+    for (y = 0; y < picture->height; y++) {
+        index = y*picture->width;
+        img_ptr = (uint16_t*) (picture->data[0] + y*picture->linesize[0]);
+        for (x = 0; x < picture->width; x++, index++) {
+            for (c = 0; c < image->numcomps; c++) {
+                *img_ptr++ = image->comps[c].data[index] << adjust[c];
+            }
+        }
+    }
+}
+
+static void libopenjpeg_copyto8(AVFrame *picture, opj_image_t *image)
+{
+    int *comp_data;
+    uint8_t *img_ptr;
+    int index, x, y;
+    /* Copy each component into its own plane, truncating samples to 8 bits. */
+    for (index = 0; index < image->numcomps; index++) {
+        comp_data = image->comps[index].data;
+        for (y = 0; y < image->comps[index].h; y++) {
+            img_ptr = picture->data[index] + y * picture->linesize[index];
+            for (x = 0; x < image->comps[index].w; x++) {
+                *img_ptr = (uint8_t) *comp_data;
+                img_ptr++;
+                comp_data++;
+            }
+        }
+    }
+}
+
+static void libopenjpeg_copyto16(AVFrame *p, opj_image_t *image)
+{
+    int *comp_data;
+    uint16_t *img_ptr;
+    int index, x, y;
+    /* Copy each component into its own plane as 16-bit samples. */
+    for (index = 0; index < image->numcomps; index++) {
+        comp_data = image->comps[index].data;
+        for (y = 0; y < image->comps[index].h; y++) {
+            img_ptr = (uint16_t*) (p->data[index] + y * p->linesize[index]);
+            for (x = 0; x < image->comps[index].w; x++) {
+                *img_ptr = *comp_data;
+                img_ptr++;
+                comp_data++;
+            }
+        }
+    }
 }
 
 static av_cold int libopenjpeg_decode_init(AVCodecContext *avctx)
@@ -59,6 +237,7 @@ static av_cold int libopenjpeg_decode_init(AVCodecContext *avctx)
     LibOpenJPEGContext *ctx = avctx->priv_data;
 
     opj_set_default_decoder_parameters(&ctx->dec_params);
+    avcodec_get_frame_defaults(&ctx->image);
     avctx->coded_frame = &ctx->image;
     return 0;
 }
@@ -82,10 +261,10 @@ static int libopenjpeg_decode_frame(AVCodecContext *avctx,
     opj_dinfo_t *dec;
     opj_cio_t *stream;
     opj_image_t *image;
-    int width, height, has_alpha = 0, ret = -1;
-    int x, y, index;
-    uint8_t *img_ptr;
-    int adjust[4];
+    int width, height, ret = -1;
+    int pixel_size = 0;
+    int ispacked = 0;
+    int i;
 
     *data_size = 0;
 
@@ -148,39 +327,31 @@ static int libopenjpeg_decode_frame(AVCodecContext *avctx,
 
     avcodec_set_dimensions(avctx, width, height);
 
-    switch (image->numcomps) {
-    case 1:
-        avctx->pix_fmt = PIX_FMT_GRAY8;
-        break;
-    case 3:
-        if (check_image_attributes(image)) {
-            avctx->pix_fmt = PIX_FMT_RGB24;
-        } else {
-            avctx->pix_fmt = PIX_FMT_GRAY8;
-            av_log(avctx, AV_LOG_ERROR,
-                   "Only first component will be used.\n");
-        }
-        break;
-    case 4:
-        has_alpha      = 1;
-        avctx->pix_fmt = PIX_FMT_RGBA;
-        break;
-    default:
-        av_log(avctx, AV_LOG_ERROR, "%d components unsupported.\n",
-               image->numcomps);
+    if (avctx->pix_fmt != PIX_FMT_NONE)
+        if (!libopenjpeg_matches_pix_fmt(image, avctx->pix_fmt))
+            avctx->pix_fmt = PIX_FMT_NONE;
+
+    if (avctx->pix_fmt == PIX_FMT_NONE)
+        avctx->pix_fmt = libopenjpeg_guess_pix_fmt(image);
+
+    if (avctx->pix_fmt == PIX_FMT_NONE) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to determine pixel format\n");
+        ret = AVERROR_INVALIDDATA;
         goto done;
     }
 
+    for (i = 0; i < image->numcomps; i++)
+        if (image->comps[i].prec > avctx->bits_per_raw_sample)
+            avctx->bits_per_raw_sample = image->comps[i].prec;
+
     if (picture->data[0])
         ff_thread_release_buffer(avctx, picture);
 
     if (ff_thread_get_buffer(avctx, picture) < 0) {
         av_log(avctx, AV_LOG_ERROR, "ff_thread_get_buffer() failed\n");
-        return -1;
+        goto done;
     }
 
-    ff_thread_finish_setup(avctx);
-
     ctx->dec_params.cp_limit_decoding = NO_LIMITATION;
     // Tie decoder with decoding parameters.
     opj_setup_decoder(dec, &ctx->dec_params);
@@ -188,29 +359,53 @@ static int libopenjpeg_decode_frame(AVCodecContext *avctx,
     if (!stream) {
         av_log(avctx, AV_LOG_ERROR,
                "Codestream could not be opened for reading.\n");
-        opj_destroy_decompress(dec);
-        return -1;
+        goto done;
     }
 
-    // Decode the codestream.
+    opj_image_destroy(image);
+    // Decode the codestream
     image = opj_decode_with_info(dec, stream, NULL);
     opj_cio_close(stream);
 
-    for (x = 0; x < image->numcomps; x++)
-        adjust[x] = FFMAX(image->comps[x].prec - 8, 0);
+    if (!image) {
+        av_log(avctx, AV_LOG_ERROR, "Error decoding codestream.\n");
+        goto done;
+    }
 
-    for (y = 0; y < avctx->height; y++) {
-        index   = y * avctx->width;
-        img_ptr = picture->data[0] + y * picture->linesize[0];
-        for (x = 0; x < avctx->width; x++, index++) {
-            *img_ptr++ = image->comps[0].data[index] >> adjust[0];
-            if (image->numcomps > 2 && check_image_attributes(image)) {
-                *img_ptr++ = image->comps[1].data[index] >> adjust[1];
-                *img_ptr++ = image->comps[2].data[index] >> adjust[2];
-                if (has_alpha)
-                    *img_ptr++ = image->comps[3].data[index] >> adjust[3];
-            }
+    pixel_size =
+        av_pix_fmt_descriptors[avctx->pix_fmt].comp[0].step_minus1 + 1;
+    ispacked = libopenjpeg_ispacked(avctx->pix_fmt);
+
+    switch (pixel_size) {
+    case 1:
+        if (ispacked) {
+            libopenjpeg_copy_to_packed8(picture, image);
+        } else {
+            libopenjpeg_copyto8(picture, image);
         }
+        break;
+    case 2:
+        if (ispacked) {
+            libopenjpeg_copy_to_packed8(picture, image);
+        } else {
+            libopenjpeg_copyto16(picture, image);
+        }
+        break;
+    case 3:
+    case 4:
+        if (ispacked) {
+            libopenjpeg_copy_to_packed8(picture, image);
+        }
+        break;
+    case 6:
+    case 8:
+        if (ispacked) {
+            libopenjpeg_copy_to_packed16(picture, image);
+        }
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "unsupported pixel size %d\n", pixel_size);
+        goto done;
     }
 
     *output    = ctx->image;