From cd8c64e197ca7295c609b120039f7032f3a9356c Mon Sep 17 00:00:00 2001
From: Janne Grunau
Date: Mon, 24 Oct 2011 00:59:41 +0200
Subject: [PATCH 1/3] Revert "mpeg12: move full_pel from MpegEncContext to Mpeg1Context"

This reverts commit da22ba7df461c13bf0b0eabc953303803a285d91 since it broke
slice threading. Slice threading just duplicates MpegEncContext so every
value used during mpeg_decode_slice has to be in it. A second patch will fix
the illusion that Mpeg1Context is available in mpeg_decode_slice.
---
 libavcodec/mpeg12.c         | 15 +++++++--------
 libavcodec/mpeg12.h         |  1 -
 libavcodec/mpegvideo.h      |  1 +
 libavcodec/vdpau.c          |  7 +++----
 libavcodec/vdpau_internal.h |  3 +--
 5 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c
index 6fb175e626..4542d5944a 100644
--- a/libavcodec/mpeg12.c
+++ b/libavcodec/mpeg12.c
@@ -734,9 +734,8 @@ static void exchange_uv(MpegEncContext *s)
 #define MT_16X8 2
 #define MT_DMV  3
 
-static int mpeg_decode_mb(Mpeg1Context *s1, DCTELEM block[12][64])
+static int mpeg_decode_mb(MpegEncContext *s, DCTELEM block[12][64])
 {
-    MpegEncContext *s = &s1->mpeg_enc_ctx;
     int i, j, k, cbp, val, mb_type, motion_type;
     const int mb_block_count = 4 + (1 << s->chroma_format);
 
@@ -910,7 +909,7 @@ static int mpeg_decode_mb(Mpeg1Context *s1, DCTELEM block[12][64])
                         s->mv[i][0][1]= s->last_mv[i][0][1]= s->last_mv[i][1][1] =
                             mpeg_decode_motion(s, s->mpeg_f_code[i][1], s->last_mv[i][0][1]);
                         /* full_pel: only for MPEG-1 */
-                        if (s1->full_pel[i]) {
+                        if (s->full_pel[i]) {
                             s->mv[i][0][0] <<= 1;
                             s->mv[i][0][1] <<= 1;
                         }
@@ -1328,7 +1327,7 @@ static int mpeg1_decode_picture(AVCodecContext *avctx,
     vbv_delay = get_bits(&s->gb, 16);
     if (s->pict_type == AV_PICTURE_TYPE_P ||
         s->pict_type == AV_PICTURE_TYPE_B) {
-        s1->full_pel[0] = get_bits1(&s->gb);
+        s->full_pel[0] = get_bits1(&s->gb);
         f_code = get_bits(&s->gb, 3);
         if (f_code == 0 && (avctx->err_recognition & AV_EF_BITSTREAM))
             return -1;
@@ -1336,7 +1335,7 @@
         s->mpeg_f_code[0][1] = f_code;
     }
     if (s->pict_type == AV_PICTURE_TYPE_B) {
-        s1->full_pel[1] = get_bits1(&s->gb);
+        s->full_pel[1] = get_bits1(&s->gb);
         f_code = get_bits(&s->gb, 3);
         if (f_code == 0 && (avctx->err_recognition & AV_EF_BITSTREAM))
             return -1;
@@ -1484,7 +1483,7 @@ static void mpeg_decode_picture_coding_extension(Mpeg1Context *s1)
 {
     MpegEncContext *s = &s1->mpeg_enc_ctx;
 
-    s1->full_pel[0] = s1->full_pel[1] = 0;
+    s->full_pel[0] = s->full_pel[1] = 0;
     s->mpeg_f_code[0][0] = get_bits(&s->gb, 4);
     s->mpeg_f_code[0][1] = get_bits(&s->gb, 4);
     s->mpeg_f_code[1][0] = get_bits(&s->gb, 4);
@@ -1716,7 +1715,7 @@ static int mpeg_decode_slice(Mpeg1Context *s1, int mb_y,
         if (CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration > 1)
             ff_xvmc_init_block(s); // set s->block
 
-        if (mpeg_decode_mb(s1, s->block) < 0)
+        if (mpeg_decode_mb(s, s->block) < 0)
             return -1;
 
         if (s->current_picture.f.motion_val[0] && !s->encoding) { // note motion_val is normally NULL unless we want to extract the MVs
@@ -2259,7 +2258,7 @@ static int decode_chunks(AVCodecContext *avctx,
                     }
 
                     if (CONFIG_MPEG_VDPAU_DECODER && avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU)
-                        ff_vdpau_mpeg_picture_complete(s, buf, buf_size, s->slice_count);
+                        ff_vdpau_mpeg_picture_complete(s2, buf, buf_size, s->slice_count);
 
                     if (slice_end(avctx, picture)) {
                         if (s2->last_picture_ptr || s2->low_delay) //FIXME merge with the stuff in mpeg_decode_slice
diff --git a/libavcodec/mpeg12.h b/libavcodec/mpeg12.h
index 209bf05234..ab0352ff10 100644
--- a/libavcodec/mpeg12.h
+++ b/libavcodec/mpeg12.h
@@ -41,7 +41,6 @@ typedef struct Mpeg1Context {
     int save_width, save_height, save_progressive_seq;
     AVRational frame_rate_ext; ///< MPEG-2 specific framerate modificator
     int sync;                  ///< Did we reach a sync point like a GOP/SEQ/KEYFrame?
-    int full_pel[2];
     int closed_gop;            ///< GOP is closed
 } Mpeg1Context;
 
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index b69519a272..46ad5d8306 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -633,6 +633,7 @@ typedef struct MpegEncContext {
     int chroma_y_shift;
 
     int progressive_frame;
+    int full_pel[2];
     int interlaced_dct;
     int first_slice;
     int first_field;           ///< is 1 for the first field of a field picture 0 otherwise
diff --git a/libavcodec/vdpau.c b/libavcodec/vdpau.c
index 2a7894edd2..df04ca01fc 100644
--- a/libavcodec/vdpau.c
+++ b/libavcodec/vdpau.c
@@ -190,10 +190,9 @@ void ff_vdpau_h264_picture_complete(MpegEncContext *s)
     render->bitstream_buffers_used = 0;
 }
 
-void ff_vdpau_mpeg_picture_complete(Mpeg1Context *s1, const uint8_t *buf,
+void ff_vdpau_mpeg_picture_complete(MpegEncContext *s, const uint8_t *buf,
                                     int buf_size, int slice_count)
 {
-    MpegEncContext *s = &s1->mpeg_enc_ctx;
     struct vdpau_render_state *render, *last, *next;
     int i;
 
@@ -212,8 +211,8 @@ void ff_vdpau_mpeg_picture_complete(Mpeg1Context *s1, const uint8_t *buf,
     render->info.mpeg.alternate_scan = s->alternate_scan;
     render->info.mpeg.q_scale_type = s->q_scale_type;
     render->info.mpeg.top_field_first = s->top_field_first;
-    render->info.mpeg.full_pel_forward_vector = s1->full_pel[0]; // MPEG-1 only. Set 0 for MPEG-2
-    render->info.mpeg.full_pel_backward_vector = s1->full_pel[1]; // MPEG-1 only. Set 0 for MPEG-2
+    render->info.mpeg.full_pel_forward_vector = s->full_pel[0]; // MPEG-1 only. Set 0 for MPEG-2
+    render->info.mpeg.full_pel_backward_vector = s->full_pel[1]; // MPEG-1 only. Set 0 for MPEG-2
     render->info.mpeg.f_code[0][0] = s->mpeg_f_code[0][0]; // For MPEG-1 fill both horiz. & vert.
     render->info.mpeg.f_code[0][1] = s->mpeg_f_code[0][1];
     render->info.mpeg.f_code[1][0] = s->mpeg_f_code[1][0];
diff --git a/libavcodec/vdpau_internal.h b/libavcodec/vdpau_internal.h
index 49f8e46a00..673fd3349b 100644
--- a/libavcodec/vdpau_internal.h
+++ b/libavcodec/vdpau_internal.h
@@ -26,12 +26,11 @@
 
 #include <stdint.h>
 #include "mpegvideo.h"
-#include "mpeg12.h"
 
 void ff_vdpau_add_data_chunk(MpegEncContext *s, const uint8_t *buf,
                              int buf_size);
 
-void ff_vdpau_mpeg_picture_complete(Mpeg1Context *s1, const uint8_t *buf,
+void ff_vdpau_mpeg_picture_complete(MpegEncContext *s, const uint8_t *buf,
                                     int buf_size, int slice_count);
 
 void ff_vdpau_h264_picture_start(MpegEncContext *s);
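As the commit message notes, slice threading duplicates only MpegEncContext; the Mpeg1Context that wraps it is never copied, so a field parsed from the picture header on the main thread is visible inside mpeg_decode_slice only if it lives in MpegEncContext. A minimal standalone sketch of that ownership rule, using hypothetical stand-in structs (DecodeContext, WrapperContext and the plain memcpy are illustrative, not the real libavcodec types or thread setup):

    #include <stdio.h>
    #include <string.h>

    /* Hypothetical stand-ins for MpegEncContext and Mpeg1Context. */
    typedef struct DecodeContext {      /* duplicated into every slice thread */
        int full_pel[2];
        int mpeg_f_code[2][2];
    } DecodeContext;

    typedef struct WrapperContext {     /* exists only on the main thread */
        DecodeContext dec;              /* first member, as in Mpeg1Context */
        int full_pel_in_wrapper[2];     /* values stored here never reach a worker copy */
    } WrapperContext;

    /* Rough model of what slice threading does: each worker receives a plain
     * copy of the inner decode context, nothing more. */
    static DecodeContext make_worker_copy(const WrapperContext *main_ctx)
    {
        DecodeContext worker;
        memcpy(&worker, &main_ctx->dec, sizeof(worker));
        return worker;
    }

    int main(void)
    {
        WrapperContext main_ctx = { 0 };

        /* Picture-header parsing happens on the main thread. */
        main_ctx.dec.full_pel[0]        = 1;   /* visible to every worker copy */
        main_ctx.full_pel_in_wrapper[0] = 1;   /* invisible to the worker copies */

        DecodeContext worker = make_worker_copy(&main_ctx);
        printf("worker sees full_pel[0] = %d\n", worker.full_pel[0]);
        /* There is no wrapper around `worker`, which is why full_pel has to
         * live in the duplicated context itself. */
        return 0;
    }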
From d99fe3a478a4b7cfd1fc3a05aa374b9cf44d13d8 Mon Sep 17 00:00:00 2001
From: Janne Grunau
Date: Mon, 24 Oct 2011 01:05:00 +0200
Subject: [PATCH 2/3] mpeg12: fix mpeg_decode_slice context parameter type

During slice threading only the MpegEncContext is passed to
mpeg_decode_slice. Remove the wrong cast and change the function definition
to take an MpegEncContext pointer.
---
 libavcodec/mpeg12.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c
index 4542d5944a..226bde36ac 100644
--- a/libavcodec/mpeg12.c
+++ b/libavcodec/mpeg12.c
@@ -1623,10 +1623,9 @@ static int mpeg_field_start(MpegEncContext *s, const uint8_t *buf, int buf_size)
  * @return DECODE_SLICE_ERROR if the slice is damaged
  *         DECODE_SLICE_OK if this slice is ok
  */
-static int mpeg_decode_slice(Mpeg1Context *s1, int mb_y,
+static int mpeg_decode_slice(MpegEncContext *s, int mb_y,
                              const uint8_t **buf, int buf_size)
 {
-    MpegEncContext *s = &s1->mpeg_enc_ctx;
     AVCodecContext *avctx = s->avctx;
     const int lowres = s->avctx->lowres;
     const int field_pic = s->picture_structure != PICT_FRAME;
@@ -1858,7 +1857,7 @@ static int slice_decode_thread(AVCodecContext *c, void *arg)
         uint32_t start_code;
         int ret;
 
-        ret = mpeg_decode_slice((Mpeg1Context*)s, mb_y, &buf, s->gb.buffer_end - buf);
+        ret = mpeg_decode_slice(s, mb_y, &buf, s->gb.buffer_end - buf);
         emms_c();
         //av_log(c, AV_LOG_DEBUG, "ret:%d resync:%d/%d mb:%d/%d ts:%d/%d ec:%d\n",
         //ret, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, s->start_mb_y, s->end_mb_y, s->error_count);
@@ -2443,7 +2442,7 @@ static int decode_chunks(AVCodecContext *avctx,
                     }
                     buf_ptr += 2; // FIXME add minimum number of bytes per slice
                 } else {
-                    ret = mpeg_decode_slice(s, mb_y, &buf_ptr, input_size);
+                    ret = mpeg_decode_slice(s2, mb_y, &buf_ptr, input_size);
                     emms_c();
 
                     if (ret < 0) {
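The cast removed above was more than a style issue: the per-thread contexts are standalone MpegEncContext objects, so reinterpreting a pointer to one of them as a Mpeg1Context pointer makes any access to wrapper-only fields read past the end of the object. A contrived sketch of that pointer relationship, again with hypothetical stand-in types rather than the libavcodec definitions:

    #include <stdio.h>

    typedef struct Inner {              /* plays the role of MpegEncContext */
        int pict_type;
    } Inner;

    typedef struct Wrapper {            /* plays the role of Mpeg1Context */
        Inner inner;                    /* first member */
        int   full_pel[2];              /* exists only in the wrapper */
    } Wrapper;

    /* The pattern the patch removes: treat an Inner pointer as a pointer to
     * a Wrapper that supposedly contains it. */
    static const Wrapper *as_wrapper(const Inner *i)
    {
        return (const Wrapper *)i;
    }

    int main(void)
    {
        Wrapper main_ctx = { .inner = { .pict_type = 1 }, .full_pel = { 1, 0 } };
        Inner   worker   = main_ctx.inner;  /* a slice thread's standalone copy */

        /* Fine: main_ctx.inner really is embedded in a Wrapper. */
        printf("via real wrapper: full_pel[0] = %d\n",
               as_wrapper(&main_ctx.inner)->full_pel[0]);

        /* Not fine: `worker` has no Wrapper around it, so
         * as_wrapper(&worker)->full_pel[0] would read beyond the object.
         * Passing and using the Inner type directly avoids the problem. */
        printf("worker copy: pict_type = %d\n", worker.pict_type);
        return 0;
    }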
From c8477df019f02a436c69e8ca54370dd5cf8eee97 Mon Sep 17 00:00:00 2001
From: Mans Rullgard
Date: Mon, 24 Oct 2011 00:08:23 +0100
Subject: [PATCH 3/3] adpcm: use sign_extend()

This avoids warnings from the overflow checker and simplifies the code.

Signed-off-by: Mans Rullgard
---
 libavcodec/adpcm.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/libavcodec/adpcm.c b/libavcodec/adpcm.c
index 55f518b15b..4a818575cf 100644
--- a/libavcodec/adpcm.c
+++ b/libavcodec/adpcm.c
@@ -824,13 +824,13 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
             coeff2r = ea_adpcm_table[(*src & 0x0F) + 4];
             src++;
 
-            shift_left = (*src >> 4 ) + 8;
-            shift_right = (*src & 0x0F) + 8;
+            shift_left = 20 - (*src >> 4);
+            shift_right = 20 - (*src & 0x0F);
             src++;
 
             for (count2 = 0; count2 < 28; count2++) {
-                next_left_sample = (int32_t)((*src & 0xF0) << 24) >> shift_left;
-                next_right_sample = (int32_t)((*src & 0x0F) << 28) >> shift_right;
+                next_left_sample = sign_extend(*src >> 4, 4) << shift_left;
+                next_right_sample = sign_extend(*src, 4) << shift_right;
                 src++;
 
                 next_left_sample = (next_left_sample +
@@ -861,13 +861,13 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
            for(channel = 0; channel < avctx->channels; channel++) {
                for (i=0; i<2; i++)
                    coeff[channel][i] = ea_adpcm_table[(*src >> 4) + 4*i];
-               shift[channel] = (*src & 0x0F) + 8;
+               shift[channel] = 20 - (*src & 0x0F);
                src++;
            }
            for (count1 = 0; count1 < nb_samples / 2; count1++) {
                for(i = 4; i >= 0; i-=4) {   /* Pairwise samples LL RR (st) or LL LL (mono) */
                    for(channel = 0; channel < avctx->channels; channel++) {
-                       int32_t sample = (int32_t)(((*(src+channel) >> i) & 0x0F) << 0x1C) >> shift[channel];
+                       int32_t sample = sign_extend(src[channel] >> i, 4) << shift[channel];
                        sample = (sample +
                                 c->status[channel].sample1 * coeff[channel][0] +
                                 c->status[channel].sample2 * coeff[channel][1] + 0x80) >> 8;
@@ -932,14 +932,14 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
            } else {
                coeff1 = ea_adpcm_table[ *srcC>>4 ];
                coeff2 = ea_adpcm_table[(*srcC>>4) + 4];
-               shift = (*srcC++ & 0x0F) + 8;
+               shift = 20 - (*srcC++ & 0x0F);
 
                if (srcC > src_end - 14) break;
                for (count2=0; count2<28; count2++) {
                    if (count2 & 1)
-                       next_sample = (int32_t)((*srcC++ & 0x0F) << 28) >> shift;
+                       next_sample = sign_extend(*srcC++, 4) << shift;
                    else
-                       next_sample = (int32_t)((*srcC & 0xF0) << 24) >> shift;
+                       next_sample = sign_extend(*srcC >> 4, 4) << shift;
 
                    next_sample += (current_sample * coeff1) +
                                   (previous_sample * coeff2);
@@ -976,7 +976,7 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
            for (n=0; n<4; n++, s+=32*avctx->channels) {
                for (i=0; i<2; i++)
                    coeff[i][n] = ea_adpcm_table[(src[0]&0x0F)+4*i];
-               shift[n] = (src[2]&0x0F) + 8;
+               shift[n] = 20 - (src[2] & 0x0F);
                for (s2=s, i=0; i<2; i++, src+=2, s2+=avctx->channels)
                    s2[0] = (src[0]&0xF0) + (src[1]<<8);
            }
@@ -985,7 +985,7 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
                s = &samples[m*avctx->channels + channel];
                for (n=0; n<4; n++, src++, s+=32*avctx->channels) {
                    for (s2=s, i=0; i<8; i+=4, s2+=avctx->channels) {
-                       int level = (int32_t)((*src & (0xF0>>i)) << (24+i)) >> shift[n];
+                       int level = sign_extend(*src >> (4 - i), 4) << shift[n];
                        int pred = s2[-1*avctx->channels] * coeff[0][n] +
                                   s2[-2*avctx->channels] * coeff[1][n];
                        s2[0] = av_clip_int16((level + pred + 0x80) >> 8);
@@ -1149,18 +1149,18 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
            /* Read in every sample for this channel. */
            for (i = 0; i < nb_samples / 14; i++) {
                int index = (*src >> 4) & 7;
-               unsigned int exp = 28 - (*src++ & 15);
+               unsigned int exp = *src++ & 15;
                int factor1 = table[ch][index * 2];
                int factor2 = table[ch][index * 2 + 1];
 
                /* Decode 14 samples. */
                for (n = 0; n < 14; n++) {
                    int32_t sampledat;
-                   if(n&1) sampledat= *src++ <<28;
-                   else sampledat= (*src&0xF0)<<24;
+                   if(n&1) sampledat = sign_extend(*src++, 4);
+                   else sampledat = sign_extend(*src >> 4, 4);
 
                    sampledat = ((prev[ch][0]*factor1
-                               + prev[ch][1]*factor2) >> 11) + (sampledat>>exp);
+                               + prev[ch][1]*factor2) >> 11) + (sampledat << exp);
                    *samples = av_clip_int16(sampledat);
                    prev[ch][1] = prev[ch][0];
                    prev[ch][0] = *samples++;
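All of the conversions above follow the same pattern: instead of shifting a 4-bit field into the top of a 32-bit word and arithmetic-shifting it back down (the construct the overflow checker warned about), the nibble is sign-extended first and then shifted left, with the stored shift amount rewritten from n + 8 to 20 - n. A small standalone check of that equivalence follows; sign_extend() here is a local helper with the assumed semantics of libavutil's function (treat the low bits of the argument as two's complement), the old expression is reproduced through an unsigned cast to sidestep the signed overflow being removed, and arithmetic right shift of negative values is assumed, as the decoder itself already assumes.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Local stand-in for libavutil's sign_extend(): interpret the low `bits`
     * bits of val as a two's-complement number. */
    static int sign_extend(int val, unsigned bits)
    {
        int m = 1 << (bits - 1);
        val &= (1 << bits) - 1;
        return (val ^ m) - m;
    }

    int main(void)
    {
        /* Old EA ADPCM idiom:  ((byte & 0xF0) << 24) >> (n + 8)
         * New idiom (patch 3): sign_extend(byte >> 4, 4) << (20 - n)
         * where n is the 4-bit shift field read from the stream. */
        for (int byte = 0; byte < 256; byte++) {
            for (int n = 0; n < 16; n++) {
                /* Unsigned cast avoids the signed-overflow UB of the old code;
                 * the right shift of a negative value is assumed arithmetic. */
                int32_t old_form = (int32_t)((uint32_t)(byte & 0xF0) << 24) >> (n + 8);
                /* Multiply instead of left-shifting a possibly negative value. */
                int32_t new_form = sign_extend(byte >> 4, 4) * (1 << (20 - n));
                assert(old_form == new_form);
            }
        }
        printf("old and new expressions agree for every nibble and shift field\n");
        return 0;
    }

Under those assumptions the assertion never fires, which is why the patch can change only the expressions and the stored shift values without affecting decoded output.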