Merge remote-tracking branch 'qatar/master'

* qatar/master:
  configure: Check for CommandLineToArgvW
  vc1dec: Do not use random pred_flag if motion vector data is skipped
  vp8: Enclose pthread function calls in ifdefs
  snow: refactor code to work around a compiler bug in MSVC.
  vp8: Include the thread headers before using the pthread types
  configure: Check for getaddrinfo in ws2tcpip.h, too
  vp8: implement sliced threading
  vp8: move data from VP8Context->VP8Macroblock
  vp8: refactor decoding a single mb_row
  doc: update api changes with the right commit hashes
  mem: introduce av_malloc_array and av_mallocz_array

Conflicts:
  configure
  doc/APIchanges
  libavcodec/vp8.c
  libavutil/mem.h
  libavutil/version.h

Merged-by: Michael Niedermayer <michaelni@gmx.at>
2012-07-16 01:32:52 +02:00 · 2012-07-16 01:32:52 +02:00 · fbe02459dc
parent 9a1963fbb8 b4675d0fbf
commit fbe02459dc
8 changed files with 539 additions and 253 deletions
--- a/5
+++ b/5
@ -1190,6 +1190,7 @@ HAVE_LIST="
    clock_gettime
    closesocket
    cmov
+    CommandLineToArgvW
    cpuid
    dcbzl
    dev_bktr_ioctl_bt848_h
@ -3182,7 +3183,8 @@ check_func nanosleep || { check_func nanosleep -lrt && add_extralibs -lrt; }
 check_func  clock_gettime || { check_func clock_gettime -lrt && add_extralibs -lrt; }
 check_func  fcntl
 check_func  fork
-check_func  getaddrinfo $network_extralibs
+check_func  getaddrinfo $network_extralibs ||
+    check_func_headers ws2tcpip.h getaddrinfo $network_extralibs
 check_func  gethrtime
 check_func  getopt
 check_func  getrusage
@ -3207,6 +3209,7 @@ check_func_headers conio.h kbhit
 check_func_headers windows.h PeekNamedPipe
 check_func_headers io.h setmode
 check_func_headers lzo/lzo1x.h lzo1x_999_compress
+check_lib2 "windows.h shellapi.h" CommandLineToArgvW -lshell32
 check_lib2 "windows.h psapi.h" GetProcessMemoryInfo -lpsapi
 check_func_headers windows.h GetProcessAffinityMask
 check_func_headers windows.h GetProcessTimes
--- a/doc/APIchanges
+++ b/doc/APIchanges
@ -57,29 +57,32 @@ API changes, most recent first:
 2012-03-26 - a67d9cf - lavfi 2.66.100
  Add avfilter_fill_frame_from_{audio_,}buffer_ref() functions.

-2012-06-22 - xxxxxxx - lavu 51.34.0
+2012-07-10 - 5fade8a - lavu 51.37.0
+  Add av_malloc_array() and av_mallocz_array()
+
+2012-06-22 - d3d3a32 - lavu 51.34.0
  Add av_usleep()

 2012-06-20 - ae0a301 - lavu 51.33.0
  Move av_gettime() to libavutil, add libavutil/time.h

-2012-xx-xx - xxxxxxx - lavr 0.0.3
+2012-06-09 - 3971be0 - lavr 0.0.3
  Add a parameter to avresample_build_matrix() for Dolby/DPLII downmixing.

-2012-xx-xx - xxxxxxx - lavfi 2.23.0 - avfilter.h
+2012-06-12 - 9baeff9 - lavfi 2.23.0 - avfilter.h
  Add AVFilterContext.nb_inputs/outputs. Deprecate
  AVFilterContext.input/output_count.

-2012-xx-xx - xxxxxxx - lavfi 2.22.0 - avfilter.h
+2012-06-12 - 84b9fbe - lavfi 2.22.0 - avfilter.h
  Add avfilter_pad_get_type() and avfilter_pad_get_name(). Those
  should now be used instead of accessing AVFilterPad members
  directly.

-2012-xx-xx - xxxxxxx - lavu 51.32.0 - audioconvert.h
+2012-06-12 - b0f0dfc - lavu 51.32.0 - audioconvert.h
  Add av_get_channel_layout_channel_index(), av_get_channel_name()
  and av_channel_layout_extract_channel().

-2012-05-25 - e0e0793 - lavu 51.31.0 - opt.h
+2012-05-25 - 154486f - lavu 51.31.0 - opt.h
  Add av_opt_set_bin()

 2012-05-15 - lavfi 2.17.0
--- a/libavcodec/snowenc.c
+++ b/libavcodec/snowenc.c
@ -1013,10 +1013,18 @@ static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y
 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
    const int b_stride= s->b_width << s->block_max_depth;
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
-    BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
+    BlockNode backup[4];
    unsigned value;
    int rd, index;

+    /* We don't initialize backup[] during variable declaration, because
+     * that fails to compile on MSVC: "cannot convert from 'BlockNode' to
+     * 'int16_t'". */
+    backup[0] = block[0];
+    backup[1] = block[1];
+    backup[2] = block[b_stride];
+    backup[3] = block[b_stride + 1];
+
    assert(mb_x>=0 && mb_y>=0);
    assert(mb_x<b_stride);
    assert(((mb_x|mb_y)&1) == 0);
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@ -3956,7 +3956,7 @@ static int vc1_decode_p_mb_intfi(VC1Context *v)
        s->current_picture.f.mb_type[mb_pos + v->mb_off] = MB_TYPE_16x16;
        for (i = 0; i < 6; i++) v->mb_type[0][s->block_index[i]] = 0;
        if (idx_mbmode <= 5) { // 1-MV
-            dmv_x = dmv_y = 0;
+            dmv_x = dmv_y = pred_flag = 0;
            if (idx_mbmode & 1) {
                get_mvdata_interlaced(v, &dmv_x, &dmv_y, &pred_flag);
            }
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
--- a/libavcodec/vp8.h
+++ b/libavcodec/vp8.h
@ -4,6 +4,7 @@
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Jason Garrett-Glaser
+ * Copyright (C) 2012 Daniel Kang
 *
 * This file is part of FFmpeg.
 *
@ -29,6 +30,11 @@
 #include "vp56data.h"
 #include "vp8dsp.h"
 #include "h264pred.h"
+#if HAVE_PTHREADS
+#include <pthread.h>
+#elif HAVE_W32THREADS
+#include "w32pthreads.h"
+#endif

 #define VP8_MAX_QUANT 127

@ -79,15 +85,51 @@ typedef struct {
    uint8_t mode;
    uint8_t ref_frame;
    uint8_t partitioning;
+    uint8_t chroma_pred_mode;
+    uint8_t segment;
+    uint8_t intra4x4_pred_mode_mb[16];
+    uint8_t intra4x4_pred_mode_top[4];
    VP56mv mv;
    VP56mv bmv[16];
 } VP8Macroblock;

 typedef struct {
+#if HAVE_THREADS
+    pthread_mutex_t lock;
+    pthread_cond_t  cond;
+#endif
+    int thread_nr;
+    int thread_mb_pos; // (mb_y << 16) | (mb_x & 0xFFFF)
+    int wait_mb_pos; // What the current thread is waiting on.
+    uint8_t *edge_emu_buffer;
+    /**
+     * For coeff decode, we need to know whether the above block had non-zero
+     * coefficients. This means for each macroblock, we need data for 4 luma
+     * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
+     * per macroblock. We keep the last row in top_nnz.
+     */
+    DECLARE_ALIGNED(8, uint8_t, left_nnz)[9];
+    /**
+     * This is the index plus one of the last non-zero coeff
+     * for each of the blocks in the current macroblock.
+     * So, 0 -> no coeffs
+     *     1 -> dc-only (special transform)
+     *     2+-> full transform
+     */
+    DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
+    DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
+    DECLARE_ALIGNED(16, DCTELEM, block_dc)[16];
+    VP8FilterStrength *filter_strength;
+} VP8ThreadData;
+
+#define MAX_THREADS 8
+typedef struct {
+    VP8ThreadData *thread_data;
    AVCodecContext *avctx;
    AVFrame *framep[4];
    AVFrame *next_framep[4];
-    uint8_t *edge_emu_buffer;
+    AVFrame *curframe;
+    AVFrame *prev_frame;

    uint16_t mb_width;   /* number of horizontal MB */
    uint16_t mb_height;  /* number of vertical MB */
@ -97,8 +139,6 @@ typedef struct {
    uint8_t keyframe;
    uint8_t deblock_filter;
    uint8_t mbskip_enabled;
-    uint8_t segment;             ///< segment of the current macroblock
-    uint8_t chroma_pred_mode;    ///< 8x8c pred mode of the current macroblock
    uint8_t profile;
    VP56mv mv_min;
    VP56mv mv_max;
@ -126,7 +166,6 @@ typedef struct {
    } filter;

    VP8Macroblock *macroblocks;
-    VP8FilterStrength *filter_strength;

    uint8_t *intra4x4_pred_mode_top;
    uint8_t intra4x4_pred_mode_left[4];
@ -167,33 +206,10 @@ typedef struct {
        int8_t ref[4];
    } lf_delta;

-    /**
-     * Cache of the top row needed for intra prediction
-     * 16 for luma, 8 for each chroma plane
-     */
    uint8_t (*top_border)[16+8+8];
-
-    /**
-     * For coeff decode, we need to know whether the above block had non-zero
-     * coefficients. This means for each macroblock, we need data for 4 luma
-     * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
-     * per macroblock. We keep the last row in top_nnz.
-     */
    uint8_t (*top_nnz)[9];
-    DECLARE_ALIGNED(8, uint8_t, left_nnz)[9];

-    /**
-     * This is the index plus one of the last non-zero coeff
-     * for each of the blocks in the current macroblock.
-     * So, 0 -> no coeffs
-     *     1 -> dc-only (special transform)
-     *     2+-> full transform
-     */
-    DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
    VP56RangeCoder c;   ///< header context, includes mb modes and motion vectors
-    DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
-    DECLARE_ALIGNED(16, DCTELEM, block_dc)[16];
-    uint8_t intra4x4_pred_mode_mb[16];

    /**
     * These are all of the updatable probabilities for binary decisions.
@ -246,6 +262,13 @@ typedef struct {
    uint8_t *segmentation_maps[5];
    int num_maps_to_be_freed;
    int maps_are_invalid;
+    int num_jobs;
+    /**
+     * This describes the macroblock memory layout.
+     * 0 -> Only width+height*2+1 macroblocks allocated (frame/single thread).
+     * 1 -> Macroblocks for entire frame allocated (sliced thread).
+     */
+    int mb_layout;
 } VP8Context;

 #endif /* AVCODEC_VP8_H */
--- a/libavutil/mem.h
+++ b/libavutil/mem.h
@ -64,9 +64,9 @@
 #endif

 #if AV_GCC_VERSION_AT_LEAST(4,3)
-    #define av_alloc_size(n) __attribute__((alloc_size(n)))
+    #define av_alloc_size(...) __attribute__((alloc_size(__VA_ARGS__)))
 #else
-    #define av_alloc_size(n)
+    #define av_alloc_size(...)
 #endif

 /**
@ -79,6 +79,22 @@
 */
 void *av_malloc(size_t size) av_malloc_attrib av_alloc_size(1);

+/**
+ * Allocate a block of nmemb * size bytes using av_malloc().
+ *
+ * No allocation is attempted, and NULL is returned, when size is 0 or
+ * when nmemb >= INT_MAX / size; a request that passes this check
+ * therefore has nmemb * size below INT_MAX.
+ *
+ * @param nmemb Number of elements
+ * @param size Size of a single element
+ * @return Pointer to the allocated block, NULL if the block cannot
+ * be allocated.
+ * @see av_malloc()
+ */
+av_alloc_size(1,2) static inline void *av_malloc_array(size_t nmemb, size_t size)
+{
+    if (size <= 0 || nmemb >= INT_MAX / size)
+        return NULL;
+    return av_malloc(nmemb * size);
+}
+
 /**
 * Allocate or reallocate a block of memory.
 * If ptr is NULL and size > 0, allocate a new block. If
@ -135,6 +151,23 @@ void *av_mallocz(size_t size) av_malloc_attrib av_alloc_size(1);
 */
 void *av_calloc(size_t nmemb, size_t size) av_malloc_attrib;

+/**
+ * Allocate a block of nmemb * size bytes using av_mallocz().
+ *
+ * No allocation is attempted, and NULL is returned, when size is 0 or
+ * when nmemb >= INT_MAX / size; a request that passes this check
+ * therefore has nmemb * size below INT_MAX.
+ *
+ * @param nmemb Number of elements
+ * @param size Size of a single element
+ * @return Pointer to the allocated block, NULL if the block cannot
+ * be allocated.
+ * @see av_mallocz()
+ * @see av_malloc_array()
+ */
+av_alloc_size(1,2) static inline void *av_mallocz_array(size_t nmemb, size_t size)
+{
+    if (size <= 0 || nmemb >= INT_MAX / size)
+        return NULL;
+    return av_mallocz(nmemb * size);
+}
+
 /**
 * Duplicate the string s.
 * @param s string to be duplicated
--- a/libavutil/version.h
+++ b/libavutil/version.h
@ -39,7 +39,7 @@
 */

 #define LIBAVUTIL_VERSION_MAJOR 51
-#define LIBAVUTIL_VERSION_MINOR 64
+#define LIBAVUTIL_VERSION_MINOR 65
 #define LIBAVUTIL_VERSION_MICRO 100

 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \