From 27237d524e56210992b18486924894bb4f3fdbb8 Mon Sep 17 00:00:00 2001 From: Steven Walters Date: Sun, 9 Oct 2011 21:38:35 +0200 Subject: [PATCH] w32threads: support for frame multithreading Replace our incomplete w32threads implementation with x264's pthreads w32threads wrapper. Relicensed to LGPL with kind permission by Pegasys Inc. Signed-off-by: Janne Grunau --- libavcodec/Makefile | 2 +- libavcodec/h264.c | 2 +- libavcodec/h264_direct.c | 2 +- libavcodec/mpeg12.c | 2 +- libavcodec/mpegvideo.c | 2 +- libavcodec/pthread.c | 13 ++- libavcodec/utils.c | 6 +- libavcodec/vp3.c | 8 +- libavcodec/w32pthreads.h | 207 +++++++++++++++++++++++++++++++++++++++ libavcodec/w32thread.c | 176 --------------------------------- 10 files changed, 230 insertions(+), 190 deletions(-) create mode 100644 libavcodec/w32pthreads.h delete mode 100644 libavcodec/w32thread.c diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 04a504be74..4c3eca379b 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -645,7 +645,7 @@ OBJS-$(CONFIG_TEXT2MOVSUB_BSF) += movsub_bsf.o # thread libraries OBJS-$(HAVE_PTHREADS) += pthread.o -OBJS-$(HAVE_W32THREADS) += w32thread.o +OBJS-$(HAVE_W32THREADS) += pthread.o OBJS-$(CONFIG_MLIB) += mlib/dsputil_mlib.o \ diff --git a/libavcodec/h264.c b/libavcodec/h264.c index b1204c07b8..1faaaa6802 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -688,7 +688,7 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t assert(IS_INTER(mb_type)); - if(HAVE_PTHREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME)) + if(HAVE_THREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME)) await_references(h); prefetch_motion(h, 0, pixel_shift, chroma444); diff --git a/libavcodec/h264_direct.c b/libavcodec/h264_direct.c index 4e05937ab4..a953728a12 100644 --- a/libavcodec/h264_direct.c +++ b/libavcodec/h264_direct.c @@ -147,7 +147,7 @@ static void await_reference_mb_row(H264Context * const h, Picture *ref, int mb_y int 
ref_field_picture = ref->field_picture; int ref_height = 16*h->s.mb_height >> ref_field_picture; - if(!HAVE_PTHREADS || !(h->s.avctx->active_thread_type&FF_THREAD_FRAME)) + if(!HAVE_THREADS || !(h->s.avctx->active_thread_type&FF_THREAD_FRAME)) return; //FIXME it can be safe to access mb stuff diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c index 5734ab8674..4e95e02a9b 100644 --- a/libavcodec/mpeg12.c +++ b/libavcodec/mpeg12.c @@ -1596,7 +1596,7 @@ static int mpeg_field_start(MpegEncContext *s, const uint8_t *buf, int buf_size) *s->current_picture_ptr->f.pan_scan = s1->pan_scan; - if (HAVE_PTHREADS && (avctx->active_thread_type & FF_THREAD_FRAME)) + if (HAVE_THREADS && (avctx->active_thread_type & FF_THREAD_FRAME)) ff_thread_finish_setup(avctx); } else { // second field int i; diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index b8ac2ceb3d..cbc7d99818 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -2119,7 +2119,7 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], /* decoding or more than one mb_type (MC was already done otherwise) */ if(!s->encoding){ - if(HAVE_PTHREADS && s->avctx->active_thread_type&FF_THREAD_FRAME) { + if(HAVE_THREADS && s->avctx->active_thread_type&FF_THREAD_FRAME) { if (s->mv_dir & MV_DIR_FORWARD) { ff_thread_await_progress((AVFrame*)s->last_picture_ptr, MPV_lowest_referenced_row(s, 0), 0); } diff --git a/libavcodec/pthread.c b/libavcodec/pthread.c index 42f4382e7d..d5129dcc3e 100644 --- a/libavcodec/pthread.c +++ b/libavcodec/pthread.c @@ -29,11 +29,16 @@ * @see doc/multithreading.txt */ -#include - +#include "config.h" #include "avcodec.h" #include "thread.h" +#if HAVE_PTHREADS +#include +#elif HAVE_W32THREADS +#include "w32pthreads.h" +#endif + typedef int (action_func)(AVCodecContext *c, void *arg); typedef int (action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr); @@ -898,6 +903,10 @@ int ff_thread_init(AVCodecContext *avctx) return -1; } +#if HAVE_W32THREADS + 
w32thread_init(); +#endif + if (avctx->codec) { validate_thread_parameters(avctx); diff --git a/libavcodec/utils.c b/libavcodec/utils.c index 1c95fa1236..ac4de7da4a 100644 --- a/libavcodec/utils.c +++ b/libavcodec/utils.c @@ -723,7 +723,7 @@ int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *pi avctx->pkt = avpkt; if((avctx->codec->capabilities & CODEC_CAP_DELAY) || avpkt->size || (avctx->active_thread_type&FF_THREAD_FRAME)){ - if (HAVE_PTHREADS && avctx->active_thread_type&FF_THREAD_FRAME) + if (HAVE_THREADS && avctx->active_thread_type&FF_THREAD_FRAME) ret = ff_thread_decode_frame(avctx, picture, got_picture_ptr, avpkt); else { @@ -1089,7 +1089,7 @@ const char *avcodec_license(void) void avcodec_flush_buffers(AVCodecContext *avctx) { - if(HAVE_PTHREADS && avctx->active_thread_type&FF_THREAD_FRAME) + if(HAVE_THREADS && avctx->active_thread_type&FF_THREAD_FRAME) ff_thread_flush(avctx); else if(avctx->codec->flush) avctx->codec->flush(avctx); @@ -1277,7 +1277,7 @@ unsigned int ff_toupper4(unsigned int x) + (toupper((x>>24)&0xFF)<<24); } -#if !HAVE_PTHREADS +#if !HAVE_THREADS int ff_thread_get_buffer(AVCodecContext *avctx, AVFrame *f) { diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c index 462a482f42..9262c27f05 100644 --- a/libavcodec/vp3.c +++ b/libavcodec/vp3.c @@ -1316,7 +1316,7 @@ static void vp3_draw_horiz_band(Vp3DecodeContext *s, int y) int h, cy; int offset[4]; - if (HAVE_PTHREADS && s->avctx->active_thread_type&FF_THREAD_FRAME) { + if (HAVE_THREADS && s->avctx->active_thread_type&FF_THREAD_FRAME) { int y_flipped = s->flipped_image ? s->avctx->height-y : y; // At the end of the frame, report INT_MAX instead of the height of the frame. 
@@ -1400,7 +1400,7 @@ static void render_slice(Vp3DecodeContext *s, int slice) int fragment_width = s->fragment_width[!!plane]; int fragment_height = s->fragment_height[!!plane]; int fragment_start = s->fragment_start[plane]; - int do_await = !plane && HAVE_PTHREADS && (s->avctx->active_thread_type&FF_THREAD_FRAME); + int do_await = !plane && HAVE_THREADS && (s->avctx->active_thread_type&FF_THREAD_FRAME); if (!s->flipped_image) stride = -stride; if (CONFIG_GRAY && plane && (s->avctx->flags & CODEC_FLAG_GRAY)) @@ -1965,7 +1965,7 @@ static int vp3_decode_frame(AVCodecContext *avctx, *data_size=sizeof(AVFrame); *(AVFrame*)data= s->current_frame; - if (!HAVE_PTHREADS || !(s->avctx->active_thread_type&FF_THREAD_FRAME)) + if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_FRAME)) update_frames(avctx); return buf_size; @@ -1973,7 +1973,7 @@ static int vp3_decode_frame(AVCodecContext *avctx, error: ff_thread_report_progress(&s->current_frame, INT_MAX, 0); - if (!HAVE_PTHREADS || !(s->avctx->active_thread_type&FF_THREAD_FRAME)) + if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_FRAME)) avctx->release_buffer(avctx, &s->current_frame); return -1; diff --git a/libavcodec/w32pthreads.h b/libavcodec/w32pthreads.h new file mode 100644 index 0000000000..7774817518 --- /dev/null +++ b/libavcodec/w32pthreads.h @@ -0,0 +1,207 @@ +/* + * Copyright (C) 2010-2011 x264 project + * + * Authors: Steven Walters + * Pegasys Inc. + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * w32threads to pthreads wrapper
+ */
+
+#ifndef AVCODEC_W32PTHREADS_H
+#define AVCODEC_W32PTHREADS_H
+
+/* Build up a pthread-like API using underlying Windows API. Have only static
+ * methods so as to not conflict with a potentially linked in pthread-win32
+ * library.
+ * As most functions here are used without checking return values,
+ * only implement return values as necessary. */
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <process.h>
+
+/* Thread descriptor: the thread handle together with the start routine,
+ * its argument and the slot that receives its return value. */
+typedef struct {
+    void *handle;             /* handle returned by _beginthreadex() */
+    void *(*func)(void* arg); /* user supplied start routine */
+    void *arg;                /* argument handed to func */
+    void *ret;                /* value returned by func */
+} pthread_t;
+
+/* the condition variable API for Windows 6.0+ uses critical sections and
+ * not mutexes */
+typedef CRITICAL_SECTION pthread_mutex_t;
+
+/* This is the CONDITION_VARIABLE typedef for using Windows' native
+ * condition variables on kernels 6.0+.
+ * MinGW does not currently have this typedef.
*/
+typedef struct {
+    void *ptr;
+} pthread_cond_t;
+
+/* Function pointers to the condition variable API on Windows 6.0+ kernels.
+ * They are looked up by w32thread_init(); while they are NULL the
+ * semaphore based fallback implemented below is used instead. */
+static void (WINAPI *cond_broadcast)(pthread_cond_t *cond);
+static void (WINAPI *cond_init)(pthread_cond_t *cond);
+static void (WINAPI *cond_signal)(pthread_cond_t *cond);
+static BOOL (WINAPI *cond_wait)(pthread_cond_t *cond, pthread_mutex_t *mutex,
+                                DWORD milliseconds);
+
+/* Thread entry point handed to _beginthreadex(): runs the start routine
+ * stored in the pthread_t and records its result in the same structure. */
+static unsigned __stdcall attribute_align_arg win32thread_worker(void *arg)
+{
+    pthread_t *h = arg;
+    h->ret = h->func(h->arg);
+    return 0;
+}
+
+/* Start a thread that executes start_routine(arg).
+ * unused_attr is ignored.
+ * Returns 0 on success and 1 if _beginthreadex() did not return a handle.
+ * The new thread works on *thread itself, so that object has to stay valid
+ * until the thread has finished. */
+static int pthread_create(pthread_t *thread, const void *unused_attr,
+                          void *(*start_routine)(void*), void *arg)
+{
+    thread->func   = start_routine;
+    thread->arg    = arg;
+    thread->handle = (void*)_beginthreadex(NULL, 0, win32thread_worker, thread,
+                                           0, NULL);
+    return !thread->handle;
+}
+
+/* Wait for the thread to terminate and close its handle.
+ * If the wait fails nothing else is done (the handle stays open).
+ * NOTE(review): thread is passed by value, so thread.ret is the value that
+ * was copied when pthread_join() was called; a result stored by the worker
+ * after that copy was taken is not visible through value_ptr. */
+static void pthread_join(pthread_t thread, void **value_ptr)
+{
+    DWORD ret = WaitForSingleObject(thread.handle, INFINITE);
+    if (ret != WAIT_OBJECT_0)
+        return;
+    if (value_ptr)
+        *value_ptr = thread.ret;
+    CloseHandle(thread.handle);
+}
+
+/* mutexes map directly onto critical sections; the attribute is ignored */
+#define pthread_mutex_init(m, a) InitializeCriticalSection(m)
+#define pthread_mutex_destroy(m) DeleteCriticalSection(m)
+#define pthread_mutex_lock(m)    EnterCriticalSection(m)
+#define pthread_mutex_unlock(m)  LeaveCriticalSection(m)
+
+/* for pre-Windows 6.0 platforms we need to define and use our own condition
+ * variable and api */
+typedef struct {
+    pthread_mutex_t mtx_waiter_count; /* protects waiter_count */
+    volatile int waiter_count;        /* waiters not yet handed a wakeup */
+    HANDLE semaphore;                 /* released once per wakeup */
+} win32_cond_t;
+
+/* Initialize a condition variable; unused_attr is ignored.
+ * With the native API this forwards to InitializeConditionVariable.
+ * Otherwise a win32_cond_t is allocated and stored in cond->ptr. As no
+ * error can be returned, cond->ptr is left NULL if the allocation or the
+ * semaphore creation fails, so a failed initialization never leaves a
+ * partially set up object behind. */
+static void pthread_cond_init(pthread_cond_t *cond, const void *unused_attr)
+{
+    win32_cond_t *win32_cond = NULL;
+    if (cond_init) {
+        cond_init(cond);
+        return;
+    }
+
+    /* non native condition variables */
+    cond->ptr  = NULL;
+    win32_cond = av_mallocz(sizeof(win32_cond_t));
+    if (!win32_cond)
+        return;
+    win32_cond->semaphore = CreateSemaphore(NULL, 0, 0x7fffffff, NULL);
+    if (!win32_cond->semaphore) {
+        /* do not leak the allocation or expose an unusable object */
+        av_freep(&win32_cond);
+        return;
+    }
+
+    pthread_mutex_init(&win32_cond->mtx_waiter_count, NULL);
+    /* publish only once every member is usable */
+    cond->ptr = win32_cond;
+}
+
+/* Release what pthread_cond_init() set up for the fallback implementation.
+ * Does nothing for native condition variables or when the initialization
+ * had failed (cond->ptr is NULL). */
+static void pthread_cond_destroy(pthread_cond_t *cond)
+{
+    win32_cond_t *win32_cond = cond->ptr;
+    /* native condition variables do not destroy */
+    if (cond_init)
+        return;
+
+    /* non native condition variables */
+    if (!win32_cond)
+        return;
+    CloseHandle(win32_cond->semaphore);
+    pthread_mutex_destroy(&win32_cond->mtx_waiter_count);
+    av_freep(&win32_cond);
+    cond->ptr = NULL;
+}
+
+/* Wake every thread currently counted as waiting on cond. */
+static void pthread_cond_broadcast(pthread_cond_t *cond)
+{
+    win32_cond_t *win32_cond = cond->ptr;
+    if (cond_broadcast) {
+        cond_broadcast(cond);
+        return;
+    }
+
+    /* non native condition variables */
+    pthread_mutex_lock(&win32_cond->mtx_waiter_count);
+    if (win32_cond->waiter_count) {
+        /* one semaphore unit per registered waiter, then start over */
+        ReleaseSemaphore(win32_cond->semaphore, win32_cond->waiter_count, NULL);
+        win32_cond->waiter_count = 0;
+    }
+    pthread_mutex_unlock(&win32_cond->mtx_waiter_count);
+}
+
+/* Wait on cond without a timeout. mutex is released while waiting and
+ * taken again before returning. */
+static void pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
+{
+    win32_cond_t *win32_cond = cond->ptr;
+    if (cond_wait) {
+        cond_wait(cond, mutex, INFINITE);
+        return;
+    }
+
+    /* non native condition variables */
+    /* register as waiter before giving up the mutex so that a signal sent
+     * in between already finds this thread counted */
+    pthread_mutex_lock(&win32_cond->mtx_waiter_count);
+    win32_cond->waiter_count++;
+    pthread_mutex_unlock(&win32_cond->mtx_waiter_count);
+
+    pthread_mutex_unlock(mutex);
+    WaitForSingleObject(win32_cond->semaphore, INFINITE);
+    pthread_mutex_lock(mutex);
+}
+
+/* Wake one thread waiting on cond, if any is counted as waiting. */
+static void pthread_cond_signal(pthread_cond_t *cond)
+{
+    win32_cond_t *win32_cond = cond->ptr;
+    if (cond_signal) {
+        cond_signal(cond);
+        return;
+    }
+
+    /* non-native condition variables */
+    pthread_mutex_lock(&win32_cond->mtx_waiter_count);
+    if (win32_cond->waiter_count) {
+        ReleaseSemaphore(win32_cond->semaphore, 1, NULL);
+        win32_cond->waiter_count--;
+    }
+    pthread_mutex_unlock(&win32_cond->mtx_waiter_count);
+}
+
+/* Look up the native condition variable functions in kernel32.dll.
+ * GetProcAddress() yields NULL for a function that is not exported, which
+ * leaves the corresponding pointer NULL and selects the fallback code. */
+static void w32thread_init(void)
+{
+    HANDLE kernel_dll = GetModuleHandle(TEXT("kernel32.dll"));
+    /* if one is available, then they should all be available */
+    cond_init      =
+        (void*)GetProcAddress(kernel_dll, "InitializeConditionVariable");
+    cond_broadcast =
+        (void*)GetProcAddress(kernel_dll, "WakeAllConditionVariable");
+    cond_signal    =
+        (void*)GetProcAddress(kernel_dll, "WakeConditionVariable");
+    cond_wait      =
+        (void*)GetProcAddress(kernel_dll, "SleepConditionVariableCS");
+}
+
+#endif /* AVCODEC_W32PTHREADS_H */
diff --git a/libavcodec/w32thread.c b/libavcodec/w32thread.c
deleted file mode 100644
index ceaafebc80..0000000000
--- a/libavcodec/w32thread.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Copyright (c) 2004 Michael Niedermayer
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ -//#define DEBUG - -#include "avcodec.h" -#include "thread.h" - -#define WIN32_LEAN_AND_MEAN -#include -#include - -typedef struct ThreadContext{ - AVCodecContext *avctx; - HANDLE thread; - HANDLE work_sem; - HANDLE job_sem; - HANDLE done_sem; - int (*func)(AVCodecContext *c, void *arg); - int (*func2)(AVCodecContext *c, void *arg, int, int); - void *arg; - int argsize; - int *jobnr; - int *ret; - int threadnr; -}ThreadContext; - - -static unsigned WINAPI attribute_align_arg thread_func(void *v){ - ThreadContext *c= v; - - for(;;){ - int ret, jobnr; -//printf("thread_func %X enter wait\n", (int)v); fflush(stdout); - WaitForSingleObject(c->work_sem, INFINITE); - // avoid trying to access jobnr if we should quit - if (!c->func && !c->func2) - break; - WaitForSingleObject(c->job_sem, INFINITE); - jobnr = (*c->jobnr)++; - ReleaseSemaphore(c->job_sem, 1, 0); -//printf("thread_func %X after wait (func=%X)\n", (int)v, (int)c->func); fflush(stdout); - if(c->func) - ret= c->func(c->avctx, (uint8_t *)c->arg + jobnr*c->argsize); - else - ret= c->func2(c->avctx, c->arg, jobnr, c->threadnr); - if (c->ret) - c->ret[jobnr] = ret; -//printf("thread_func %X signal complete\n", (int)v); fflush(stdout); - ReleaseSemaphore(c->done_sem, 1, 0); - } - - return 0; -} - -/** - * Free what has been allocated by ff_thread_init(). - * Must be called after decoding has finished, especially do not call while avcodec_thread_execute() is running. 
- */ -void ff_thread_free(AVCodecContext *s){ - ThreadContext *c= s->thread_opaque; - int i; - - for(i=0; ithread_count; i++){ - - c[i].func= NULL; - c[i].func2= NULL; - } - ReleaseSemaphore(c[0].work_sem, s->thread_count, 0); - for(i=0; ithread_count; i++){ - WaitForSingleObject(c[i].thread, INFINITE); - if(c[i].thread) CloseHandle(c[i].thread); - } - if(c[0].work_sem) CloseHandle(c[0].work_sem); - if(c[0].job_sem) CloseHandle(c[0].job_sem); - if(c[0].done_sem) CloseHandle(c[0].done_sem); - - av_freep(&s->thread_opaque); -} - -static int avcodec_thread_execute(AVCodecContext *s, int (*func)(AVCodecContext *c2, void *arg2),void *arg, int *ret, int count, int size){ - ThreadContext *c= s->thread_opaque; - int i; - int jobnr = 0; - - assert(s == c->avctx); - - /* note, we can be certain that this is not called with the same AVCodecContext by different threads at the same time */ - - for(i=0; ithread_count; i++){ - c[i].arg= arg; - c[i].argsize= size; - c[i].func= func; - c[i].ret= ret; - c[i].jobnr = &jobnr; - } - ReleaseSemaphore(c[0].work_sem, count, 0); - for(i=0; ithread_opaque; - int i; - for(i=0; ithread_count; i++) - c[i].func2 = func; - avcodec_thread_execute(s, NULL, arg, ret, count, 0); -} - -int ff_thread_init(AVCodecContext *s){ - int i; - ThreadContext *c; - uint32_t threadid; - - if (s->thread_type && !(s->thread_type & FF_THREAD_SLICE)) { - av_log(s, AV_LOG_WARNING, - "This thread library only supports FF_THREAD_SLICE" - " threading algorithm.\n"); - return 0; - } - - s->active_thread_type= FF_THREAD_SLICE; - - if (s->thread_count <= 1) - return 0; - - assert(!s->thread_opaque); - c= av_mallocz(sizeof(ThreadContext)*s->thread_count); - s->thread_opaque= c; - if(!(c[0].work_sem = CreateSemaphore(NULL, 0, INT_MAX, NULL))) - goto fail; - if(!(c[0].job_sem = CreateSemaphore(NULL, 1, 1, NULL))) - goto fail; - if(!(c[0].done_sem = CreateSemaphore(NULL, 0, INT_MAX, NULL))) - goto fail; - - for(i=0; ithread_count; i++){ -//printf("init semaphors %d\n", i); 
fflush(stdout); - c[i].avctx= s; - c[i].work_sem = c[0].work_sem; - c[i].job_sem = c[0].job_sem; - c[i].done_sem = c[0].done_sem; - c[i].threadnr = i; - -//printf("create thread %d\n", i); fflush(stdout); - c[i].thread = (HANDLE)_beginthreadex(NULL, 0, thread_func, &c[i], 0, &threadid ); - if( !c[i].thread ) goto fail; - } -//printf("init done\n"); fflush(stdout); - - s->execute= avcodec_thread_execute; - s->execute2= avcodec_thread_execute2; - - return 0; -fail: - ff_thread_free(s); - return -1; -}