mirror of
https://github.com/mpv-player/mpv
synced 2024-12-16 20:05:07 +00:00
Patch by Karolina Lindqvist <karolina.lindqvist@kramnet.se>
"This patch is the MMX optimizations for the zrmjpeg filter, which is used by the zr2 video output driver." With some small changes by me: - column width=80 - kept jpeg_enc_* functions static because they confuse the current vo_zr.c - did not include jpeg_enc.h because jpeg_enc functions are still static git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@19956 b3059339-0415-0410-9bf9-f77b7e298cf2
This commit is contained in:
parent
1a3cfa9283
commit
4bb03ad126
@ -31,11 +31,13 @@
|
||||
#include "libavcodec/avcodec.h"
|
||||
#include "libavcodec/dsputil.h"
|
||||
#include "libavcodec/mpegvideo.h"
|
||||
//#include "jpeg_enc.h" /* this file is not present yet */
|
||||
|
||||
#undef malloc
|
||||
#undef free
|
||||
#undef realloc
|
||||
|
||||
extern int avcodec_inited;
|
||||
|
||||
/* some convenient #define's, is this portable enough? */
|
||||
#define VERBOSE(...) mp_msg(MSGT_DECVIDEO, MSGL_V, "vf_zrmjpeg: " __VA_ARGS__)
|
||||
@ -60,6 +62,9 @@ typedef struct MJpegContext {
|
||||
uint16_t huff_code_ac_chrominance[256];
|
||||
} MJpegContext;
|
||||
|
||||
// The get_pixels routine to use. The real routine comes from dsputil
|
||||
static void (*get_pixels)(DCTELEM *restrict block, const uint8_t *pixels, int line_size);
|
||||
|
||||
/* Begin excessive code duplication ************************************/
|
||||
/* Code coming from mpegvideo.c and mjpeg.c in ../libavcodec ***********/
|
||||
|
||||
@ -75,6 +80,10 @@ static const unsigned short aanscales[64] = {
|
||||
4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
|
||||
};
|
||||
|
||||
/*
|
||||
* This routine is like the routine with the same name in mjpeg.c,
|
||||
* except for some coefficient changes.
|
||||
*/
|
||||
static void convert_matrix(MpegEncContext *s, int (*qmat)[64],
|
||||
uint16_t (*qmat16)[2][64], const uint16_t *quant_matrix,
|
||||
int bias, int qmin, int qmax) {
|
||||
@ -130,6 +139,9 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64],
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This routine is a clone of mjpeg_encode_dc
|
||||
*/
|
||||
static inline void encode_dc(MpegEncContext *s, int val,
|
||||
uint8_t *huff_size, uint16_t *huff_code) {
|
||||
int mant, nbits;
|
||||
@ -142,19 +154,15 @@ static inline void encode_dc(MpegEncContext *s, int val,
|
||||
val = -val;
|
||||
mant--;
|
||||
}
|
||||
|
||||
/* compute the log (XXX: optimize) */
|
||||
nbits = 0;
|
||||
while (val != 0) {
|
||||
val = val >> 1;
|
||||
nbits++;
|
||||
}
|
||||
|
||||
nbits= av_log2_16bit(val) + 1;
|
||||
put_bits(&s->pb, huff_size[nbits], huff_code[nbits]);
|
||||
put_bits(&s->pb, nbits, mant & ((1 << nbits) - 1));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This routine is a duplicate of encode_block in mjpeg.c
|
||||
*/
|
||||
static void encode_block(MpegEncContext *s, DCTELEM *block, int n) {
|
||||
int mant, nbits, code, i, j;
|
||||
int component, dc, run, last_index, val;
|
||||
@ -199,12 +207,7 @@ static void encode_block(MpegEncContext *s, DCTELEM *block, int n) {
|
||||
mant--;
|
||||
}
|
||||
|
||||
/* compute the log (XXX: optimize) */
|
||||
nbits = 0;
|
||||
while (val != 0) {
|
||||
val = val >> 1;
|
||||
nbits++;
|
||||
}
|
||||
nbits= av_log2_16bit(val) + 1;
|
||||
code = (run << 4) | nbits;
|
||||
|
||||
put_bits(&s->pb, huff_size_ac[code],
|
||||
@ -241,9 +244,6 @@ typedef struct {
|
||||
struct MpegEncContext *s;
|
||||
int cheap_upsample;
|
||||
int bw;
|
||||
int y_ps;
|
||||
int u_ps;
|
||||
int v_ps;
|
||||
int y_rs;
|
||||
int u_rs;
|
||||
int v_rs;
|
||||
@ -253,7 +253,7 @@ typedef struct {
|
||||
* changes, it allows for black&white encoding (it skips the U and V
|
||||
* macroblocks and it outputs the huffman code for 'no change' (dc) and
|
||||
* 'all zero' (ac)) and it takes 4 macroblocks (422) instead of 6 (420) */
|
||||
static void zr_mjpeg_encode_mb(jpeg_enc_t *j) {
|
||||
static always_inline void zr_mjpeg_encode_mb(jpeg_enc_t *j) {
|
||||
|
||||
MJpegContext *m = j->s->mjpeg_ctx;
|
||||
|
||||
@ -279,11 +279,58 @@ static void zr_mjpeg_encode_mb(jpeg_enc_t *j) {
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Taking one MCU (YUYV) from 8-bit pixel planar storage and
|
||||
* filling it into four 16-bit pixel DCT macroblocks.
|
||||
*/
|
||||
static always_inline void fill_block(jpeg_enc_t *j, int x, int y,
|
||||
unsigned char *y_data, unsigned char *u_data,
|
||||
unsigned char *v_data)
|
||||
{
|
||||
int i, k;
|
||||
short int *dest;
|
||||
unsigned char *source;
|
||||
|
||||
// The first Y, Y0
|
||||
get_pixels(j->s->block[0], y*8*j->y_rs + 16*x + y_data, j->y_rs);
|
||||
// The second Y, Y1
|
||||
get_pixels(j->s->block[1], y*8*j->y_rs + 16*x + 8 + y_data, j->y_rs);
|
||||
|
||||
if (!j->bw && j->cheap_upsample) {
|
||||
source = y * 4 * j->u_rs + 8*x + u_data;
|
||||
dest = j->s->block[2];
|
||||
for (i = 0; i < 4; i++) {
|
||||
for (k = 0; k < 8; k++) {
|
||||
dest[k] = source[k]; // First row
|
||||
dest[k+8] = source[k]; // Duplicate to next row
|
||||
|
||||
}
|
||||
dest += 16;
|
||||
source += j->u_rs;
|
||||
}
|
||||
source = y * 4 * j->v_rs + 8*x + v_data;
|
||||
dest = j->s->block[3];
|
||||
for (i = 0; i < 4; i++) {
|
||||
for (k = 0; k < 8; k++) {
|
||||
dest[k] = source[k];
|
||||
dest[k+8] = source[k];
|
||||
}
|
||||
dest += 16;
|
||||
source += j->u_rs;
|
||||
}
|
||||
} else if (!j->bw && !j->cheap_upsample) {
|
||||
// U
|
||||
get_pixels(j->s->block[2], y*8*j->u_rs + 8*x + u_data, j->u_rs);
|
||||
// V
|
||||
get_pixels(j->s->block[3], y*8*j->v_rs + 8*x + v_data, j->v_rs);
|
||||
}
|
||||
}
|
||||
|
||||
/* this function can take all kinds of YUV colorspaces
|
||||
* YV12, YVYU, UYVY. The necesary parameters must be set up by the caller
|
||||
* y_ps means "y pixel size", y_rs means "y row size".
|
||||
* y_rs means "y row size".
|
||||
* For YUYV, for example, is u_buf = y_buf + 1, v_buf = y_buf + 3,
|
||||
* y_ps = 2, u_ps = 4, v_ps = 4, y_rs = u_rs = v_rs.
|
||||
* y_rs = u_rs = v_rs.
|
||||
*
|
||||
* The actual buffers must be passed with mjpeg_encode_frame, this is
|
||||
* to make it possible to call encode on the buffer provided by the
|
||||
@ -301,46 +348,41 @@ static void zr_mjpeg_encode_mb(jpeg_enc_t *j) {
|
||||
/* The encoder doesn't know anything about interlacing, the halve height
|
||||
* needs to be passed and the double rowstride. Which field gets encoded
|
||||
* is decided by what buffers are passed to mjpeg_encode_frame */
|
||||
static jpeg_enc_t *jpeg_enc_init(int w, int h, int y_psize, int y_rsize,
|
||||
int u_psize, int u_rsize, int v_psize, int v_rsize,
|
||||
static jpeg_enc_t *jpeg_enc_init(int w, int h, int y_rsize,
|
||||
int u_rsize, int v_rsize,
|
||||
int cu, int q, int b) {
|
||||
jpeg_enc_t *j;
|
||||
int i = 0;
|
||||
VERBOSE("JPEG encoder init: %dx%d %d %d %d %d %d %d\n",
|
||||
w, h, y_psize, y_rsize, u_psize,
|
||||
u_rsize, v_psize, v_rsize);
|
||||
VERBOSE("JPEG encoder init: %dx%d %d %d %d cu=%d q=%d bw=%d\n",
|
||||
w, h, y_rsize, u_rsize, v_rsize, cu, q, b);
|
||||
|
||||
j = malloc(sizeof(jpeg_enc_t));
|
||||
j = av_mallocz(sizeof(jpeg_enc_t));
|
||||
if (j == NULL) return NULL;
|
||||
|
||||
j->s = malloc(sizeof(MpegEncContext));
|
||||
memset(j->s,0x00,sizeof(MpegEncContext));
|
||||
j->s = av_mallocz(sizeof(MpegEncContext));
|
||||
if (j->s == NULL) {
|
||||
free(j);
|
||||
av_free(j);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* info on how to access the pixels */
|
||||
j->y_ps = y_psize;
|
||||
j->u_ps = u_psize;
|
||||
j->v_ps = v_psize;
|
||||
j->y_rs = y_rsize;
|
||||
j->u_rs = u_rsize;
|
||||
j->v_rs = v_rsize;
|
||||
|
||||
j->s->width = w;
|
||||
j->s->width = w; // image width and height
|
||||
j->s->height = h;
|
||||
j->s->qscale = q;
|
||||
j->s->qscale = q; // Encoding quality
|
||||
|
||||
j->s->mjpeg_data_only_frames = 0;
|
||||
j->s->out_format = FMT_MJPEG;
|
||||
j->s->intra_only = 1;
|
||||
j->s->encoding = 1;
|
||||
j->s->intra_only = 1; // Generate only intra pictures for jpeg
|
||||
j->s->encoding = 1; // Set mode to encode
|
||||
j->s->pict_type = I_TYPE;
|
||||
j->s->y_dc_scale = 8;
|
||||
j->s->c_dc_scale = 8;
|
||||
|
||||
j->s->mjpeg_write_tables = 1;
|
||||
j->s->mjpeg_write_tables = 1; // setup to write tables
|
||||
j->s->mjpeg_vsample[0] = 1;
|
||||
j->s->mjpeg_vsample[1] = 1;
|
||||
j->s->mjpeg_vsample[2] = 1;
|
||||
@ -351,23 +393,40 @@ static jpeg_enc_t *jpeg_enc_init(int w, int h, int y_psize, int y_rsize,
|
||||
j->cheap_upsample = cu;
|
||||
j->bw = b;
|
||||
|
||||
// Is this needed?
|
||||
/* if libavcodec is used by the decoder then we must not
|
||||
* initialize again, but if it is not initialized then we must
|
||||
* initialize it here. */
|
||||
if (!avcodec_inited) {
|
||||
avcodec_init();
|
||||
avcodec_register_all();
|
||||
avcodec_inited=1;
|
||||
}
|
||||
|
||||
if (mjpeg_init(j->s) < 0) {
|
||||
free(j->s);
|
||||
free(j);
|
||||
av_free(j->s);
|
||||
av_free(j);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* alloc bogus avctx to keep MPV_common_init from segfaulting */
|
||||
j->s->avctx = calloc(sizeof(*j->s->avctx), 1);
|
||||
/* Set up to encode mjpeg */
|
||||
j->s->avctx->codec_id = CODEC_ID_MJPEG;
|
||||
j->s->avctx = avcodec_alloc_context();
|
||||
if (j->s->avctx == NULL) {
|
||||
av_free(j->s);
|
||||
av_free(j);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* make MPV_common_init allocate important buffers, like s->block */
|
||||
// Set some a minimum amount of default values that are needed
|
||||
j->s->avctx->codec_id = CODEC_ID_MJPEG;
|
||||
j->s->avctx->dct_algo = FF_DCT_AUTO;
|
||||
j->s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
|
||||
j->s->avctx->thread_count = 1;
|
||||
|
||||
/* make MPV_common_init allocate important buffers, like s->block */
|
||||
if (MPV_common_init(j->s) < 0) {
|
||||
free(j->s);
|
||||
free(j);
|
||||
av_free(j->s);
|
||||
av_free(j);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -375,24 +434,28 @@ static jpeg_enc_t *jpeg_enc_init(int w, int h, int y_psize, int y_rsize,
|
||||
j->s->mb_height = j->s->height/8;
|
||||
j->s->mb_intra = 1;
|
||||
|
||||
// Init q matrix
|
||||
j->s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
|
||||
for (i = 1; i < 64; i++)
|
||||
j->s->intra_matrix[i] = clip_uint8(
|
||||
(ff_mpeg1_default_intra_matrix[i]*j->s->qscale) >> 3);
|
||||
// precompute matrix
|
||||
convert_matrix(j->s, j->s->q_intra_matrix, j->s->q_intra_matrix16,
|
||||
j->s->intra_matrix, j->s->intra_quant_bias, 8, 8);
|
||||
|
||||
get_pixels = j->s->dsp.get_pixels;
|
||||
|
||||
return j;
|
||||
}
|
||||
|
||||
static int jpeg_enc_frame(jpeg_enc_t *j, unsigned char *y_data,
|
||||
unsigned char *u_data, unsigned char *v_data, char *bufr) {
|
||||
int i, k, mb_x, mb_y, overflow;
|
||||
short int *dest;
|
||||
unsigned char *source;
|
||||
static int jpeg_enc_frame(jpeg_enc_t *j, uint8_t *y_data,
|
||||
uint8_t *u_data, uint8_t *v_data, uint8_t *bufr) {
|
||||
int mb_x, mb_y, overflow;
|
||||
/* initialize the buffer */
|
||||
|
||||
init_put_bits(&j->s->pb, bufr, 1024*256);
|
||||
|
||||
// Emit the mjpeg header blocks
|
||||
mjpeg_picture_header(j->s);
|
||||
|
||||
j->s->header_bits = put_bits_count(&j->s->pb);
|
||||
@ -403,72 +466,11 @@ static int jpeg_enc_frame(jpeg_enc_t *j, unsigned char *y_data,
|
||||
|
||||
for (mb_y = 0; mb_y < j->s->mb_height; mb_y++) {
|
||||
for (mb_x = 0; mb_x < j->s->mb_width; mb_x++) {
|
||||
/* conversion 8 to 16 bit and filling of blocks
|
||||
* must be mmx optimized */
|
||||
/* fill 2 Y macroblocks and one U and one V */
|
||||
source = mb_y * 8 * j->y_rs +
|
||||
16 * j->y_ps * mb_x + y_data;
|
||||
dest = j->s->block[0];
|
||||
for (i = 0; i < 8; i++) {
|
||||
for (k = 0; k < 8; k++) {
|
||||
dest[k] = source[k*j->y_ps];
|
||||
}
|
||||
dest += 8;
|
||||
source += j->y_rs;
|
||||
}
|
||||
source = mb_y * 8 * j->y_rs +
|
||||
(16*mb_x + 8)*j->y_ps + y_data;
|
||||
dest = j->s->block[1];
|
||||
for (i = 0; i < 8; i++) {
|
||||
for (k = 0; k < 8; k++) {
|
||||
dest[k] = source[k*j->y_ps];
|
||||
}
|
||||
dest += 8;
|
||||
source += j->y_rs;
|
||||
}
|
||||
if (!j->bw && j->cheap_upsample) {
|
||||
source = mb_y*4*j->u_rs +
|
||||
8*mb_x*j->u_ps + u_data;
|
||||
dest = j->s->block[2];
|
||||
for (i = 0; i < 4; i++) {
|
||||
for (k = 0; k < 8; k++) {
|
||||
dest[k] = source[k*j->u_ps];
|
||||
dest[k+8] = source[k*j->u_ps];
|
||||
}
|
||||
dest += 16;
|
||||
source += j->u_rs;
|
||||
}
|
||||
source = mb_y*4*j->v_rs +
|
||||
8*mb_x*j->v_ps + v_data;
|
||||
dest = j->s->block[3];
|
||||
for (i = 0; i < 4; i++) {
|
||||
for (k = 0; k < 8; k++) {
|
||||
dest[k] = source[k*j->v_ps];
|
||||
dest[k+8] = source[k*j->v_ps];
|
||||
}
|
||||
dest += 16;
|
||||
source += j->u_rs;
|
||||
}
|
||||
} else if (!j->bw && !j->cheap_upsample) {
|
||||
source = mb_y*8*j->u_rs +
|
||||
8*mb_x*j->u_ps + u_data;
|
||||
dest = j->s->block[2];
|
||||
for (i = 0; i < 8; i++) {
|
||||
for (k = 0; k < 8; k++)
|
||||
dest[k] = source[k*j->u_ps];
|
||||
dest += 8;
|
||||
source += j->u_rs;
|
||||
}
|
||||
source = mb_y*8*j->v_rs +
|
||||
8*mb_x*j->v_ps + v_data;
|
||||
dest = j->s->block[3];
|
||||
for (i = 0; i < 8; i++) {
|
||||
for (k = 0; k < 8; k++)
|
||||
dest[k] = source[k*j->v_ps];
|
||||
dest += 8;
|
||||
source += j->u_rs;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Fill one DCT block (8x8 pixels) from
|
||||
* 2 Y macroblocks and one U and one V
|
||||
*/
|
||||
fill_block(j, mb_x, mb_y, y_data, u_data, v_data);
|
||||
emms_c(); /* is this really needed? */
|
||||
|
||||
j->s->block_last_index[0] =
|
||||
@ -509,8 +511,8 @@ static int jpeg_enc_frame(jpeg_enc_t *j, unsigned char *y_data,
|
||||
|
||||
static void jpeg_enc_uninit(jpeg_enc_t *j) {
|
||||
mjpeg_close(j->s);
|
||||
free(j->s);
|
||||
free(j);
|
||||
av_free(j->s);
|
||||
av_free(j);
|
||||
}
|
||||
|
||||
struct vf_priv_s {
|
||||
@ -654,11 +656,11 @@ static int config(struct vf_instance_s* vf, int width, int height, int d_width,
|
||||
|
||||
priv->y_stride = width;
|
||||
priv->c_stride = width/2;
|
||||
priv->j = jpeg_enc_init(width, height/priv->fields, 1,
|
||||
priv->fields*priv->y_stride, 1,
|
||||
priv->fields*priv->c_stride, 1,
|
||||
priv->fields*priv->c_stride, 1,
|
||||
priv->quality, priv->bw);
|
||||
priv->j = jpeg_enc_init(width, height/priv->fields,
|
||||
priv->fields*priv->y_stride,
|
||||
priv->fields*priv->c_stride,
|
||||
priv->fields*priv->c_stride,
|
||||
1, priv->quality, priv->bw);
|
||||
|
||||
if (!priv->j) return 0;
|
||||
return vf_next_config(vf, width, height, d_width, d_height, flags,
|
||||
|
Loading…
Reference in New Issue
Block a user