mpv/video/mp_image.h

197 lines
8.2 KiB
C
Raw Normal View History

/*
* This file is part of mpv.
*
* mpv is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* mpv is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with mpv. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef MPLAYER_MP_IMAGE_H
#define MPLAYER_MP_IMAGE_H
2012-12-11 23:43:36 +00:00
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include "common/common.h"
#include "common/msg.h"
#include "csputils.h"
#include "video/img_format.h"
video: generally try to align image data on 64 bytes Generally, using x86 SIMD efficiently (or crash-free) requires aligning all data on boundaries of 16, 32, or 64 (depending on instruction set used). 64 bytes is needed or AVX-512, 32 for old AVX, 16 for SSE. Both FFmpeg and zimg usually require aligned data for this reason. FFmpeg is very unclear about alignment. Yes, it requires you to align data pointers and strides. No, it doesn't tell you how much, except sometimes (libavcodec has a legacy-looking avcodec_align_dimensions2() API function, that requires a heavy-weight AVCodecContext as argument). Sometimes, FFmpeg will take a shit on YOUR and ITS OWN alignment. For example, vf_crop will randomly reduce alignment of data pointers, depending on the crop parameters. On the other hand, some libavfilter filters or libavcodec encoders may randomly crash if they get the wrong alignment. I have no idea how this thing works at all. FFmpeg usually doesn't seem to signal alignment internal anywhere, and usually leaves it to av_malloc() etc. to allocate with proper alignment. libavutil/mem.c currently has a ALIGN define, which is set to 64 if FFmpeg is built with AVX-512 support, or as low as 16 if built without any AVX support. The really funny thing is that a normal FFmpeg build will e.g. align tiny string allocations to 64 bytes, even if the machine does not support AVX at all. For zimg use (in a later commit), we also want guaranteed alignment. Modern x86 should actually not be much slower at unaligned accesses, but that doesn't help. zimg's dumb intrinsic code apparently randomly chooses between aligned or unaligned accesses (depending on compiler, I guess), and on some CPUs these can even cause crashes. So just treat the requirement to align as a fact of life. All this means that we should probably make sure our own allocations are 64 bit aligned. This still doesn't guarantee alignment in all cases, but it's slightly better than before. This also makes me wonder whether we should always override libavcodec's buffer pool, just so we have a guaranteed alignment. Currently, we only do that if --vd-lavc-dr is used (and if that actually works). On the other hand, it always uses DR on my machine, so who cares.
2019-07-15 01:06:47 +00:00
// Assumed minimum align needed for image allocation. It's notable that FFmpeg's
// libraries except libavcodec don't really know what alignment they want.
// Things will randomly crash or get slower if the alignment is not satisfied.
// Whatever. This value should be pretty safe with current CPU architectures.
#define MP_IMAGE_BYTE_ALIGN 64
#define MP_IMGFIELD_TOP_FIRST 0x02
#define MP_IMGFIELD_REPEAT_FIRST 0x04
#define MP_IMGFIELD_INTERLACED 0x20
// Describes image parameters that usually stay constant.
// New fields can be added in the future. Code changing the parameters should
// usually copy the whole struct, so that fields added later will be preserved.
struct mp_image_params {
enum mp_imgfmt imgfmt; // pixel format
enum mp_imgfmt hw_subfmt; // underlying format for some hwaccel pixfmts
int w, h; // image dimensions
int p_w, p_h; // define pixel aspect ratio (undefined: 0/0)
struct mp_colorspace color;
enum mp_chroma_location chroma_location;
2014-04-20 19:28:09 +00:00
// The image should be rotated clockwise (0-359 degrees).
int rotate;
enum mp_stereo3d_mode stereo3d; // image is encoded with this mode
enum mp_alpha_type alpha; // usually auto; only set if explicitly known
};
2012-12-11 23:43:36 +00:00
/* Memory management:
* - mp_image is a light-weight reference to the actual image data (pixels).
* The actual image data is reference counted and can outlive mp_image
* allocations. mp_image references can be created with mp_image_new_ref()
* and free'd with talloc_free() (the helpers mp_image_setrefp() and
* mp_image_unrefp() can also be used). The actual image data is free'd when
* the last mp_image reference to it is free'd.
* - Each mp_image has a clear owner. The owner can do anything with it, such
* as changing mp_image fields. Instead of making ownership ambiguous by
* sharing a mp_image reference, new references should be created.
* - Write access to the actual image data is allowed only after calling
* mp_image_make_writeable(), or if mp_image_is_writeable() returns true.
* Conceptually, images can be changed by their owner only, and copy-on-write
* is used to ensure that other references do not see any changes to the
* image data. mp_image_make_writeable() will do that copy if required.
*/
typedef struct mp_image {
int w, h; // visible dimensions (redundant with params.w/h)
struct mp_image_params params;
// fields redundant to params.imgfmt, for convenience or compatibility
struct mp_imgfmt_desc fmt;
enum mp_imgfmt imgfmt;
int num_planes;
uint8_t *planes[MP_MAX_PLANES];
int stride[MP_MAX_PLANES];
int pict_type; // 0->unknown, 1->I, 2->P, 3->B
int fields;
/* only inside filter chain */
double pts;
/* only after decoder */
double dts, pkt_duration;
/* container reported FPS; can be incorrect, or 0 if unknown */
double nominal_fps;
/* for private use */
void* priv;
// Reference-counted data references.
// These do not necessarily map directly to planes[]. They can have
// different order or count. There shouldn't be more buffers than planes.
// If bufs[n] is NULL, bufs[n+1] must also be NULL.
// All mp_* functions manage this automatically; do not mess with it.
// (See also AVFrame.buf.)
struct AVBufferRef *bufs[MP_MAX_PLANES];
// Points to AVHWFramesContext* (same as AVFrame.hw_frames_ctx)
struct AVBufferRef *hwctx;
// Embedded ICC profile, if any
struct AVBufferRef *icc_profile;
// Closed captions packet, if any (only after decoder)
struct AVBufferRef *a53_cc;
// Dolby Vision metadata, if any
struct AVBufferRef *dovi;
// Other side data we don't care about.
struct mp_ff_side_data *ff_side_data;
int num_ff_side_data;
} mp_image_t;
struct mp_ff_side_data {
int type;
struct AVBufferRef *buf;
};
int mp_chroma_div_up(int size, int shift);
int mp_image_get_alloc_size(int imgfmt, int w, int h, int stride_align);
struct mp_image *mp_image_from_buffer(int imgfmt, int w, int h, int stride_align,
uint8_t *buffer, int buffer_size,
void *free_opaque,
void (*free)(void *opaque, uint8_t *data));
struct mp_image *mp_image_alloc(int fmt, int w, int h);
2012-12-11 23:43:36 +00:00
void mp_image_copy(struct mp_image *dmpi, struct mp_image *mpi);
void mp_image_copy_attributes(struct mp_image *dmpi, struct mp_image *mpi);
struct mp_image *mp_image_new_copy(struct mp_image *img);
struct mp_image *mp_image_new_ref(struct mp_image *img);
bool mp_image_is_writeable(struct mp_image *img);
bool mp_image_make_writeable(struct mp_image *img);
2012-12-11 23:43:36 +00:00
void mp_image_setrefp(struct mp_image **p_img, struct mp_image *new_value);
void mp_image_unrefp(struct mp_image **p_img);
void mp_image_clear(struct mp_image *mpi, int x0, int y0, int x1, int y1);
void mp_image_clear_rc(struct mp_image *mpi, struct mp_rect rc);
void mp_image_clear_rc_inv(struct mp_image *mpi, struct mp_rect rc);
void mp_image_crop(struct mp_image *img, int x0, int y0, int x1, int y1);
void mp_image_crop_rc(struct mp_image *img, struct mp_rect rc);
void mp_image_vflip(struct mp_image *img);
void mp_image_set_size(struct mp_image *mpi, int w, int h);
int mp_image_plane_w(struct mp_image *mpi, int plane);
int mp_image_plane_h(struct mp_image *mpi, int plane);
void mp_image_setfmt(mp_image_t* mpi, int out_fmt);
2012-12-11 23:43:36 +00:00
void mp_image_steal_data(struct mp_image *dst, struct mp_image *src);
void mp_image_unref_data(struct mp_image *img);
2012-12-11 23:43:36 +00:00
Implement backwards playback See manpage additions. This is a huge hack. You can bet there are shit tons of bugs. It's literally forcing square pegs into round holes. Hopefully, the manpage wall of text makes it clear enough that the whole shit can easily crash and burn. (Although it shouldn't literally crash. That would be a bug. It possibly _could_ start a fire by entering some sort of endless loop, not a literal one, just something where it tries to do work without making progress.) (Some obvious bugs I simply ignored for this initial version, but there's a number of potential bugs I can't even imagine. Normal playback should remain completely unaffected, though.) How this works is also described in the manpage. Basically, we demux in reverse, then we decode in reverse, then we render in reverse. The decoding part is the simplest: just reorder the decoder output. This weirdly integrates with the timeline/ordered chapter code, which also has special requirements on feeding the packets to the decoder in a non-straightforward way (it doesn't conflict, although a bugmessmass breaks correct slicing of segments, so EDL/ordered chapter playback is broken in backward direction). Backward demuxing is pretty involved. In theory, it could be much easier: simply iterating the usual demuxer output backward. But this just doesn't fit into our code, so there's a cthulhu nightmare of shit. To be specific, each stream (audio, video) is reversed separately. At least this means we can do backward playback within cached content (for example, you could play backwards in a live stream; on that note, it disables prefetching, which would lead to losing new live video, but this could be avoided). The fuckmess also meant that I didn't bother trying to support subtitles. Subtitles are a problem because they're "sparse" streams. They need to be "passively" demuxed: you don't try to read a subtitle packet, you demux audio and video, and then look whether there was a subtitle packet. This means to get subtitles for a time range, you need to know that you demuxed video and audio over this range, which becomes pretty messy when you demux audio and video backwards separately. Backward display is the most weird (and potentially buggy) part. To avoid that we need to touch a LOT of timing code, we negate all timestamps. The basic idea is that due to the navigation, all comparisons and subtractions of timestamps keep working, and you don't need to touch every single of them to "reverse" them. E.g.: bool before = pts_a < pts_b; would need to be: bool before = forward ? pts_a < pts_b : pts_a > pts_b; or: bool before = pts_a * dir < pts_b * dir; or if you, as it's implemented now, just do this after decoding: pts_a *= dir; pts_b *= dir; and then in the normal timing/renderer code: bool before = pts_a < pts_b; Consequently, we don't need many changes in the latter code. But some assumptions inhererently true for forward playback may have been broken anyway. What is mainly needed is fixing places where values are passed between positive and negative "domains". For example, seeking and timestamp user display always uses positive timestamps. The main mess is that it's not obvious which domain a given variable should or does use. Well, in my tests with a single file, it suddenly started to work when I did this. I'm honestly surprised that it did, and that I didn't have to change a single line in the timing code past decoder (just something minor to make external/cached text subtitles display). I committed it immediately while avoiding thinking about it. But there really likely are subtle problems of all sorts. As far as I'm aware, gstreamer also supports backward playback. When I looked at this years ago, I couldn't find a way to actually try this, and I didn't revisit it now. Back then I also read talk slides from the person who implemented it, and I'm not sure if and which ideas I might have taken from it. It's possible that the timestamp reversal is inspired by it, but I didn't check. (I think it claimed that it could avoid large changes by changing a sign?) VapourSynth has some sort of reverse function, which provides a backward view on a video. The function itself is trivial to implement, as VapourSynth aims to provide random access to video by frame numbers (so you just request decreasing frame numbers). From what I remember, it wasn't exactly fluid, but it worked. It's implemented by creating an index, and seeking to the target on demand, and a bunch of caching. mpv could use it, but it would either require using VapourSynth as demuxer and decoder for everything, or replacing the current file every time something is supposed to be played backwards. FFmpeg's libavfilter has reversal filters for audio and video. These require buffering the entire media data of the file, and don't really fit into mpv's architecture. It could be used by playing a libavfilter graph that also demuxes, but that's like VapourSynth but worse.
2019-05-18 00:10:51 +00:00
int mp_image_approx_byte_size(struct mp_image *img);
struct mp_image *mp_image_new_dummy_ref(struct mp_image *img);
2012-12-11 23:43:36 +00:00
struct mp_image *mp_image_new_custom_ref(struct mp_image *img, void *arg,
void (*free)(void *arg));
void mp_image_params_guess_csp(struct mp_image_params *params);
char *mp_image_params_to_str_buf(char *b, size_t bs,
const struct mp_image_params *p);
#define mp_image_params_to_str(p) mp_image_params_to_str_buf((char[256]){0}, 256, p)
bool mp_image_params_valid(const struct mp_image_params *p);
bool mp_image_params_equal(const struct mp_image_params *p1,
const struct mp_image_params *p2);
void mp_image_params_get_dsize(const struct mp_image_params *p,
int *d_w, int *d_h);
void mp_image_params_set_dsize(struct mp_image_params *p, int d_w, int d_h);
void mp_image_set_params(struct mp_image *image,
const struct mp_image_params *params);
void mp_image_set_attributes(struct mp_image *image,
const struct mp_image_params *params);
struct AVFrame;
struct mp_image *mp_image_from_av_frame(struct AVFrame *av_frame);
struct AVFrame *mp_image_to_av_frame(struct mp_image *img);
struct AVFrame *mp_image_to_av_frame_and_unref(struct mp_image *img);
void memcpy_pic(void *dst, const void *src, int bytesPerLine, int height,
int dstStride, int srcStride);
void memset_pic(void *dst, int fill, int bytesPerLine, int height, int stride);
void memset16_pic(void *dst, int fill, int unitsPerLine, int height, int stride);
void *mp_image_pixel_ptr(struct mp_image *img, int plane, int x, int y);
void *mp_image_pixel_ptr_ny(struct mp_image *img, int plane, int x, int y);
size_t mp_image_plane_bytes(struct mp_image *img, int plane, int x0, int w);
#endif /* MPLAYER_MP_IMAGE_H */