tdesktop/Telegram/SourceFiles/media/media_clip_ffmpeg.cpp
John Preston 9fe714189d updateNewMessage now can request getDifference(), if data is absent.
Video sync and frame duration count improved.
Seek in not 44100 and not 48000 hz audio streams fixed.
2016-07-21 20:35:55 +03:00

515 lines
17 KiB
C++

/*
This file is part of Telegram Desktop,
the official desktop version of Telegram messaging app, see https://telegram.org
Telegram Desktop is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
It is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
In addition, as a special exception, the copyright holders give permission
to link the code of portions of this program with the OpenSSL library.
Full license: https://github.com/telegramdesktop/tdesktop/blob/master/LICENSE
Copyright (c) 2014-2016 John Preston, https://desktop.telegram.org
*/
#include "stdafx.h"
#include "media/media_clip_ffmpeg.h"
#include "media/media_audio.h"
#include "media/media_child_ffmpeg_loader.h"
namespace Media {
namespace Clip {
namespace internal {
FFMpegReaderImplementation::FFMpegReaderImplementation(FileLocation *location, QByteArray *data, uint64 playId) : ReaderImplementation(location, data)
, _playId(playId) {
_frame = av_frame_alloc();
av_init_packet(&_packetNull);
_packetNull.data = nullptr;
_packetNull.size = 0;
}
ReaderImplementation::ReadResult FFMpegReaderImplementation::readNextFrame() {
if (_frameRead) {
av_frame_unref(_frame);
_frameRead = false;
}
while (true) {
while (_packetQueue.isEmpty()) {
auto packetResult = readAndProcessPacket();
if (packetResult == PacketResult::Error) {
return ReadResult::Error;
} else if (packetResult == PacketResult::EndOfFile) {
break;
}
}
bool eofReached = _packetQueue.isEmpty();
startPacket();
int got_frame = 0;
auto packet = &_packetNull;
AVPacket tempPacket;
if (!_packetQueue.isEmpty()) {
FFMpeg::packetFromDataWrap(tempPacket, _packetQueue.head());
packet = &tempPacket;
}
int decoded = packet->size;
int res = 0;
if ((res = avcodec_decode_video2(_codecContext, _frame, &got_frame, packet)) < 0) {
char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
LOG(("Gif Error: Unable to avcodec_decode_video2() %1, error %2, %3").arg(logData()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
if (res == AVERROR_INVALIDDATA) { // try to skip bad packet
finishPacket();
continue;
}
eofReached = (res == AVERROR_EOF);
if (!eofReached || !_hadFrame) { // try to skip end of file
return ReadResult::Error;
}
}
if (res > 0) decoded = res;
if (!_packetQueue.isEmpty()) {
packet->data += decoded;
packet->size -= decoded;
if (packet->size <= 0) {
finishPacket();
}
}
if (got_frame) {
int64 duration = av_frame_get_pkt_duration(_frame);
int64 framePts = (_frame->pkt_pts == AV_NOPTS_VALUE) ? _frame->pkt_dts : _frame->pkt_pts;
int64 frameMs = (framePts * 1000LL * _fmtContext->streams[_streamId]->time_base.num) / _fmtContext->streams[_streamId]->time_base.den;
_currentFrameDelay = _nextFrameDelay;
if (_frameMs + _currentFrameDelay < frameMs) {
_currentFrameDelay = int32(frameMs - _frameMs);
} else if (frameMs < _frameMs + _currentFrameDelay) {
frameMs = _frameMs + _currentFrameDelay;
}
if (duration == AV_NOPTS_VALUE) {
_nextFrameDelay = 0;
} else {
_nextFrameDelay = (duration * 1000LL * _fmtContext->streams[_streamId]->time_base.num) / _fmtContext->streams[_streamId]->time_base.den;
}
_frameMs = frameMs;
_hadFrame = _frameRead = true;
_frameTime += _currentFrameDelay;
return ReadResult::Success;
}
if (eofReached) {
clearPacketQueue();
if (_mode == Mode::Normal) {
return ReadResult::Eof;
}
if ((res = avformat_seek_file(_fmtContext, _streamId, std::numeric_limits<int64_t>::min(), 0, std::numeric_limits<int64_t>::max(), 0)) < 0) {
if ((res = av_seek_frame(_fmtContext, _streamId, 0, AVSEEK_FLAG_BYTE)) < 0) {
if ((res = av_seek_frame(_fmtContext, _streamId, 0, AVSEEK_FLAG_FRAME)) < 0) {
if ((res = av_seek_frame(_fmtContext, _streamId, 0, 0)) < 0) {
char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
LOG(("Gif Error: Unable to av_seek_frame() to the start %1, error %2, %3").arg(logData()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
return ReadResult::Error;
}
}
}
}
avcodec_flush_buffers(_codecContext);
_hadFrame = false;
_frameMs = 0;
_lastReadVideoMs = _lastReadAudioMs = 0;
}
}
return ReadResult::Error;
}
ReaderImplementation::ReadResult FFMpegReaderImplementation::readFramesTill(int64 frameMs, uint64 systemMs) {
if (_audioStreamId < 0) { // just keep up
if (_frameRead && _frameTime > frameMs) {
return ReadResult::Success;
}
auto readResult = readNextFrame();
if (readResult != ReadResult::Success || _frameTime > frameMs) {
return readResult;
}
readResult = readNextFrame();
if (_frameTime <= frameMs) {
_frameTime = frameMs + 5; // keep up
}
return readResult;
}
// sync by audio stream
auto correctMs = (frameMs >= 0) ? audioPlayer()->getVideoCorrectedTime(_playId, frameMs, systemMs) : frameMs;
if (!_frameRead) {
auto readResult = readNextFrame();
if (readResult != ReadResult::Success) {
return readResult;
}
}
while (_frameTime <= correctMs) {
auto readResult = readNextFrame();
if (readResult != ReadResult::Success) {
return readResult;
}
}
if (frameMs >= 0) {
_frameTimeCorrection = frameMs - correctMs;
}
return ReadResult::Success;
}
int64 FFMpegReaderImplementation::frameRealTime() const {
return _frameMs;
}
uint64 FFMpegReaderImplementation::framePresentationTime() const {
return static_cast<uint64>(qMax(_frameTime + _frameTimeCorrection, 0LL));
}
int64 FFMpegReaderImplementation::durationMs() const {
if (_fmtContext->streams[_streamId]->duration == AV_NOPTS_VALUE) return 0;
return (_fmtContext->streams[_streamId]->duration * 1000LL * _fmtContext->streams[_streamId]->time_base.num) / _fmtContext->streams[_streamId]->time_base.den;
}
void FFMpegReaderImplementation::pauseAudio() {
if (_audioStreamId >= 0) {
audioPlayer()->pauseFromVideo(_playId);
}
}
void FFMpegReaderImplementation::resumeAudio() {
if (_audioStreamId >= 0) {
audioPlayer()->resumeFromVideo(_playId);
}
}
bool FFMpegReaderImplementation::renderFrame(QImage &to, bool &hasAlpha, const QSize &size) {
t_assert(_frameRead);
_frameRead = false;
if (!_width || !_height) {
_width = _frame->width;
_height = _frame->height;
if (!_width || !_height) {
LOG(("Gif Error: Bad frame size %1").arg(logData()));
return false;
}
}
QSize toSize(size.isEmpty() ? QSize(_width, _height) : size);
if (to.isNull() || to.size() != toSize) {
to = QImage(toSize, QImage::Format_ARGB32);
}
hasAlpha = (_frame->format == AV_PIX_FMT_BGRA || (_frame->format == -1 && _codecContext->pix_fmt == AV_PIX_FMT_BGRA));
if (_frame->width == toSize.width() && _frame->height == toSize.height() && hasAlpha) {
int32 sbpl = _frame->linesize[0], dbpl = to.bytesPerLine(), bpl = qMin(sbpl, dbpl);
uchar *s = _frame->data[0], *d = to.bits();
for (int32 i = 0, l = _frame->height; i < l; ++i) {
memcpy(d + i * dbpl, s + i * sbpl, bpl);
}
} else {
if ((_swsSize != toSize) || (_frame->format != -1 && _frame->format != _codecContext->pix_fmt) || !_swsContext) {
_swsSize = toSize;
_swsContext = sws_getCachedContext(_swsContext, _frame->width, _frame->height, AVPixelFormat(_frame->format), toSize.width(), toSize.height(), AV_PIX_FMT_BGRA, 0, 0, 0, 0);
}
uint8_t * toData[1] = { to.bits() };
int toLinesize[1] = { to.bytesPerLine() }, res;
if ((res = sws_scale(_swsContext, _frame->data, _frame->linesize, 0, _frame->height, toData, toLinesize)) != _swsSize.height()) {
LOG(("Gif Error: Unable to sws_scale to good size %1, height %2, should be %3").arg(logData()).arg(res).arg(_swsSize.height()));
return false;
}
}
// Read some future packets for audio stream.
if (_audioStreamId >= 0) {
while (_frameMs + 5000 > _lastReadAudioMs
&& _frameMs + 15000 > _lastReadVideoMs) {
auto packetResult = readAndProcessPacket();
if (packetResult != PacketResult::Ok) {
break;
}
}
}
av_frame_unref(_frame);
return true;
}
bool FFMpegReaderImplementation::start(Mode mode, int64 &positionMs) {
_mode = mode;
initDevice();
if (!_device->open(QIODevice::ReadOnly)) {
LOG(("Gif Error: Unable to open device %1").arg(logData()));
return false;
}
_ioBuffer = (uchar*)av_malloc(AVBlockSize);
_ioContext = avio_alloc_context(_ioBuffer, AVBlockSize, 0, static_cast<void*>(this), &FFMpegReaderImplementation::_read, 0, &FFMpegReaderImplementation::_seek);
_fmtContext = avformat_alloc_context();
if (!_fmtContext) {
LOG(("Gif Error: Unable to avformat_alloc_context %1").arg(logData()));
return false;
}
_fmtContext->pb = _ioContext;
int res = 0;
char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
if ((res = avformat_open_input(&_fmtContext, 0, 0, 0)) < 0) {
_ioBuffer = 0;
LOG(("Gif Error: Unable to avformat_open_input %1, error %2, %3").arg(logData()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
return false;
}
_opened = true;
if ((res = avformat_find_stream_info(_fmtContext, 0)) < 0) {
LOG(("Gif Error: Unable to avformat_find_stream_info %1, error %2, %3").arg(logData()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
return false;
}
_streamId = av_find_best_stream(_fmtContext, AVMEDIA_TYPE_VIDEO, -1, -1, 0, 0);
if (_streamId < 0) {
LOG(("Gif Error: Unable to av_find_best_stream %1, error %2, %3").arg(logData()).arg(_streamId).arg(av_make_error_string(err, sizeof(err), _streamId)));
return false;
}
_packetNull.stream_index = _streamId;
// Get a pointer to the codec context for the video stream
_codecContext = _fmtContext->streams[_streamId]->codec;
_codec = avcodec_find_decoder(_codecContext->codec_id);
_audioStreamId = av_find_best_stream(_fmtContext, AVMEDIA_TYPE_AUDIO, -1, -1, 0, 0);
if (_mode == Mode::OnlyGifv) {
if (_audioStreamId >= 0) { // should be no audio stream
return false;
}
if (dataSize() > AnimationInMemory) {
return false;
}
if (_codecContext->codec_id != AV_CODEC_ID_H264) {
return false;
}
} else if (_mode == Mode::Silent || !audioPlayer() || !_playId) {
_audioStreamId = -1;
}
av_opt_set_int(_codecContext, "refcounted_frames", 1, 0);
if ((res = avcodec_open2(_codecContext, _codec, 0)) < 0) {
LOG(("Gif Error: Unable to avcodec_open2 %1, error %2, %3").arg(logData()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
return false;
}
std_::unique_ptr<VideoSoundData> soundData;
if (_audioStreamId >= 0) {
// Get a pointer to the codec context for the audio stream
auto audioContextOriginal = _fmtContext->streams[_audioStreamId]->codec;
auto audioCodec = avcodec_find_decoder(audioContextOriginal->codec_id);
AVCodecContext *audioContext = avcodec_alloc_context3(audioCodec);
if ((res = avcodec_copy_context(audioContext, audioContextOriginal)) != 0) {
LOG(("Gif Error: Unable to avcodec_open2 %1, error %2, %3").arg(logData()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
return false;
}
av_opt_set_int(audioContext, "refcounted_frames", 1, 0);
if ((res = avcodec_open2(audioContext, audioCodec, 0)) < 0) {
avcodec_free_context(&audioContext);
LOG(("Gif Error: Unable to avcodec_open2 %1, error %2, %3").arg(logData()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
_audioStreamId = -1;
} else {
soundData = std_::make_unique<VideoSoundData>();
soundData->context = audioContext;
soundData->frequency = audioContextOriginal->sample_rate;
if (_fmtContext->streams[_audioStreamId]->duration == AV_NOPTS_VALUE) {
soundData->length = (_fmtContext->duration * soundData->frequency) / AV_TIME_BASE;
} else {
soundData->length = (_fmtContext->streams[_audioStreamId]->duration * soundData->frequency * _fmtContext->streams[_audioStreamId]->time_base.num) / _fmtContext->streams[_audioStreamId]->time_base.den;
}
}
}
if (positionMs > 0) {
int64 ts = (positionMs * _fmtContext->streams[_streamId]->time_base.den) / (1000LL * _fmtContext->streams[_streamId]->time_base.num);
if (av_seek_frame(_fmtContext, _streamId, ts, 0) < 0) {
if (av_seek_frame(_fmtContext, _streamId, ts, AVSEEK_FLAG_BACKWARD) < 0) {
return false;
}
}
}
AVPacket packet;
auto readResult = readPacket(&packet);
if (readResult == PacketResult::Ok && positionMs > 0) {
positionMs = countPacketMs(&packet);
}
if (_audioStreamId >= 0) {
int64 position = (positionMs * soundData->frequency) / 1000LL;
audioPlayer()->initFromVideo(_playId, std_::move(soundData), position);
}
if (readResult == PacketResult::Ok) {
processPacket(&packet);
}
return true;
}
QString FFMpegReaderImplementation::logData() const {
return qsl("for file '%1', data size '%2'").arg(_location ? _location->name() : QString()).arg(_data->size());
}
FFMpegReaderImplementation::~FFMpegReaderImplementation() {
if (_audioStreamId >= 0) {
audioPlayer()->stopFromVideo(_playId);
}
clearPacketQueue();
if (_frameRead) {
av_frame_unref(_frame);
_frameRead = false;
}
if (_codecContext) avcodec_close(_codecContext);
if (_swsContext) sws_freeContext(_swsContext);
if (_opened) {
avformat_close_input(&_fmtContext);
}
if (_ioContext) {
av_free(_ioContext->buffer);
av_free(_ioContext);
} else if (_ioBuffer) {
av_free(_ioBuffer);
}
if (_fmtContext) avformat_free_context(_fmtContext);
av_frame_free(&_frame);
}
FFMpegReaderImplementation::PacketResult FFMpegReaderImplementation::readPacket(AVPacket *packet) {
av_init_packet(packet);
packet->data = nullptr;
packet->size = 0;
int res = 0;
if ((res = av_read_frame(_fmtContext, packet)) < 0) {
if (res == AVERROR_EOF) {
if (_audioStreamId >= 0) {
// queue terminating packet to audio player
VideoSoundPart part;
part.packet = &_packetNull;
part.videoPlayId = _playId;
audioPlayer()->feedFromVideo(std_::move(part));
}
return PacketResult::EndOfFile;
}
char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
LOG(("Gif Error: Unable to av_read_frame() %1, error %2, %3").arg(logData()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
return PacketResult::Error;
}
return PacketResult::Ok;
}
void FFMpegReaderImplementation::processPacket(AVPacket *packet) {
bool videoPacket = (packet->stream_index == _streamId);
bool audioPacket = (_audioStreamId >= 0 && packet->stream_index == _audioStreamId);
if (audioPacket || videoPacket) {
if (videoPacket) {
_lastReadVideoMs = countPacketMs(packet);
_packetQueue.enqueue(FFMpeg::dataWrapFromPacket(*packet));
} else if (audioPacket) {
_lastReadAudioMs = countPacketMs(packet);
// queue packet to audio player
VideoSoundPart part;
part.packet = packet;
part.videoPlayId = _playId;
audioPlayer()->feedFromVideo(std_::move(part));
}
} else {
av_packet_unref(packet);
}
}
int64 FFMpegReaderImplementation::countPacketMs(AVPacket *packet) const {
int64 packetPts = (packet->pts == AV_NOPTS_VALUE) ? packet->dts : packet->pts;
int64 packetMs = (packetPts * 1000LL * _fmtContext->streams[packet->stream_index]->time_base.num) / _fmtContext->streams[packet->stream_index]->time_base.den;
return packetMs;
}
FFMpegReaderImplementation::PacketResult FFMpegReaderImplementation::readAndProcessPacket() {
AVPacket packet;
auto result = readPacket(&packet);
if (result == PacketResult::Ok) {
processPacket(&packet);
}
return result;
}
void FFMpegReaderImplementation::startPacket() {
if (!_packetStarted && !_packetQueue.isEmpty()) {
AVPacket packet;
FFMpeg::packetFromDataWrap(packet, _packetQueue.head());
_packetStartedSize = packet.size;
_packetStartedData = packet.data;
_packetStarted = true;
}
}
void FFMpegReaderImplementation::finishPacket() {
if (_packetStarted) {
AVPacket packet;
FFMpeg::packetFromDataWrap(packet, _packetQueue.head());
packet.size = _packetStartedSize;
packet.data = _packetStartedData;
_packetStarted = false;
av_packet_unref(&packet);
_packetQueue.dequeue();
}
}
void FFMpegReaderImplementation::clearPacketQueue() {
finishPacket();
auto packets = createAndSwap(_packetQueue);
for (auto &packetData : packets) {
AVPacket packet;
FFMpeg::packetFromDataWrap(packet, packetData);
av_packet_unref(&packet);
}
}
int FFMpegReaderImplementation::_read(void *opaque, uint8_t *buf, int buf_size) {
FFMpegReaderImplementation *l = reinterpret_cast<FFMpegReaderImplementation*>(opaque);
return int(l->_device->read((char*)(buf), buf_size));
}
int64_t FFMpegReaderImplementation::_seek(void *opaque, int64_t offset, int whence) {
FFMpegReaderImplementation *l = reinterpret_cast<FFMpegReaderImplementation*>(opaque);
switch (whence) {
case SEEK_SET: return l->_device->seek(offset) ? l->_device->pos() : -1;
case SEEK_CUR: return l->_device->seek(l->_device->pos() + offset) ? l->_device->pos() : -1;
case SEEK_END: return l->_device->seek(l->_device->size() + offset) ? l->_device->pos() : -1;
}
return -1;
}
} // namespace internal
} // namespace Clip
} // namespace Media