tdesktop/Telegram/SourceFiles/media/media_child_ffmpeg_loader.cpp

/*
This file is part of Telegram Desktop,
the official desktop version of Telegram messaging app, see https://telegram.org

Telegram Desktop is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

It is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

In addition, as a special exception, the copyright holders give permission
to link the code of portions of this program with the OpenSSL library.

Full license: https://github.com/telegramdesktop/tdesktop/blob/master/LICENSE
Copyright (c) 2014-2016 John Preston, https://desktop.telegram.org
*/
#include "stdafx.h"
#include "media/media_child_ffmpeg_loader.h"

constexpr AVSampleFormat AudioToFormat = AV_SAMPLE_FMT_S16;
constexpr int64_t AudioToChannelLayout = AV_CH_LAYOUT_STEREO;
constexpr int32 AudioToChannels = 2;

VideoSoundData::~VideoSoundData() {
	if (context) {
		avcodec_close(context);
		avcodec_free_context(&context);
		context = nullptr;
	}
}

ChildFFMpegLoader::ChildFFMpegLoader(uint64 videoPlayId, std_::unique_ptr<VideoSoundData> &&data) : AudioPlayerLoader(FileLocation(), QByteArray())
, _videoPlayId(videoPlayId)
, _parentData(std_::move(data)) {
	_frame = av_frame_alloc();
}

bool ChildFFMpegLoader::open(qint64 &position) {
	int res = 0;
	char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };

	uint64_t layout = _parentData->context->channel_layout;
	_inputFormat = _parentData->context->sample_fmt;
	switch (layout) {
	case AV_CH_LAYOUT_MONO:
		switch (_inputFormat) {
		case AV_SAMPLE_FMT_U8:
		case AV_SAMPLE_FMT_U8P: _format = AL_FORMAT_MONO8; _sampleSize = 1; break;
		case AV_SAMPLE_FMT_S16:
		case AV_SAMPLE_FMT_S16P: _format = AL_FORMAT_MONO16; _sampleSize = sizeof(uint16); break;
		default:
			_sampleSize = -1; // convert needed
		break;
		}
	break;
	case AV_CH_LAYOUT_STEREO:
		switch (_inputFormat) {
		case AV_SAMPLE_FMT_U8: _format = AL_FORMAT_STEREO8; _sampleSize = 2; break;
		case AV_SAMPLE_FMT_S16: _format = AL_FORMAT_STEREO16; _sampleSize = 2 * sizeof(uint16); break;
		default:
			_sampleSize = -1; // convert needed
		break;
		}
	break;
	default:
		_sampleSize = -1; // convert needed
	break;
	}
	if (_parentData->frequency != 44100 && _parentData->frequency != 48000) {
		_sampleSize = -1; // convert needed
	}

	if (_sampleSize < 0) {
		_swrContext = swr_alloc();
		if (!_swrContext) {
			LOG(("Audio Error: Unable to swr_alloc for file '%1', data size '%2'").arg(file.name()).arg(data.size()));
			return false;
		}
		int64_t src_ch_layout = layout, dst_ch_layout = AudioToChannelLayout;
		_srcRate = _parentData->frequency;
		AVSampleFormat src_sample_fmt = _inputFormat, dst_sample_fmt = AudioToFormat;
		_dstRate = (_parentData->frequency != 44100 && _parentData->frequency != 48000) ? AudioVoiceMsgFrequency : _parentData->frequency;

		av_opt_set_int(_swrContext, "in_channel_layout", src_ch_layout, 0);
		av_opt_set_int(_swrContext, "in_sample_rate", _srcRate, 0);
		av_opt_set_sample_fmt(_swrContext, "in_sample_fmt", src_sample_fmt, 0);
		av_opt_set_int(_swrContext, "out_channel_layout", dst_ch_layout, 0);
		av_opt_set_int(_swrContext, "out_sample_rate", _dstRate, 0);
		av_opt_set_sample_fmt(_swrContext, "out_sample_fmt", dst_sample_fmt, 0);

		if ((res = swr_init(_swrContext)) < 0) {
			LOG(("Audio Error: Unable to swr_init for file '%1', data size '%2', error %3, %4").arg(file.name()).arg(data.size()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
			return false;
		}

		_sampleSize = AudioToChannels * sizeof(short);
		_parentData->frequency = _dstRate;
		_parentData->length = av_rescale_rnd(_parentData->length, _dstRate, _srcRate, AV_ROUND_UP);
		position = av_rescale_rnd(position, _dstRate, _srcRate, AV_ROUND_DOWN);
		_format = AL_FORMAT_STEREO16;

		_maxResampleSamples = av_rescale_rnd(AVBlockSize / _sampleSize, _dstRate, _srcRate, AV_ROUND_UP);
		if ((res = av_samples_alloc_array_and_samples(&_dstSamplesData, 0, AudioToChannels, _maxResampleSamples, AudioToFormat, 0)) < 0) {
			LOG(("Audio Error: Unable to av_samples_alloc for file '%1', data size '%2', error %3, %4").arg(file.name()).arg(data.size()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));
			return false;
		}
	}

	return true;
}

AudioPlayerLoader::ReadResult ChildFFMpegLoader::readMore(QByteArray &result, int64 &samplesAdded) {
	if (_queue.isEmpty()) {
		return _eofReached ? ReadResult::EndOfFile : ReadResult::Wait;
	}

	av_frame_unref(_frame);
	int got_frame = 0;
	int res = 0;

	AVPacket packet;
	FFMpeg::packetFromDataWrap(packet, _queue.dequeue());

	_eofReached = FFMpeg::isNullPacket(packet);
	if (_eofReached) {
		return ReadResult::EndOfFile;
	}

	if ((res = avcodec_decode_audio4(_parentData->context, _frame, &got_frame, &packet)) < 0) {
		char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
		LOG(("Audio Error: Unable to avcodec_decode_audio4() file '%1', data size '%2', error %3, %4").arg(file.name()).arg(data.size()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));

		FFMpeg::freePacket(&packet);
		if (res == AVERROR_INVALIDDATA) {
			return ReadResult::NotYet; // try to skip bad packet
		}
		return ReadResult::Error;
	}

	if (got_frame) {
		if (_dstSamplesData) { // convert needed
			int64_t dstSamples = av_rescale_rnd(swr_get_delay(_swrContext, _srcRate) + _frame->nb_samples, _dstRate, _srcRate, AV_ROUND_UP);
			if (dstSamples > _maxResampleSamples) {
				_maxResampleSamples = dstSamples;
				av_free(_dstSamplesData[0]);

				if ((res = av_samples_alloc(_dstSamplesData, 0, AudioToChannels, _maxResampleSamples, AudioToFormat, 1)) < 0) {
					_dstSamplesData[0] = 0;
					char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
					LOG(("Audio Error: Unable to av_samples_alloc for file '%1', data size '%2', error %3, %4").arg(file.name()).arg(data.size()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));

					FFMpeg::freePacket(&packet);
					return ReadResult::Error;
				}
			}
			if ((res = swr_convert(_swrContext, _dstSamplesData, dstSamples, (const uint8_t**)_frame->extended_data, _frame->nb_samples)) < 0) {
				char err[AV_ERROR_MAX_STRING_SIZE] = { 0 };
				LOG(("Audio Error: Unable to swr_convert for file '%1', data size '%2', error %3, %4").arg(file.name()).arg(data.size()).arg(res).arg(av_make_error_string(err, sizeof(err), res)));

				FFMpeg::freePacket(&packet);
				return ReadResult::Error;
			}
			int32 resultLen = av_samples_get_buffer_size(0, AudioToChannels, res, AudioToFormat, 1);
			result.append((const char*)_dstSamplesData[0], resultLen);
			samplesAdded += resultLen / _sampleSize;
		} else {
			result.append((const char*)_frame->extended_data[0], _frame->nb_samples * _sampleSize);
			samplesAdded += _frame->nb_samples;
		}
	}
	FFMpeg::freePacket(&packet);
	return ReadResult::Ok;
}

void ChildFFMpegLoader::enqueuePackets(QQueue<FFMpeg::AVPacketDataWrap> &packets) {
	_queue += std_::move(packets);
	packets.clear();
}

ChildFFMpegLoader::~ChildFFMpegLoader() {
	auto queue = createAndSwap(_queue);
	for (auto &packetData : queue) {
		AVPacket packet;
		FFMpeg::packetFromDataWrap(packet, packetData);
		FFMpeg::freePacket(&packet);
	}
	if (_swrContext) swr_free(&_swrContext);
	if (_dstSamplesData) {
		if (_dstSamplesData[0]) {
			av_freep(&_dstSamplesData[0]);
		}
		av_freep(&_dstSamplesData);
	}
	av_frame_free(&_frame);
}