// mediamtx/internal/record/format_fmp4.go
//
// 849 lines
// 19 KiB
// Go

package record
import (
"bytes"
"fmt"
"time"
rtspformat "github.com/bluenviron/gortsplib/v4/pkg/format"
"github.com/bluenviron/mediacommon/pkg/codecs/ac3"
"github.com/bluenviron/mediacommon/pkg/codecs/av1"
"github.com/bluenviron/mediacommon/pkg/codecs/g711"
"github.com/bluenviron/mediacommon/pkg/codecs/h264"
"github.com/bluenviron/mediacommon/pkg/codecs/h265"
"github.com/bluenviron/mediacommon/pkg/codecs/jpeg"
"github.com/bluenviron/mediacommon/pkg/codecs/mpeg1audio"
"github.com/bluenviron/mediacommon/pkg/codecs/mpeg4audio"
"github.com/bluenviron/mediacommon/pkg/codecs/mpeg4video"
"github.com/bluenviron/mediacommon/pkg/codecs/opus"
"github.com/bluenviron/mediacommon/pkg/codecs/vp9"
"github.com/bluenviron/mediacommon/pkg/formats/fmp4"
"github.com/bluenviron/mediamtx/internal/defs"
"github.com/bluenviron/mediamtx/internal/logger"
"github.com/bluenviron/mediamtx/internal/test"
"github.com/bluenviron/mediamtx/internal/unit"
)
// durationGoToMp4 converts a Go duration into a number of ticks of an MP4
// track whose time scale (ticks per second) is timeScale.
//
// Whole seconds and the sub-second remainder are converted separately, so the
// intermediate product stays small even for long durations. The fractional
// part is truncated towards zero.
func durationGoToMp4(v time.Duration, timeScale uint32) uint64 {
	ticksPerSecond := uint64(timeScale)

	wholeSeconds := uint64(v / time.Second)
	remainder := uint64(v % time.Second)

	wholeTicks := wholeSeconds * ticksPerSecond
	fractionTicks := remainder * ticksPerSecond / uint64(time.Second)

	return wholeTicks + fractionTicks
}
// mpeg1audioChannelCount returns the number of audio channels carried by a
// MPEG-1 audio frame with the given channel mode: 2 for the stereo, joint
// stereo and dual channel modes, 1 for every other mode.
func mpeg1audioChannelCount(cm mpeg1audio.ChannelMode) int {
	twoChannels := cm == mpeg1audio.ChannelModeStereo ||
		cm == mpeg1audio.ChannelModeJointStereo ||
		cm == mpeg1audio.ChannelModeDualChannel

	if twoChannels {
		return 2
	}
	return 1
}
// jpegExtractSize walks the marker segments at the beginning of a JPEG image
// and returns the width and height declared by its Start Of Frame segment.
//
// An error is returned when the image does not start with a Start Of Image
// marker, when it is truncated, when a segment declares an impossible length,
// when a marker that this scanner does not handle is met, or when Start Of
// Scan is reached before any Start Of Frame segment.
func jpegExtractSize(image []byte) (int, int, error) {
	if len(image) < 2 || image[0] != 0xFF || image[1] != jpeg.MarkerStartOfImage {
		return 0, 0, fmt.Errorf("invalid header")
	}
	image = image[2:]

	// segmentLen reads the 16-bit big-endian length that follows a marker.
	// The length counts its own two bytes, so any value below 2 is invalid;
	// it must also fit in what is left of the buffer.
	segmentLen := func(buf []byte) (int, error) {
		if len(buf) < 2 {
			return 0, fmt.Errorf("not enough bits")
		}
		mlen := int(buf[0])<<8 | int(buf[1])
		if mlen < 2 {
			return 0, fmt.Errorf("invalid segment length")
		}
		if len(buf) < mlen {
			return 0, fmt.Errorf("not enough bits")
		}
		return mlen, nil
	}

	for {
		if len(image) < 2 {
			return 0, 0, fmt.Errorf("not enough bits")
		}

		h0, h1 := image[0], image[1]
		image = image[2:]

		if h0 != 0xFF {
			return 0, 0, fmt.Errorf("invalid image")
		}

		switch h1 {
		case 0xE0, 0xE1, 0xE2, // JFIF
			jpeg.MarkerDefineHuffmanTable,
			jpeg.MarkerComment,
			jpeg.MarkerDefineQuantizationTable,
			jpeg.MarkerDefineRestartInterval:
			// segments that carry no size information: skip them.
			mlen, err := segmentLen(image)
			if err != nil {
				return 0, 0, err
			}
			image = image[mlen:]

		case jpeg.MarkerStartOfFrame1:
			mlen, err := segmentLen(image)
			if err != nil {
				return 0, 0, err
			}

			var sof jpeg.StartOfFrame1
			err = sof.Unmarshal(image[2:mlen])
			if err != nil {
				return 0, 0, err
			}

			return sof.Width, sof.Height, nil

		case jpeg.MarkerStartOfScan:
			// the scan was reached before any Start Of Frame segment.
			return 0, 0, fmt.Errorf("SOF not found")

		default:
			return 0, 0, fmt.Errorf("unknown marker: 0x%.2x", h1)
		}
	}
}
// formatFMP4 holds the state of a recording in fMP4 format.
type formatFMP4 struct {
// a is the agent instance; initialize reads the stream and the writer from it.
a *agentInstance
// tracks is filled by initialize with one entry per supported format.
tracks []*formatFMP4Track
// NOTE(review): not touched in initialize/close; presumably tells whether
// one of the tracks is a video track - confirm against the track code.
hasVideo bool
// currentSegment is the segment being written, or nil when there is none.
// It is closed and reset when codec parameters change, and closed by close.
currentSegment *formatFMP4Segment
// NOTE(review): not touched in initialize/close; presumably the sequence
// number of the next part - confirm against the segment code.
nextSequenceNumber uint32
}
// initialize creates one fMP4 track and registers one stream reader for every
// supported format found in the stream description, then logs the list of
// formats that are going to be recorded.
//
// Codecs start with the parameters advertised by the format when available,
// otherwise with placeholder values. Reader callbacks replace them with the
// values found in the incoming data and call updateCodecs whenever a value
// changes. Video callbacks discard every unit received before the first
// random access unit.
func (f *formatFMP4) initialize() {
	nextID := 1
	var formats []rtspformat.Format

	// addTrack allocates a track with the next free ID and remembers the
	// format so that it can be listed in the final log message.
	addTrack := func(format rtspformat.Format, codec fmp4.Codec) *formatFMP4Track {
		initTrack := &fmp4.InitTrack{
			TimeScale: uint32(format.ClockRate()),
			Codec:     codec,
		}
		initTrack.ID = nextID
		nextID++

		track := &formatFMP4Track{
			f:         f,
			initTrack: initTrack,
		}

		f.tracks = append(f.tracks, track)
		formats = append(formats, format)
		return track
	}

	updateCodecs := func() {
		// if codec parameters have been updated,
		// and current segment has already written codec parameters on disk,
		// close current segment.
		if f.currentSegment != nil && f.currentSegment.fi != nil {
			f.currentSegment.close() //nolint:errcheck
			f.currentSegment = nil
		}
	}

	for _, media := range f.a.agent.Stream.Desc().Medias {
		for _, forma := range media.Formats {
			switch forma := forma.(type) {
			case *rtspformat.AV1:
				// placeholder sequence header, replaced by the first one received.
				codec := &fmp4.CodecAV1{
					SequenceHeader: []byte{
						8, 0, 0, 0, 66, 167, 191, 228, 96, 13, 0, 64,
					},
				}
				track := addTrack(forma, codec)

				firstReceived := false

				f.a.agent.Stream.AddReader(f.a.writer, media, forma, func(u unit.Unit) error {
					tunit := u.(*unit.AV1)
					if tunit.TU == nil {
						return nil
					}

					randomAccess := false

					for _, obu := range tunit.TU {
						var h av1.OBUHeader
						err := h.Unmarshal(obu)
						if err != nil {
							return err
						}

						if h.Type == av1.OBUTypeSequenceHeader {
							if !bytes.Equal(codec.SequenceHeader, obu) {
								codec.SequenceHeader = obu
								updateCodecs()
							}
							// a temporal unit with a sequence header is
							// treated as a random access point.
							randomAccess = true
						}
					}

					if !firstReceived {
						if !randomAccess {
							return nil
						}
						firstReceived = true
					}

					sampl, err := fmp4.NewPartSampleAV1(
						randomAccess,
						tunit.TU)
					if err != nil {
						return err
					}

					return track.record(&sample{
						PartSample: sampl,
						dts:        tunit.PTS,
						ntp:        tunit.NTP,
					})
				})

			case *rtspformat.VP9:
				// placeholder parameters, replaced by those of the first key frame.
				codec := &fmp4.CodecVP9{
					Width:             1280,
					Height:            720,
					Profile:           1,
					BitDepth:          8,
					ChromaSubsampling: 1,
					ColorRange:        false,
				}
				track := addTrack(forma, codec)

				firstReceived := false

				f.a.agent.Stream.AddReader(f.a.writer, media, forma, func(u unit.Unit) error {
					tunit := u.(*unit.VP9)
					if tunit.Frame == nil {
						return nil
					}

					var h vp9.Header
					err := h.Unmarshal(tunit.Frame)
					if err != nil {
						return err
					}

					randomAccess := false

					if !h.NonKeyFrame {
						randomAccess = true

						if width := h.Width(); codec.Width != width {
							codec.Width = width
							updateCodecs()
						}
						// the height must come from Height(), not Width().
						if height := h.Height(); codec.Height != height {
							codec.Height = height
							updateCodecs()
						}
						if codec.Profile != h.Profile {
							codec.Profile = h.Profile
							updateCodecs()
						}
						if codec.BitDepth != h.ColorConfig.BitDepth {
							codec.BitDepth = h.ColorConfig.BitDepth
							updateCodecs()
						}
						if c := h.ChromaSubsampling(); codec.ChromaSubsampling != c {
							codec.ChromaSubsampling = c
							updateCodecs()
						}
						if codec.ColorRange != h.ColorConfig.ColorRange {
							codec.ColorRange = h.ColorConfig.ColorRange
							updateCodecs()
						}
					}

					if !firstReceived {
						if !randomAccess {
							return nil
						}
						firstReceived = true
					}

					return track.record(&sample{
						PartSample: &fmp4.PartSample{
							IsNonSyncSample: !randomAccess,
							Payload:         tunit.Frame,
						},
						dts: tunit.PTS,
						ntp: tunit.NTP,
					})
				})

			case *rtspformat.VP8:
				// TODO

			case *rtspformat.H265:
				vps, sps, pps := forma.SafeParams()

				// placeholder parameters, used until the stream provides its own.
				if vps == nil || sps == nil || pps == nil {
					vps = []byte{
						0x40, 0x01, 0x0c, 0x01, 0xff, 0xff, 0x02, 0x20,
						0x00, 0x00, 0x03, 0x00, 0xb0, 0x00, 0x00, 0x03,
						0x00, 0x00, 0x03, 0x00, 0x7b, 0x18, 0xb0, 0x24,
					}

					sps = []byte{
						0x42, 0x01, 0x01, 0x02, 0x20, 0x00, 0x00, 0x03,
						0x00, 0xb0, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03,
						0x00, 0x7b, 0xa0, 0x07, 0x82, 0x00, 0x88, 0x7d,
						0xb6, 0x71, 0x8b, 0x92, 0x44, 0x80, 0x53, 0x88,
						0x88, 0x92, 0xcf, 0x24, 0xa6, 0x92, 0x72, 0xc9,
						0x12, 0x49, 0x22, 0xdc, 0x91, 0xaa, 0x48, 0xfc,
						0xa2, 0x23, 0xff, 0x00, 0x01, 0x00, 0x01, 0x6a,
						0x02, 0x02, 0x02, 0x01,
					}

					pps = []byte{
						0x44, 0x01, 0xc0, 0x25, 0x2f, 0x05, 0x32, 0x40,
					}
				}

				codec := &fmp4.CodecH265{
					VPS: vps,
					SPS: sps,
					PPS: pps,
				}
				track := addTrack(forma, codec)

				// created on the first random access unit.
				var dtsExtractor *h265.DTSExtractor

				f.a.agent.Stream.AddReader(f.a.writer, media, forma, func(u unit.Unit) error {
					tunit := u.(*unit.H265)
					if tunit.AU == nil {
						return nil
					}

					randomAccess := false

					for _, nalu := range tunit.AU {
						typ := h265.NALUType((nalu[0] >> 1) & 0b111111)

						switch typ {
						case h265.NALUType_VPS_NUT:
							if !bytes.Equal(codec.VPS, nalu) {
								codec.VPS = nalu
								updateCodecs()
							}

						case h265.NALUType_SPS_NUT:
							if !bytes.Equal(codec.SPS, nalu) {
								codec.SPS = nalu
								updateCodecs()
							}

						case h265.NALUType_PPS_NUT:
							if !bytes.Equal(codec.PPS, nalu) {
								codec.PPS = nalu
								updateCodecs()
							}

						case h265.NALUType_IDR_W_RADL, h265.NALUType_IDR_N_LP, h265.NALUType_CRA_NUT:
							randomAccess = true
						}
					}

					if dtsExtractor == nil {
						if !randomAccess {
							return nil
						}
						dtsExtractor = h265.NewDTSExtractor()
					}

					dts, err := dtsExtractor.Extract(tunit.AU, tunit.PTS)
					if err != nil {
						return err
					}

					// first argument is the PTS-DTS difference in 90 kHz ticks.
					sampl, err := fmp4.NewPartSampleH26x(
						int32(durationGoToMp4(tunit.PTS-dts, 90000)),
						randomAccess,
						tunit.AU)
					if err != nil {
						return err
					}

					return track.record(&sample{
						PartSample: sampl,
						dts:        dts,
						ntp:        tunit.NTP,
					})
				})

			case *rtspformat.H264:
				sps, pps := forma.SafeParams()

				// placeholder parameters, used until the stream provides its own.
				if sps == nil || pps == nil {
					sps = test.FormatH264.SPS
					pps = test.FormatH264.PPS
				}

				codec := &fmp4.CodecH264{
					SPS: sps,
					PPS: pps,
				}
				track := addTrack(forma, codec)

				// created on the first random access unit.
				var dtsExtractor *h264.DTSExtractor

				f.a.agent.Stream.AddReader(f.a.writer, media, forma, func(u unit.Unit) error {
					tunit := u.(*unit.H264)
					if tunit.AU == nil {
						return nil
					}

					randomAccess := false

					for _, nalu := range tunit.AU {
						typ := h264.NALUType(nalu[0] & 0x1F)

						switch typ {
						case h264.NALUTypeSPS:
							if !bytes.Equal(codec.SPS, nalu) {
								codec.SPS = nalu
								updateCodecs()
							}

						case h264.NALUTypePPS:
							if !bytes.Equal(codec.PPS, nalu) {
								codec.PPS = nalu
								updateCodecs()
							}

						case h264.NALUTypeIDR:
							randomAccess = true
						}
					}

					if dtsExtractor == nil {
						if !randomAccess {
							return nil
						}
						dtsExtractor = h264.NewDTSExtractor()
					}

					dts, err := dtsExtractor.Extract(tunit.AU, tunit.PTS)
					if err != nil {
						return err
					}

					// first argument is the PTS-DTS difference in 90 kHz ticks.
					sampl, err := fmp4.NewPartSampleH26x(
						int32(durationGoToMp4(tunit.PTS-dts, 90000)),
						randomAccess,
						tunit.AU)
					if err != nil {
						return err
					}

					return track.record(&sample{
						PartSample: sampl,
						dts:        dts,
						ntp:        tunit.NTP,
					})
				})

			case *rtspformat.MPEG4Video:
				config := forma.SafeParams()

				// placeholder configuration, used until the stream provides its own.
				if config == nil {
					config = []byte{
						0x00, 0x00, 0x01, 0xb0, 0x01, 0x00, 0x00, 0x01,
						0xb5, 0x89, 0x13, 0x00, 0x00, 0x01, 0x00, 0x00,
						0x00, 0x01, 0x20, 0x00, 0xc4, 0x8d, 0x88, 0x00,
						0xf5, 0x3c, 0x04, 0x87, 0x14, 0x63, 0x00, 0x00,
						0x01, 0xb2, 0x4c, 0x61, 0x76, 0x63, 0x35, 0x38,
						0x2e, 0x31, 0x33, 0x34, 0x2e, 0x31, 0x30, 0x30,
					}
				}

				codec := &fmp4.CodecMPEG4Video{
					Config: config,
				}
				track := addTrack(forma, codec)

				firstReceived := false
				var lastPTS time.Duration

				f.a.agent.Stream.AddReader(f.a.writer, media, forma, func(u unit.Unit) error {
					tunit := u.(*unit.MPEG4Video)
					if tunit.Frame == nil {
						return nil
					}

					randomAccess := bytes.Contains(tunit.Frame, []byte{0, 0, 1, byte(mpeg4video.GroupOfVOPStartCode)})

					// the configuration is everything that precedes the
					// first Group Of VOP start code.
					if bytes.HasPrefix(tunit.Frame, []byte{0, 0, 1, byte(mpeg4video.VisualObjectSequenceStartCode)}) {
						end := bytes.Index(tunit.Frame[4:], []byte{0, 0, 1, byte(mpeg4video.GroupOfVOPStartCode)})
						if end >= 0 {
							config := tunit.Frame[:end+4]

							if !bytes.Equal(codec.Config, config) {
								codec.Config = config
								updateCodecs()
							}
						}
					}

					if !firstReceived {
						if !randomAccess {
							return nil
						}
						firstReceived = true
					} else if tunit.PTS < lastPTS {
						// the PTS is used as DTS below, so it must never go backwards.
						return fmt.Errorf("MPEG-4 Video streams with B-frames are not supported (yet)")
					}
					lastPTS = tunit.PTS

					return track.record(&sample{
						PartSample: &fmp4.PartSample{
							Payload:         tunit.Frame,
							IsNonSyncSample: !randomAccess,
						},
						dts: tunit.PTS,
						ntp: tunit.NTP,
					})
				})

			case *rtspformat.MPEG1Video:
				// placeholder configuration, replaced by the first one received.
				codec := &fmp4.CodecMPEG1Video{
					Config: []byte{
						0x00, 0x00, 0x01, 0xb3, 0x78, 0x04, 0x38, 0x35,
						0xff, 0xff, 0xe0, 0x18, 0x00, 0x00, 0x01, 0xb5,
						0x14, 0x4a, 0x00, 0x01, 0x00, 0x00,
					},
				}
				track := addTrack(forma, codec)

				firstReceived := false
				var lastPTS time.Duration

				f.a.agent.Stream.AddReader(f.a.writer, media, forma, func(u unit.Unit) error {
					tunit := u.(*unit.MPEG1Video)
					if tunit.Frame == nil {
						return nil
					}

					randomAccess := bytes.Contains(tunit.Frame, []byte{0, 0, 1, 0xB8})

					// the configuration is everything that precedes the
					// first 0xB8 start code.
					if bytes.HasPrefix(tunit.Frame, []byte{0, 0, 1, 0xB3}) {
						end := bytes.Index(tunit.Frame[4:], []byte{0, 0, 1, 0xB8})
						if end >= 0 {
							config := tunit.Frame[:end+4]

							if !bytes.Equal(codec.Config, config) {
								codec.Config = config
								updateCodecs()
							}
						}
					}

					if !firstReceived {
						if !randomAccess {
							return nil
						}
						firstReceived = true
					} else if tunit.PTS < lastPTS {
						// the PTS is used as DTS below, so it must never go backwards.
						return fmt.Errorf("MPEG-1 Video streams with B-frames are not supported (yet)")
					}
					lastPTS = tunit.PTS

					return track.record(&sample{
						PartSample: &fmp4.PartSample{
							Payload:         tunit.Frame,
							IsNonSyncSample: !randomAccess,
						},
						dts: tunit.PTS,
						ntp: tunit.NTP,
					})
				})

			case *rtspformat.MJPEG:
				// placeholder size, replaced by the size of the first frame.
				codec := &fmp4.CodecMJPEG{
					Width:  800,
					Height: 600,
				}
				track := addTrack(forma, codec)

				parsed := false

				f.a.agent.Stream.AddReader(f.a.writer, media, forma, func(u unit.Unit) error {
					tunit := u.(*unit.MJPEG)
					if tunit.Frame == nil {
						return nil
					}

					if !parsed {
						parsed = true
						width, height, err := jpegExtractSize(tunit.Frame)
						if err != nil {
							return err
						}
						codec.Width = width
						codec.Height = height
						updateCodecs()
					}

					return track.record(&sample{
						PartSample: &fmp4.PartSample{
							Payload: tunit.Frame,
						},
						dts: tunit.PTS,
						ntp: tunit.NTP,
					})
				})

			case *rtspformat.Opus:
				codec := &fmp4.CodecOpus{
					ChannelCount: func() int {
						if forma.IsStereo {
							return 2
						}
						return 1
					}(),
				}
				track := addTrack(forma, codec)

				f.a.agent.Stream.AddReader(f.a.writer, media, forma, func(u unit.Unit) error {
					tunit := u.(*unit.Opus)
					if tunit.Packets == nil {
						return nil
					}

					// offset of the current packet from the start of the unit.
					var dt time.Duration

					for _, packet := range tunit.Packets {
						err := track.record(&sample{
							PartSample: &fmp4.PartSample{
								Payload: packet,
							},
							dts: tunit.PTS + dt,
							ntp: tunit.NTP.Add(dt),
						})
						if err != nil {
							return err
						}

						dt += opus.PacketDuration(packet)
					}

					return nil
				})

			case *rtspformat.MPEG4Audio:
				codec := &fmp4.CodecMPEG4Audio{
					Config: *forma.GetConfig(),
				}
				track := addTrack(forma, codec)

				sampleRate := time.Duration(forma.ClockRate())

				f.a.agent.Stream.AddReader(f.a.writer, media, forma, func(u unit.Unit) error {
					tunit := u.(*unit.MPEG4Audio)
					if tunit.AUs == nil {
						return nil
					}

					for i, au := range tunit.AUs {
						// offset of the i-th access unit from the start of the unit.
						dt := time.Duration(i) * mpeg4audio.SamplesPerAccessUnit *
							time.Second / sampleRate

						err := track.record(&sample{
							PartSample: &fmp4.PartSample{
								Payload: au,
							},
							dts: tunit.PTS + dt,
							ntp: tunit.NTP.Add(dt),
						})
						if err != nil {
							return err
						}
					}

					return nil
				})

			case *rtspformat.MPEG1Audio:
				// placeholder parameters, replaced by those of the first frame.
				codec := &fmp4.CodecMPEG1Audio{
					SampleRate:   32000,
					ChannelCount: 2,
				}
				track := addTrack(forma, codec)

				parsed := false

				f.a.agent.Stream.AddReader(f.a.writer, media, forma, func(u unit.Unit) error {
					tunit := u.(*unit.MPEG1Audio)
					if tunit.Frames == nil {
						return nil
					}

					// offset of the current frame from the start of the unit.
					var dt time.Duration

					for _, frame := range tunit.Frames {
						var h mpeg1audio.FrameHeader
						err := h.Unmarshal(frame)
						if err != nil {
							return err
						}

						if !parsed {
							parsed = true
							codec.SampleRate = h.SampleRate
							codec.ChannelCount = mpeg1audioChannelCount(h.ChannelMode)
							updateCodecs()
						}

						// each frame is timestamped with the unit timestamp
						// plus the duration of the frames that precede it.
						err = track.record(&sample{
							PartSample: &fmp4.PartSample{
								Payload: frame,
							},
							dts: tunit.PTS + dt,
							ntp: tunit.NTP.Add(dt),
						})
						if err != nil {
							return err
						}

						dt += time.Duration(h.SampleCount()) *
							time.Second / time.Duration(h.SampleRate)
					}

					return nil
				})

			case *rtspformat.AC3:
				// placeholder parameters, replaced by those of the first frame.
				codec := &fmp4.CodecAC3{
					SampleRate:   forma.SampleRate,
					ChannelCount: forma.ChannelCount,
					Fscod:        0,
					Bsid:         8,
					Bsmod:        0,
					Acmod:        7,
					LfeOn:        true,
					BitRateCode:  7,
				}
				track := addTrack(forma, codec)

				parsed := false

				f.a.agent.Stream.AddReader(f.a.writer, media, forma, func(u unit.Unit) error {
					tunit := u.(*unit.AC3)
					if tunit.Frames == nil {
						return nil
					}

					for i, frame := range tunit.Frames {
						var syncInfo ac3.SyncInfo
						err := syncInfo.Unmarshal(frame)
						if err != nil {
							return fmt.Errorf("invalid AC-3 frame: %w", err)
						}

						var bsi ac3.BSI
						err = bsi.Unmarshal(frame[5:])
						if err != nil {
							return fmt.Errorf("invalid AC-3 frame: %w", err)
						}

						if !parsed {
							parsed = true
							codec.SampleRate = syncInfo.SampleRate()
							codec.ChannelCount = bsi.ChannelCount()
							codec.Fscod = syncInfo.Fscod
							codec.Bsid = bsi.Bsid
							codec.Bsmod = bsi.Bsmod
							codec.Acmod = bsi.Acmod
							codec.LfeOn = bsi.LfeOn
							codec.BitRateCode = syncInfo.Frmsizecod >> 1
							updateCodecs()
						}

						// offset of the i-th frame from the start of the unit.
						dt := time.Duration(i) * time.Duration(ac3.SamplesPerFrame) *
							time.Second / time.Duration(codec.SampleRate)

						err = track.record(&sample{
							PartSample: &fmp4.PartSample{
								Payload: frame,
							},
							dts: tunit.PTS + dt,
							ntp: tunit.NTP.Add(dt),
						})
						if err != nil {
							return err
						}
					}

					return nil
				})

			case *rtspformat.G722:
				// TODO

			case *rtspformat.G711:
				// G711 samples are decoded and stored as 16-bit big-endian LPCM.
				codec := &fmp4.CodecLPCM{
					LittleEndian: false,
					BitDepth:     16,
					SampleRate:   forma.SampleRate,
					ChannelCount: forma.ChannelCount,
				}
				track := addTrack(forma, codec)

				f.a.agent.Stream.AddReader(f.a.writer, media, forma, func(u unit.Unit) error {
					tunit := u.(*unit.G711)
					if tunit.Samples == nil {
						return nil
					}

					var out []byte
					if forma.MULaw {
						out = g711.DecodeMulaw(tunit.Samples)
					} else {
						out = g711.DecodeAlaw(tunit.Samples)
					}

					return track.record(&sample{
						PartSample: &fmp4.PartSample{
							Payload: out,
						},
						dts: tunit.PTS,
						ntp: tunit.NTP,
					})
				})

			case *rtspformat.LPCM:
				codec := &fmp4.CodecLPCM{
					LittleEndian: false,
					BitDepth:     forma.BitDepth,
					SampleRate:   forma.SampleRate,
					ChannelCount: forma.ChannelCount,
				}
				track := addTrack(forma, codec)

				f.a.agent.Stream.AddReader(f.a.writer, media, forma, func(u unit.Unit) error {
					tunit := u.(*unit.LPCM)
					if tunit.Samples == nil {
						return nil
					}

					return track.record(&sample{
						PartSample: &fmp4.PartSample{
							Payload: tunit.Samples,
						},
						dts: tunit.PTS,
						ntp: tunit.NTP,
					})
				})
			}
		}
	}

	f.a.agent.Log(logger.Info, "recording %s",
		defs.FormatsInfo(formats))
}
// close closes the segment that is currently being written, if there is one.
// The error returned by the segment is intentionally discarded.
func (f *formatFMP4) close() {
	if f.currentSegment == nil {
		return
	}
	f.currentSegment.close() //nolint:errcheck
}