diff --git a/internal/protocols/webrtc/outgoing_track.go b/internal/protocols/webrtc/outgoing_track.go index efd79cb6..ae591832 100644 --- a/internal/protocols/webrtc/outgoing_track.go +++ b/internal/protocols/webrtc/outgoing_track.go @@ -92,15 +92,43 @@ func (t *OutgoingTrack) codecParameters() (webrtc.RTPCodecParameters, error) { }, nil case *format.G711: - if forma.SampleRate != 8000 { - return webrtc.RTPCodecParameters{}, fmt.Errorf("unsupported G711 sample rate") + // These are the sample rates and channels supported by Chrome. + // Different sample rates and channels can be streamed too but we don't want compatibility issues. + // https://webrtc.googlesource.com/src/+/refs/heads/main/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.cc#23 + if forma.ClockRate() != 8000 && forma.ClockRate() != 16000 && + forma.ClockRate() != 32000 && forma.ClockRate() != 48000 { + return webrtc.RTPCodecParameters{}, fmt.Errorf("unsupported clock rate: %d", forma.ClockRate()) + } + if forma.ChannelCount != 1 && forma.ChannelCount != 2 { + return webrtc.RTPCodecParameters{}, fmt.Errorf("unsupported channel count: %d", forma.ChannelCount) } - if forma.MULaw { - if forma.ChannelCount != 1 { + if forma.SampleRate == 8000 { + if forma.MULaw { + if forma.ChannelCount != 1 { + return webrtc.RTPCodecParameters{ + RTPCodecCapability: webrtc.RTPCodecCapability{ + MimeType: webrtc.MimeTypePCMU, + ClockRate: uint32(forma.SampleRate), + Channels: uint16(forma.ChannelCount), + }, + PayloadType: 96, + }, nil + } + return webrtc.RTPCodecParameters{ RTPCodecCapability: webrtc.RTPCodecCapability{ MimeType: webrtc.MimeTypePCMU, + ClockRate: 8000, + }, + PayloadType: 0, + }, nil + } + + if forma.ChannelCount != 1 { + return webrtc.RTPCodecParameters{ + RTPCodecCapability: webrtc.RTPCodecCapability{ + MimeType: webrtc.MimeTypePCMA, ClockRate: uint32(forma.SampleRate), Channels: uint16(forma.ChannelCount), }, @@ -110,30 +138,20 @@ func (t *OutgoingTrack) codecParameters() (webrtc.RTPCodecParameters, error) { return webrtc.RTPCodecParameters{ RTPCodecCapability: webrtc.RTPCodecCapability{ - MimeType: webrtc.MimeTypePCMU, + MimeType: webrtc.MimeTypePCMA, ClockRate: 8000, }, - PayloadType: 0, - }, nil - } - - if forma.ChannelCount != 1 { - return webrtc.RTPCodecParameters{ - RTPCodecCapability: webrtc.RTPCodecCapability{ - MimeType: webrtc.MimeTypePCMA, - ClockRate: uint32(forma.SampleRate), - Channels: uint16(forma.ChannelCount), - }, - PayloadType: 96, + PayloadType: 8, }, nil } return webrtc.RTPCodecParameters{ RTPCodecCapability: webrtc.RTPCodecCapability{ - MimeType: webrtc.MimeTypePCMA, - ClockRate: 8000, + MimeType: mimeTypeL16, + ClockRate: uint32(forma.ClockRate()), + Channels: uint16(forma.ChannelCount), }, - PayloadType: 8, + PayloadType: 96, }, nil case *format.LPCM: @@ -141,10 +159,13 @@ func (t *OutgoingTrack) codecParameters() (webrtc.RTPCodecParameters, error) { return webrtc.RTPCodecParameters{}, fmt.Errorf("unsupported LPCM bit depth: %d", forma.BitDepth) } - if forma.ClockRate() != 8000 && forma.ClockRate() != 16000 && forma.ClockRate() != 48000 { + // These are the sample rates and channels supported by Chrome. + // Different sample rates and channels can be streamed too but we don't want compatibility issues. + // https://webrtc.googlesource.com/src/+/refs/heads/main/modules/audio_coding/codecs/pcm16b/audio_decoder_pcm16b.cc#23 + if forma.ClockRate() != 8000 && forma.ClockRate() != 16000 && + forma.ClockRate() != 32000 && forma.ClockRate() != 48000 { return webrtc.RTPCodecParameters{}, fmt.Errorf("unsupported clock rate: %d", forma.ClockRate()) } - if forma.ChannelCount != 1 && forma.ChannelCount != 2 { return webrtc.RTPCodecParameters{}, fmt.Errorf("unsupported channel count: %d", forma.ChannelCount) } diff --git a/internal/protocols/webrtc/peer_connection_test.go b/internal/protocols/webrtc/peer_connection_test.go index c85ff31c..a737bc45 100644 --- a/internal/protocols/webrtc/peer_connection_test.go +++ b/internal/protocols/webrtc/peer_connection_test.go @@ -114,7 +114,35 @@ func TestPeerConnectionPublishRead(t *testing.T) { &format.G722{}, }, { - "g711 pcma stereo", + "g711 pcma 8khz mono", + &format.G711{ + PayloadTyp: 8, + SampleRate: 8000, + ChannelCount: 1, + }, + &format.G711{ + PayloadTyp: 8, + SampleRate: 8000, + ChannelCount: 1, + }, + }, + { + "g711 pcmu 8khz mono", + &format.G711{ + MULaw: true, + PayloadTyp: 0, + SampleRate: 8000, + ChannelCount: 1, + }, + &format.G711{ + MULaw: true, + PayloadTyp: 0, + SampleRate: 8000, + ChannelCount: 1, + }, + }, + { + "g711 pcma 8khz stereo", &format.G711{ PayloadTyp: 96, SampleRate: 8000, @@ -127,7 +155,7 @@ func TestPeerConnectionPublishRead(t *testing.T) { }, }, { - "g711 pcmu stereo", + "g711 pcmu 8khz stereo", &format.G711{ MULaw: true, PayloadTyp: 96, @@ -142,35 +170,36 @@ func TestPeerConnectionPublishRead(t *testing.T) { }, }, { - "g711 pcma mono", + "g711 pcma 16khz stereo", &format.G711{ - PayloadTyp: 8, - SampleRate: 8000, - ChannelCount: 1, + PayloadTyp: 96, + SampleRate: 16000, + ChannelCount: 2, }, - &format.G711{ - PayloadTyp: 8, - SampleRate: 8000, - ChannelCount: 1, + &format.LPCM{ + PayloadTyp: 96, + BitDepth: 16, + SampleRate: 16000, + ChannelCount: 2, }, }, { - "g711 pcmu mono", + "g711 pcmu 16khz stereo", &format.G711{ MULaw: true, - PayloadTyp: 0, - SampleRate: 8000, - ChannelCount: 1, + PayloadTyp: 96, + SampleRate: 16000, + ChannelCount: 2, }, - &format.G711{ - MULaw: true, - PayloadTyp: 0, - SampleRate: 8000, - ChannelCount: 1, + &format.LPCM{ + PayloadTyp: 96, + BitDepth: 16, + SampleRate: 16000, + ChannelCount: 2, }, }, { - "l16 8000 stereo", + "l16 8khz stereo", &format.LPCM{ PayloadTyp: 96, BitDepth: 16, @@ -185,7 +214,7 @@ func TestPeerConnectionPublishRead(t *testing.T) { }, }, { - "l16 16000 stereo", + "l16 16khz stereo", &format.LPCM{ PayloadTyp: 96, BitDepth: 16, diff --git a/internal/servers/webrtc/server_test.go b/internal/servers/webrtc/server_test.go index c5682c34..0167fe06 100644 --- a/internal/servers/webrtc/server_test.go +++ b/internal/servers/webrtc/server_test.go @@ -431,7 +431,7 @@ func TestServerRead(t *testing.T) { []byte{1, 2}, },*/ { - "g711", + "g711 8khz mono", []*description.Media{{ Type: description.MediaTypeAudio, Formats: []format.Format{&format.G711{ @@ -445,6 +445,21 @@ func TestServerRead(t *testing.T) { }, []byte{1, 2, 3}, }, + { + "g711 16khz stereo", + []*description.Media{{ + Type: description.MediaTypeAudio, + Formats: []format.Format{&format.G711{ + MULaw: true, + SampleRate: 16000, + ChannelCount: 2, + }}, + }}, + &unit.G711{ + Samples: []byte{1, 2, 3, 4}, + }, + []byte{0x86, 0x84, 0x8a, 0x84, 0x8e, 0x84, 0x92, 0x84}, + }, { "lpcm", []*description.Media{{ diff --git a/internal/servers/webrtc/session.go b/internal/servers/webrtc/session.go index ea7f1a49..421b4a18 100644 --- a/internal/servers/webrtc/session.go +++ b/internal/servers/webrtc/session.go @@ -2,6 +2,7 @@ package webrtc import ( "context" + "crypto/rand" "encoding/hex" "errors" "fmt" @@ -18,6 +19,7 @@ import ( "github.com/bluenviron/gortsplib/v4/pkg/format/rtpvp8" "github.com/bluenviron/gortsplib/v4/pkg/format/rtpvp9" "github.com/bluenviron/gortsplib/v4/pkg/rtptime" + "github.com/bluenviron/mediacommon/pkg/codecs/g711" "github.com/google/uuid" "github.com/pion/ice/v2" "github.com/pion/sdp/v3" @@ -43,6 +45,15 @@ func uint16Ptr(v uint16) *uint16 { return &v } +func randUint32() (uint32, error) { + var b [4]byte + _, err := rand.Read(b[:]) + if err != nil { + return 0, err + } + return uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3]), nil +} + func findVideoTrack( stream *stream.Stream, writer *asyncwriter.Writer, @@ -254,13 +265,72 @@ func findAudioTrack( if g711Format != nil { return g711Format, func(track *webrtc.OutgoingTrack) error { - stream.AddReader(writer, media, g711Format, func(u unit.Unit) error { - for _, pkt := range u.GetRTPPackets() { - track.WriteRTP(pkt) //nolint:errcheck + if g711Format.SampleRate == 8000 { + curTimestamp, err := randUint32() + if err != nil { + return err } - return nil - }) + stream.AddReader(writer, media, g711Format, func(u unit.Unit) error { + for _, pkt := range u.GetRTPPackets() { + // recompute timestamp from scratch. + // Chrome requires a precise timestamp that FFmpeg doesn't provide. + pkt.Timestamp = curTimestamp + curTimestamp += uint32(len(pkt.Payload)) / uint32(g711Format.ChannelCount) + + track.WriteRTP(pkt) //nolint:errcheck + } + + return nil + }) + } else { + encoder := &rtplpcm.Encoder{ + PayloadType: 96, + PayloadMaxSize: webrtcPayloadMaxSize, + BitDepth: 16, + ChannelCount: g711Format.ChannelCount, + } + err := encoder.Init() + if err != nil { + return err + } + + curTimestamp, err := randUint32() + if err != nil { + return err + } + + stream.AddReader(writer, media, g711Format, func(u unit.Unit) error { + tunit := u.(*unit.G711) + + if tunit.Samples == nil { + return nil + } + + var lpcmSamples []byte + if g711Format.MULaw { + lpcmSamples = g711.DecodeMulaw(tunit.Samples) + } else { + lpcmSamples = g711.DecodeAlaw(tunit.Samples) + } + + packets, err := encoder.Encode(lpcmSamples) + if err != nil { + return nil //nolint:nilerr + } + + for _, pkt := range packets { + // recompute timestamp from scratch. + // Chrome requires a precise timestamp that FFmpeg doesn't provide. + pkt.Timestamp = curTimestamp + curTimestamp += uint32(len(pkt.Payload)) / 2 / uint32(g711Format.ChannelCount) + + track.WriteRTP(pkt) //nolint:errcheck + } + + return nil + }) + } return nil } } @@ -281,6 +351,11 @@ func findAudioTrack( return err } + curTimestamp, err := randUint32() + if err != nil { + return err + } + stream.AddReader(writer, media, lpcmFormat, func(u unit.Unit) error { tunit := u.(*unit.LPCM) @@ -294,7 +369,11 @@ func findAudioTrack( } for _, pkt := range packets { - pkt.Timestamp += tunit.RTPPackets[0].Timestamp + // recompute timestamp from scratch. + // Chrome requires a precise timestamp that FFmpeg doesn't provide. + pkt.Timestamp = curTimestamp + curTimestamp += uint32(len(pkt.Payload)) / 2 / uint32(lpcmFormat.ChannelCount) + track.WriteRTP(pkt) //nolint:errcheck }