avformat/mov: support cenc (common encryption)

Support reading encrypted MP4 files using AES-CTR, conforming to ISO/IEC 23001-7 (Common Encryption). A new demuxer option was added: decryption_key - the 128-bit decryption key (hex). Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
2015-12-07 12:30:50 +02:00 · 2015-12-07 12:30:50 +02:00 · 3f8564fe3c
parent 15e1fd9883
commit 3f8564fe3c
3 changed files with 195 additions and 0 deletions
--- a/1
+++ b/1
@ -2,6 +2,7 @@ Entries are sorted chronologically from oldest to youngest within each release,
 releases are sorted from youngest to oldest.

 version <next>:
+- Common Encryption (CENC) MP4 decoding support
 - Common Encryption (CENC) MP4 encoding support
 - DXV decoding
 - extrastereo filter
--- a/libavformat/isom.h
+++ b/libavformat/isom.h
@ -37,6 +37,8 @@ extern const AVCodecTag ff_codec_movsubtitle_tags[];
 int ff_mov_iso639_to_lang(const char lang[4], int mp4);
 int ff_mov_lang_to_iso639(unsigned code, char to[4]);

+struct AVAESCTR;
+
 /* the QuickTime file format is quite convoluted...
 * it has lots of index tables, each indexing something in another one...
 * Here we just use what is needed to read the chunks
@ -168,6 +170,15 @@ typedef struct MOVStreamContext {
    int64_t duration_for_fps;

    int32_t *display_matrix;
+    uint32_t format;
+
+    struct {
+        int use_subsamples;
+        uint8_t* auxiliary_info;
+        uint8_t* auxiliary_info_end;
+        uint8_t* auxiliary_info_pos;
+        struct AVAESCTR* aes_ctr;
+    } cenc;
 } MOVStreamContext;

 typedef struct MOVContext {
@ -214,6 +225,8 @@ typedef struct MOVContext {
    void *audible_fixed_key;
    int audible_fixed_key_size;
    struct AVAES *aes_decrypt;
+    uint8_t *decryption_key;
+    int decryption_key_len;
 } MOVContext;

 int ff_mp4_read_descr_len(AVIOContext *pb);
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@ -39,6 +39,7 @@
 #include "libavutil/display.h"
 #include "libavutil/opt.h"
 #include "libavutil/aes.h"
+#include "libavutil/aes_ctr.h"
 #include "libavutil/sha.h"
 #include "libavutil/timecode.h"
 #include "libavcodec/ac3tab.h"
@ -2172,6 +2173,7 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries)

        sc->pseudo_stream_id = st->codec->codec_tag ? -1 : pseudo_stream_id;
        sc->dref_id= dref_id;
+        sc->format = format;

        id = mov_codec_id(st, format);

@ -3956,6 +3958,164 @@ static int mov_read_free(MOVContext *c, AVIOContext *pb, MOVAtom atom)
    return 0;
 }

+/**
+ * Handle a 'frma' atom.
+ *
+ * Reads the 32-bit format tag stored in the atom. When the current (last)
+ * stream was declared with an 'encv' or 'enca' sample entry, that tag becomes
+ * the stream's format and its codec id is derived from it. Any other
+ * situation only produces a warning.
+ *
+ * @return always 0; problems are logged, never reported as errors
+ */
+static int mov_read_frma(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    uint32_t orig_format = avio_rl32(pb);
+    AVStream *stream;
+    MOVStreamContext *track;
+    enum AVCodecID new_id;
+    int entry_is_encrypted;
+
+    /* nothing to attach the tag to */
+    if (c->fc->nb_streams < 1)
+        return 0;
+
+    stream = c->fc->streams[c->fc->nb_streams - 1];
+    track  = stream->priv_data;
+
+    entry_is_encrypted = track->format == MKTAG('e','n','c','v') ||  /* encrypted video */
+                         track->format == MKTAG('e','n','c','a');    /* encrypted audio */
+    if (!entry_is_encrypted) {
+        av_log(c->fc, AV_LOG_WARNING,
+               "ignoring 'frma' atom of '%.4s', stream format is '%.4s'\n",
+               (char*)&orig_format, (char*)&track->format);
+        return 0;
+    }
+
+    new_id = mov_codec_id(stream, orig_format);
+
+    /* do not override a codec id that was already settled on a different codec */
+    if (stream->codec->codec_id != AV_CODEC_ID_NONE &&
+        stream->codec->codec_id != new_id) {
+        av_log(c->fc, AV_LOG_WARNING,
+               "ignoring 'frma' atom of '%.4s', stream has codec id %d\n",
+               (char*)&orig_format, stream->codec->codec_id);
+        return 0;
+    }
+
+    stream->codec->codec_id = new_id;
+    track->format           = orig_format;
+
+    return 0;
+}
+
+/**
+ * Handle a 'senc' atom.
+ *
+ * Stores the atom payload that follows the 8 byte header (version, flags,
+ * entry count) verbatim in sc->cenc.auxiliary_info for later use by
+ * cenc_filter(), and creates the AES-CTR context of the current (last)
+ * stream from the user supplied decryption key.
+ *
+ * The atom is silently ignored when no decryption key was given or when no
+ * stream exists yet.
+ *
+ * @return 0 on success or when the atom is ignored,
+ *         AVERROR_INVALIDDATA for a duplicate, undersized, oversized or
+ *         truncated atom, AVERROR(ENOMEM) on allocation failure, otherwise
+ *         the return value of av_aes_ctr_init()
+ */
+static int mov_read_senc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    AVStream *st;
+    MOVStreamContext *sc;
+    int auxiliary_info_size;
+
+    if (c->decryption_key_len == 0 || c->fc->nb_streams < 1)
+        return 0;
+
+    st = c->fc->streams[c->fc->nb_streams - 1];
+    sc = st->priv_data;
+
+    if (sc->cenc.aes_ctr) {
+        av_log(c->fc, AV_LOG_ERROR, "duplicate senc atom\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* validate the size before consuming anything from the atom */
+    if (atom.size < 8) {
+        av_log(c->fc, AV_LOG_ERROR, "senc atom size %"PRId64" too small\n", atom.size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* avio_read() takes an int length, so the payload must fit in one */
+    if (atom.size - 8 > INT_MAX) {
+        av_log(c->fc, AV_LOG_ERROR, "senc atom size %"PRId64" too big\n", atom.size);
+        return AVERROR_INVALIDDATA;
+    }
+
+    avio_r8(pb); /* version */
+    sc->cenc.use_subsamples = avio_rb24(pb) & 0x02; /* flags */
+
+    avio_rb32(pb);        /* entries */
+
+    /* save the auxiliary info as is */
+    auxiliary_info_size = atom.size - 8;
+
+    sc->cenc.auxiliary_info = av_malloc(auxiliary_info_size);
+    if (!sc->cenc.auxiliary_info) {
+        return AVERROR(ENOMEM);
+    }
+
+    sc->cenc.auxiliary_info_end = sc->cenc.auxiliary_info + auxiliary_info_size;
+
+    sc->cenc.auxiliary_info_pos = sc->cenc.auxiliary_info;
+
+    if (avio_read(pb, sc->cenc.auxiliary_info, auxiliary_info_size) != auxiliary_info_size) {
+        av_log(c->fc, AV_LOG_ERROR, "failed to read the auxiliary info\n");
+        /* do not keep a partially filled buffer (and its cursors) around */
+        av_freep(&sc->cenc.auxiliary_info);
+        sc->cenc.auxiliary_info_end = NULL;
+        sc->cenc.auxiliary_info_pos = NULL;
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* initialize the cipher */
+    sc->cenc.aes_ctr = av_aes_ctr_alloc();
+    if (!sc->cenc.aes_ctr) {
+        return AVERROR(ENOMEM);
+    }
+
+    return av_aes_ctr_init(sc->cenc.aes_ctr, c->decryption_key);
+}
+
+/**
+ * Decrypt one packet in place with the stream's AES-CTR context.
+ *
+ * Consumes the next entry from sc->cenc.auxiliary_info: an IV of
+ * AES_CTR_IV_SIZE bytes and, when subsamples are in use, a 16-bit subsample
+ * count followed by (16-bit clear byte count, 32-bit encrypted byte count)
+ * pairs, all big-endian. Without subsamples the whole packet is decrypted.
+ *
+ * @param input packet payload, modified in place
+ * @param size  payload size in bytes
+ * @return 0 on success, AVERROR_INVALIDDATA when the auxiliary info runs out
+ *         or the subsamples do not cover the packet exactly
+ */
+static int cenc_filter(MOVContext *c, MOVStreamContext *sc, uint8_t *input, int size)
+{
+    uint8_t *const packet_end = input + size;
+    uint8_t *cur = input;
+    unsigned nb_subsamples, i;
+
+    /* each entry begins with the IV */
+    if (sc->cenc.auxiliary_info_end - sc->cenc.auxiliary_info_pos < AES_CTR_IV_SIZE) {
+        av_log(c->fc, AV_LOG_ERROR, "failed to read iv from the auxiliary info\n");
+        return AVERROR_INVALIDDATA;
+    }
+    av_aes_ctr_set_iv(sc->cenc.aes_ctr, sc->cenc.auxiliary_info_pos);
+    sc->cenc.auxiliary_info_pos += AES_CTR_IV_SIZE;
+
+    /* no subsample table: the packet is encrypted as a whole */
+    if (!sc->cenc.use_subsamples) {
+        av_aes_ctr_crypt(sc->cenc.aes_ctr, cur, cur, size);
+        return 0;
+    }
+
+    /* 16-bit subsample count */
+    if (sc->cenc.auxiliary_info_end - sc->cenc.auxiliary_info_pos < 2) {
+        av_log(c->fc, AV_LOG_ERROR, "failed to read subsample count from the auxiliary info\n");
+        return AVERROR_INVALIDDATA;
+    }
+    nb_subsamples = AV_RB16(sc->cenc.auxiliary_info_pos);
+    sc->cenc.auxiliary_info_pos += 2;
+
+    for (i = 0; i < nb_subsamples; i++) {
+        unsigned clear_len;
+        uint32_t enc_len;
+
+        /* every subsample record is 2 + 4 bytes */
+        if (sc->cenc.auxiliary_info_end - sc->cenc.auxiliary_info_pos < 6) {
+            av_log(c->fc, AV_LOG_ERROR, "failed to read subsample from the auxiliary info\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        clear_len = AV_RB16(sc->cenc.auxiliary_info_pos);
+        enc_len   = AV_RB32(sc->cenc.auxiliary_info_pos + 2);
+        sc->cenc.auxiliary_info_pos += 6;
+
+        /* widen before adding so the sum cannot wrap */
+        if ((uint64_t)clear_len + enc_len > packet_end - cur) {
+            av_log(c->fc, AV_LOG_ERROR, "subsample size exceeds the packet size left\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        /* clear bytes are passed through untouched */
+        cur += clear_len;
+
+        /* the encrypted run is decrypted in place */
+        av_aes_ctr_crypt(sc->cenc.aes_ctr, cur, cur, enc_len);
+        cur += enc_len;
+    }
+
+    /* the subsamples must account for every byte of the packet */
+    if (cur < packet_end) {
+        av_log(c->fc, AV_LOG_ERROR, "leftover packet bytes after subsample processing\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    return 0;
+}
+
 static const MOVParseTableEntry mov_default_parse_table[] = {
 { MKTAG('A','C','L','R'), mov_read_aclr },
 { MKTAG('A','P','R','G'), mov_read_avid },
@ -4030,6 +4190,9 @@ static const MOVParseTableEntry mov_default_parse_table[] = {
 { MKTAG('C','i','n', 0x8e), mov_read_targa_y216 },
 { MKTAG('f','r','e','e'), mov_read_free },
 { MKTAG('-','-','-','-'), mov_read_custom },
+{ MKTAG('s','i','n','f'), mov_read_default },
+{ MKTAG('f','r','m','a'), mov_read_frma },
+{ MKTAG('s','e','n','c'), mov_read_senc },
 { 0, NULL }
 };

@ -4388,6 +4551,9 @@ static int mov_read_close(AVFormatContext *s)
        av_freep(&sc->elst_data);
        av_freep(&sc->rap_group);
        av_freep(&sc->display_matrix);
+
+        av_freep(&sc->cenc.auxiliary_info);
+        av_aes_ctr_free(sc->cenc.aes_ctr);
    }

    if (mov->dv_demux) {
@ -4565,6 +4731,12 @@ static int mov_read_header(AVFormatContext *s)
    MOVAtom atom = { AV_RL32("root") };
    int i;

+    if (mov->decryption_key_len != 0 && mov->decryption_key_len != AES_CTR_KEY_SIZE) {
+        av_log(s, AV_LOG_ERROR, "Invalid decryption key len %d expected %d\n",
+            mov->decryption_key_len, AES_CTR_KEY_SIZE);
+        return AVERROR(EINVAL);
+    }
+
    mov->fc = s;
    mov->trak_index = -1;
    /* .mov and .mp4 aren't streamable anyway (only progressive download if moov is before mdat) */
@ -4882,6 +5054,13 @@ static int mov_read_packet(AVFormatContext *s, AVPacket *pkt)
    if (mov->aax_mode)
        aax_filter(pkt->data, pkt->size, mov);

+    if (sc->cenc.aes_ctr) {
+        ret = cenc_filter(mov, sc, pkt->data, pkt->size);
+        if (ret) {
+            return ret;
+        }
+    }
+
    return 0;
 }

@ -5032,6 +5211,8 @@ static const AVOption mov_options[] = {
        "Fixed key used for handling Audible AAX files", OFFSET(audible_fixed_key),
        AV_OPT_TYPE_BINARY, {.str="77214d4b196a87cd520045fd20a51d67"},
        .flags = AV_OPT_FLAG_DECODING_PARAM },
+    { "decryption_key", "The media decryption key (hex)", OFFSET(decryption_key), AV_OPT_TYPE_BINARY, .flags = AV_OPT_FLAG_DECODING_PARAM },
+
    { NULL },
 };