diff --git a/libavformat/isom.h b/libavformat/isom.h
index f05c2d9c28..9d8646d2ea 100644
--- a/libavformat/isom.h
+++ b/libavformat/isom.h
@@ -318,6 +318,12 @@ typedef struct MOVContext {
     uint32_t max_stts_delta;
     int is_still_picture_avif;
     int primary_item_id;
+    struct {
+        int item_id;
+        int extent_length;
+        int64_t extent_offset;
+    } *avif_info;
+    int avif_info_size;
 } MOVContext;
 
 int ff_mp4_read_descr_len(AVIOContext *pb);
diff --git a/libavformat/mov.c b/libavformat/mov.c
index a09a762d91..6ee6ed0950 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -4698,6 +4698,69 @@ static int mov_read_custom(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     return ret;
 }
 
+static int avif_add_stream(MOVContext *c, int item_id)
+{
+    MOVStreamContext *sc;
+    AVStream *st;
+    int item_index = -1;
+    for (int i = 0; i < c->avif_info_size; i++)
+        if (c->avif_info[i].item_id == item_id) {
+            item_index = i;
+            break;
+        }
+    if (item_index < 0)
+        return AVERROR_INVALIDDATA;
+    st = avformat_new_stream(c->fc, NULL);
+    if (!st)
+        return AVERROR(ENOMEM);
+    st->id = c->fc->nb_streams;
+    sc = av_mallocz(sizeof(MOVStreamContext));
+    if (!sc)
+        return AVERROR(ENOMEM);
+
+    st->priv_data = sc;
+    st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
+    st->codecpar->codec_id = AV_CODEC_ID_AV1;
+    sc->ffindex = st->index;
+    c->trak_index = st->index;
+    st->avg_frame_rate.num = st->avg_frame_rate.den = 1;
+    st->time_base.num = st->time_base.den = 1;
+    st->nb_frames = 1;
+    sc->time_scale = 1;
+    sc = st->priv_data;
+    sc->pb = c->fc->pb;
+    sc->pb_is_copied = 1;
+
+    // Populate the necessary fields used by mov_build_index.
+    sc->stsc_count = 1;
+    sc->stsc_data = av_malloc_array(1, sizeof(*sc->stsc_data));
+    if (!sc->stsc_data)
+        return AVERROR(ENOMEM);
+    sc->stsc_data[0].first = 1;
+    sc->stsc_data[0].count = 1;
+    sc->stsc_data[0].id = 1;
+    sc->chunk_count = 1;
+    sc->chunk_offsets = av_malloc_array(1, sizeof(*sc->chunk_offsets));
+    if (!sc->chunk_offsets)
+        return AVERROR(ENOMEM);
+    sc->sample_count = 1;
+    sc->sample_sizes = av_malloc_array(1, sizeof(*sc->sample_sizes));
+    if (!sc->sample_sizes)
+        return AVERROR(ENOMEM);
+    sc->stts_count = 1;
+    sc->stts_data = av_malloc_array(1, sizeof(*sc->stts_data));
+    if (!sc->stts_data)
+        return AVERROR(ENOMEM);
+    sc->stts_data[0].count = 1;
+    // Not used for still images. But needed by mov_build_index.
+    sc->stts_data[0].duration = 0;
+    sc->sample_sizes[0] = c->avif_info[item_index].extent_length;
+    sc->chunk_offsets[0] = c->avif_info[item_index].extent_offset;
+
+    mov_build_index(c, st);
+    return 0;
+}
+
 static int mov_read_meta(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
     while (atom.size > 8) {
@@ -4707,9 +4770,23 @@ static int mov_read_meta(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         tag = avio_rl32(pb);
         atom.size -= 4;
         if (tag == MKTAG('h','d','l','r')) {
+            int ret;
             avio_seek(pb, -8, SEEK_CUR);
             atom.size += 8;
-            return mov_read_default(c, pb, atom);
+            if ((ret = mov_read_default(c, pb, atom)) < 0)
+                return ret;
+            if (c->is_still_picture_avif) {
+                int ret;
+                // Add a stream for the YUV planes (primary item).
+                if ((ret = avif_add_stream(c, c->primary_item_id)) < 0)
+                    return ret;
+                // For still AVIF images, the meta box contains all the
+                // necessary information that would generally be provided by the
+                // moov box. So simply mark that we have found the moov box so
+                // that parsing can continue.
+                c->found_moov = 1;
+            }
+            return ret;
         }
     }
     return 0;
@@ -7478,8 +7555,6 @@ static int mov_read_iloc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     int item_count, extent_count;
     uint64_t base_offset, extent_offset, extent_length;
     uint8_t value;
-    AVStream *st;
-    MOVStreamContext *sc;
 
     if (!c->is_still_picture_avif) {
         // * For non-avif, we simply ignore the iloc box.
@@ -7493,27 +7568,6 @@ static int mov_read_iloc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         return 0;
     }
 
-    st = avformat_new_stream(c->fc, NULL);
-    if (!st)
-        return AVERROR(ENOMEM);
-    st->id = c->fc->nb_streams;
-    sc = av_mallocz(sizeof(MOVStreamContext));
-    if (!sc)
-        return AVERROR(ENOMEM);
-
-    st->priv_data = sc;
-    st->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
-    st->codecpar->codec_id = AV_CODEC_ID_AV1;
-    sc->ffindex = st->index;
-    c->trak_index = st->index;
-    st->avg_frame_rate.num = st->avg_frame_rate.den = 1;
-    st->time_base.num = st->time_base.den = 1;
-    st->nb_frames = 1;
-    sc->time_scale = 1;
-    sc = st->priv_data;
-    sc->pb = c->fc->pb;
-    sc->pb_is_copied = 1;
-
     version = avio_r8(pb);
     avio_rb24(pb);  // flags.
 
@@ -7529,34 +7583,17 @@ static int mov_read_iloc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     }
     item_count = (version < 2) ? avio_rb16(pb) : avio_rb32(pb);
 
-    // Populate the necessary fields used by mov_build_index.
-    sc->stsc_count = 1;
-    sc->stsc_data = av_malloc_array(1, sizeof(*sc->stsc_data));
-    if (!sc->stsc_data)
+    c->avif_info = av_malloc_array(item_count, sizeof(*c->avif_info));
+    if (!c->avif_info)
         return AVERROR(ENOMEM);
-    sc->stsc_data[0].first = 1;
-    sc->stsc_data[0].count = 1;
-    sc->stsc_data[0].id = 1;
-    sc->chunk_count = 1;
-    sc->chunk_offsets = av_malloc_array(1, sizeof(*sc->chunk_offsets));
-    if (!sc->chunk_offsets)
-        return AVERROR(ENOMEM);
-    sc->sample_count = 1;
-    sc->sample_sizes = av_malloc_array(1, sizeof(*sc->sample_sizes));
-    if (!sc->sample_sizes)
-        return AVERROR(ENOMEM);
-    sc->stts_count = 1;
-    sc->stts_data = av_malloc_array(1, sizeof(*sc->stts_data));
-    if (!sc->stts_data)
-        return AVERROR(ENOMEM);
-    sc->stts_data[0].count = 1;
-    // Not used for still images. But needed by mov_build_index.
-    sc->stts_data[0].duration = 0;
+    c->avif_info_size = item_count;
 
     for (int i = 0; i < item_count; i++) {
         int item_id = (version < 2) ? avio_rb16(pb) : avio_rb32(pb);
         if (avio_feof(pb))
             return AVERROR_INVALIDDATA;
+        c->avif_info[i].item_id = item_id;
+
         if (version > 0)
             avio_rb16(pb);  // construction_method.
         avio_rb16(pb);  // data_reference_index.
@@ -7572,20 +7609,11 @@ static int mov_read_iloc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
             if (rb_size(pb, &extent_offset, offset_size) < 0 ||
                 rb_size(pb, &extent_length, length_size) < 0)
                 return AVERROR_INVALIDDATA;
-            if (item_id == c->primary_item_id) {
-                sc->sample_sizes[0] = extent_length;
-                sc->chunk_offsets[0] = base_offset + extent_offset;
-            }
+            c->avif_info[i].extent_length = extent_length;
+            c->avif_info[i].extent_offset = base_offset + extent_offset;
         }
     }
 
-    mov_build_index(c, st);
-
-    // For still AVIF images, the iloc box contains all the necessary
-    // information that would generally be provided by the moov box. So simply
-    // mark that we have found the moov box so that parsing can continue.
-    c->found_moov = 1;
-
     return atom.size;
 }
 
@@ -8189,6 +8217,7 @@ static int mov_read_close(AVFormatContext *s)
 
     av_freep(&mov->aes_decrypt);
     av_freep(&mov->chapter_tracks);
+    av_freep(&mov->avif_info);
 
     return 0;
 }