diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c
index 9e786c6dcc..9870936bc5 100644
--- a/video/out/opengl/utils.c
+++ b/video/out/opengl/utils.c
@@ -232,7 +232,7 @@ void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num)
 
     if (ptr) {
         gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer);
-        gl->BufferData(GL_ARRAY_BUFFER, num * vao->stride, ptr, GL_DYNAMIC_DRAW);
+        gl->BufferData(GL_ARRAY_BUFFER, num * vao->stride, ptr, GL_STREAM_DRAW);
         gl->BindBuffer(GL_ARRAY_BUFFER, 0);
     }
 
@@ -1328,8 +1328,17 @@ void gl_pbo_upload_tex(struct gl_pbo_upload *pbo, GL *gl, bool use_pbo,
         pbo->buffer_size = buffer_size;
         gl->GenBuffers(1, &pbo->buffer);
         gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo->buffer);
+        // Magic time: Because we memcpy once from RAM to the buffer, and then
+        // the GPU needs to read from this anyway, we actually *don't* want
+        // this buffer to be allocated in RAM. If we allocate it in VRAM
+        // instead, we can reduce this to a single copy: from RAM into VRAM.
+        // Unfortunately, drivers e.g. nvidia will think GL_STREAM_DRAW is best
+        // allocated on host memory instead of device memory, so we lie about
+        // the usage to fool the driver into giving us a buffer in VRAM instead
+        // of RAM, which can be significantly faster for our use case.
+        // Seriously, fuck OpenGL.
         gl->BufferData(GL_PIXEL_UNPACK_BUFFER, NUM_PBO_BUFFERS * buffer_size,
-                       NULL, GL_DYNAMIC_COPY);
+                       NULL, GL_STREAM_COPY);
     }
 
     size_t offset = buffer_size * pbo->index;