vo_gpu_next: use pl_dispatch_info_move to avoid useless data copy

Instead copy the data on-demand when VOCTRL_PERFORMANCE_DATA is
requested.
This commit is contained in:
Kacper Michajłow 2023-04-29 22:08:50 +02:00 committed by Niklas Haas
parent c637beb522
commit b73d96776c
2 changed files with 47 additions and 23 deletions

View File

@ -930,8 +930,8 @@ if features['libplacebo']
endif
libplacebo_next = get_option('libplacebo-next').require(
features['libplacebo'] and libplacebo.version().version_compare('>=5.264.0'),
error_message: 'libplacebo v5.264.0+ was not found!',
features['libplacebo'] and libplacebo.version().version_compare('>=5.266.0'),
error_message: 'libplacebo v5.266.0+ was not found!',
)
features += {'libplacebo-next': libplacebo_next.allowed()}
if features['libplacebo-next']

View File

@ -84,6 +84,11 @@ struct user_lut {
struct pl_custom_lut *lut;
};
struct frame_info {
int count;
struct pl_dispatch_info info[VO_PASS_PERF_MAX];
};
struct priv {
struct mp_log *log;
struct mpv_global *global;
@ -145,8 +150,9 @@ struct priv {
int num_user_hooks;
// Performance data of last frame
struct voctrl_performance_data perf;
pthread_mutex_t perf_lock;
struct frame_info perf_fresh;
struct frame_info perf_redraw;
bool delayed_peak;
bool inter_preserve;
@ -750,30 +756,17 @@ static void info_callback(void *priv, const struct pl_render_info *info)
if (info->index >= VO_PASS_PERF_MAX)
return; // silently ignore clipped passes, whatever
struct mp_frame_perf *frame;
struct frame_info *frame;
switch (info->stage) {
case PL_RENDER_STAGE_FRAME: frame = &p->perf.fresh; break;
case PL_RENDER_STAGE_BLEND: frame = &p->perf.redraw; break;
case PL_RENDER_STAGE_FRAME: frame = &p->perf_fresh; break;
case PL_RENDER_STAGE_BLEND: frame = &p->perf_redraw; break;
default: abort();
}
int index = info->index;
struct mp_pass_perf *perf = &frame->perf[index];
const struct pl_dispatch_info *pass = info->pass;
static_assert(VO_PERF_SAMPLE_COUNT >= MP_ARRAY_SIZE(pass->samples), "");
assert(pass->num_samples <= MP_ARRAY_SIZE(pass->samples));
pthread_mutex_lock(&p->perf_lock);
perf->count = MPMIN(pass->num_samples, VO_PERF_SAMPLE_COUNT);
memcpy(perf->samples, pass->samples, perf->count * sizeof(pass->samples[0]));
perf->last = pass->last;
perf->peak = pass->peak;
perf->avg = pass->average;
strncpy(frame->desc[index], pass->shader->description, sizeof(frame->desc[index]) - 1);
frame->desc[index][sizeof(frame->desc[index]) - 1] = '\0';
frame->count = index + 1;
frame->count = info->index + 1;
pl_dispatch_info_move(&frame->info[info->index], info->pass);
pthread_mutex_unlock(&p->perf_lock);
}
@ -1274,6 +1267,30 @@ done:
pl_tex_destroy(gpu, &fbo);
}
static inline void copy_frame_info_to_mp(struct frame_info *pl,
struct mp_frame_perf *mp) {
static_assert(MP_ARRAY_SIZE(pl->info) == MP_ARRAY_SIZE(mp->perf), "");
assert(pl->count <= VO_PASS_PERF_MAX);
mp->count = MPMIN(pl->count, VO_PASS_PERF_MAX);
for (int i = 0; i < mp->count; ++i) {
const struct pl_dispatch_info *pass = &pl->info[i];
static_assert(VO_PERF_SAMPLE_COUNT >= MP_ARRAY_SIZE(pass->samples), "");
assert(pass->num_samples <= MP_ARRAY_SIZE(pass->samples));
struct mp_pass_perf *perf = &mp->perf[i];
perf->count = MPMIN(pass->num_samples, VO_PERF_SAMPLE_COUNT);
memcpy(perf->samples, pass->samples, perf->count * sizeof(pass->samples[0]));
perf->last = pass->last;
perf->peak = pass->peak;
perf->avg = pass->average;
strncpy(mp->desc[i], pass->shader->description, sizeof(mp->desc[i]) - 1);
mp->desc[i][sizeof(mp->desc[i]) - 1] = '\0';
}
}
static int control(struct vo *vo, uint32_t request, void *data)
{
struct priv *p = vo->priv;
@ -1315,11 +1332,14 @@ static int control(struct vo *vo, uint32_t request, void *data)
p->want_reset = true;
return VO_TRUE;
case VOCTRL_PERFORMANCE_DATA:
case VOCTRL_PERFORMANCE_DATA: {
pthread_mutex_lock(&p->perf_lock);
*(struct voctrl_performance_data *) data = p->perf;
struct voctrl_performance_data *perf = data;
copy_frame_info_to_mp(&p->perf_fresh, &perf->fresh);
copy_frame_info_to_mp(&p->perf_redraw, &perf->redraw);
pthread_mutex_unlock(&p->perf_lock);
return true;
}
case VOCTRL_SCREENSHOT:
video_screenshot(vo, data);
@ -1422,6 +1442,10 @@ static void uninit(struct vo *vo)
pl_renderer_destroy(&p->rr);
pthread_mutex_destroy(&p->perf_lock);
for (int i = 0; i < VO_PASS_PERF_MAX; ++i) {
pl_shader_info_deref(&p->perf_fresh.info[i].shader);
pl_shader_info_deref(&p->perf_redraw.info[i].shader);
}
p->ra_ctx = NULL;
p->pllog = NULL;