From 56cea3294a7eb13ce3620c626856d0329f84f2f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20B=C5=93sch?= Date: Sun, 26 May 2013 18:45:39 +0200 Subject: [PATCH] lavfi/lut3d: faster tetrahedral interpolation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Increase performance by fetching only the necessary points. 1097 → 917 decicycles. --- libavfilter/vf_lut3d.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/libavfilter/vf_lut3d.c b/libavfilter/vf_lut3d.c index 41a7def574..df674d7f26 100644 --- a/libavfilter/vf_lut3d.c +++ b/libavfilter/vf_lut3d.c @@ -144,38 +144,44 @@ static inline struct rgbvec interp_tetrahedral(const LUT3DContext *lut3d, const int next[] = {NEXT(s->r), NEXT(s->g), NEXT(s->b)}; const struct rgbvec d = {s->r - prev[0], s->g - prev[1], s->b - prev[2]}; const struct rgbvec c000 = lut3d->lut[prev[0]][prev[1]][prev[2]]; - const struct rgbvec c001 = lut3d->lut[prev[0]][prev[1]][next[2]]; - const struct rgbvec c010 = lut3d->lut[prev[0]][next[1]][prev[2]]; - const struct rgbvec c011 = lut3d->lut[prev[0]][next[1]][next[2]]; - const struct rgbvec c100 = lut3d->lut[next[0]][prev[1]][prev[2]]; - const struct rgbvec c101 = lut3d->lut[next[0]][prev[1]][next[2]]; - const struct rgbvec c110 = lut3d->lut[next[0]][next[1]][prev[2]]; const struct rgbvec c111 = lut3d->lut[next[0]][next[1]][next[2]]; struct rgbvec c; if (d.r > d.g) { if (d.g > d.b) { + const struct rgbvec c100 = lut3d->lut[next[0]][prev[1]][prev[2]]; + const struct rgbvec c110 = lut3d->lut[next[0]][next[1]][prev[2]]; c.r = (1-d.r) * c000.r + (d.r-d.g) * c100.r + (d.g-d.b) * c110.r + (d.b) * c111.r; c.g = (1-d.r) * c000.g + (d.r-d.g) * c100.g + (d.g-d.b) * c110.g + (d.b) * c111.g; c.b = (1-d.r) * c000.b + (d.r-d.g) * c100.b + (d.g-d.b) * c110.b + (d.b) * c111.b; } else if (d.r > d.b) { + const struct rgbvec c100 = lut3d->lut[next[0]][prev[1]][prev[2]]; + const struct rgbvec c101 = 
lut3d->lut[next[0]][prev[1]][next[2]]; c.r = (1-d.r) * c000.r + (d.r-d.b) * c100.r + (d.b-d.g) * c101.r + (d.g) * c111.r; c.g = (1-d.r) * c000.g + (d.r-d.b) * c100.g + (d.b-d.g) * c101.g + (d.g) * c111.g; c.b = (1-d.r) * c000.b + (d.r-d.b) * c100.b + (d.b-d.g) * c101.b + (d.g) * c111.b; } else { + const struct rgbvec c001 = lut3d->lut[prev[0]][prev[1]][next[2]]; + const struct rgbvec c101 = lut3d->lut[next[0]][prev[1]][next[2]]; c.r = (1-d.b) * c000.r + (d.b-d.r) * c001.r + (d.r-d.g) * c101.r + (d.g) * c111.r; c.g = (1-d.b) * c000.g + (d.b-d.r) * c001.g + (d.r-d.g) * c101.g + (d.g) * c111.g; c.b = (1-d.b) * c000.b + (d.b-d.r) * c001.b + (d.r-d.g) * c101.b + (d.g) * c111.b; } } else { if (d.b > d.g) { + const struct rgbvec c001 = lut3d->lut[prev[0]][prev[1]][next[2]]; + const struct rgbvec c011 = lut3d->lut[prev[0]][next[1]][next[2]]; c.r = (1-d.b) * c000.r + (d.b-d.g) * c001.r + (d.g-d.r) * c011.r + (d.r) * c111.r; c.g = (1-d.b) * c000.g + (d.b-d.g) * c001.g + (d.g-d.r) * c011.g + (d.r) * c111.g; c.b = (1-d.b) * c000.b + (d.b-d.g) * c001.b + (d.g-d.r) * c011.b + (d.r) * c111.b; } else if (d.b > d.r) { + const struct rgbvec c010 = lut3d->lut[prev[0]][next[1]][prev[2]]; + const struct rgbvec c011 = lut3d->lut[prev[0]][next[1]][next[2]]; c.r = (1-d.g) * c000.r + (d.g-d.b) * c010.r + (d.b-d.r) * c011.r + (d.r) * c111.r; c.g = (1-d.g) * c000.g + (d.g-d.b) * c010.g + (d.b-d.r) * c011.g + (d.r) * c111.g; c.b = (1-d.g) * c000.b + (d.g-d.b) * c010.b + (d.b-d.r) * c011.b + (d.r) * c111.b; } else { + const struct rgbvec c010 = lut3d->lut[prev[0]][next[1]][prev[2]]; + const struct rgbvec c110 = lut3d->lut[next[0]][next[1]][prev[2]]; c.r = (1-d.g) * c000.r + (d.g-d.r) * c010.r + (d.r-d.b) * c110.r + (d.b) * c111.r; c.g = (1-d.g) * c000.g + (d.g-d.r) * c010.g + (d.r-d.b) * c110.g + (d.b) * c111.g; c.b = (1-d.g) * c000.b + (d.g-d.r) * c010.b + (d.r-d.b) * c110.b + (d.b) * c111.b;