[cairo] More optimization
André Tupinambá
andrelrt at gmail.com
Sun Mar 23 12:43:54 PDT 2008
Hi,
I ran cairo-perf under the VTune profiler and saw many "hot spots"
in the pixman-compose module... :)
"Well, I can optimize something here too", I thought.
So I chose the radial gradient code and made some changes. See the
patch below...
Regards,
André
- cut here -------------------------------------------------------------------
diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index faf2523..ac12e6d 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -3536,43 +3536,90 @@ static void
pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width
affine = pict->common.transform->matrix[2][0] == 0 &&
v.vector[2] == pixman_fixed_1;
}
+/* Change the code to compute some values outside the loop
+ *
+ * Some numbers and expressions are constant, so we can
+ * compute them once instead of recomputing them inside the loop.
+ *
+ * For pdx and pdy we change the main expression to find the step value.
+ * We change from:
+ *
+ * pdx = rx - c1x
+ *
+ * To:
+ *
+ * pdx = (rx + cx) - c1x
+ * pdx = (rx - c1x) + cx
+ * pdx = pdx + cx
+ *
+ * pdx += cx
+ *
+ * This does not give a great performance boost by itself, but knowing it
+ * lets us examine the B component.
+ *
+ * For B component we use a trick to change the main equation.
+ * The original B equation was:
+ *
+ * B = -2. * (pdx*radial->cdx + pdy*radial->cdy + r1*radial->dr)
+ *
+ * Looking at the equation for the next step:
+ *
+ * B = -2. * ((pdx+cx)*radial->cdx + (pdy+cy)*radial->cdy + r1*radial->dr)
+ * B = -2. * (pdx*radial->cdx + cx*radial->cdx + pdy*radial->cdy + cy*radial->cdy + r1*radial->dr)
+ * B = -2. * (pdx*radial->cdx + pdy*radial->cdy + r1*radial->dr) + -2. * (cx*radial->cdx + cy*radial->cdy)
+ * B = B + -2. * (cx*radial->cdx + cy*radial->cdy)
+ *
+ * Note that -2. * (cx*radial->cdx + cy*radial->cdy) is constant,
+ * so it can be precomputed outside the loop.
+ *
+ * Bstep = -2. * (cx*radial->cdx + cy*radial->cdy)
+ * B += Bstep
+ *
+ * So we replaced the expensive expression with a cheap addition inside the loop.
+ */
if (pict->common.type == RADIAL) {
radial_gradient_t *radial = (radial_gradient_t *)pict;
if (affine) {
+ double c1x = radial->c1.x / 65536.0;
+ double c1y = radial->c1.y / 65536.0;
+ double r1 = radial->c1.radius / 65536.0;
+ double det;
+ pixman_fixed_48_16_t t;
+
+ double r1pow2 = r1 * r1;
+ double pdx = rx - c1x - cx;
+ double pdy = ry - c1y - cy;
+
+ double A = radial->A;
+ double InvA2 = 65536. / (2. * A);
+ double A4 = (4. * A);
+ double Bstep = -2. * (cx*radial->cdx + cy*radial->cdy);
+
+ double B = -2. * ((pdx+cx)*radial->cdx +
(pdy+cy)*radial->cdy + r1*radial->dr) - Bstep;
+ double C;
+
while (buffer < end) {
if (!mask || *mask++ & maskBits)
{
- double pdx, pdy;
- double B, C;
- double det;
- double c1x = radial->c1.x / 65536.0;
- double c1y = radial->c1.y / 65536.0;
- double r1 = radial->c1.radius / 65536.0;
- pixman_fixed_48_16_t t;
+ pdx += cx;
+ pdy += cy;
- pdx = rx - c1x;
- pdy = ry - c1y;
+ B += Bstep;
- B = -2 * ( pdx * radial->cdx
- + pdy * radial->cdy
- + r1 * radial->dr);
- C = (pdx * pdx + pdy * pdy - r1 * r1);
+ C = (pdx * pdx + pdy * pdy - r1pow2);
- det = (B * B) - (4 * radial->A * C);
- if (det < 0.0)
- det = 0.0;
+ det = (B * B) - (A4 * C);
- if (radial->A < 0)
- t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 *
radial->A) * 65536);
- else
- t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 *
radial->A) * 65536);
+ if (det < 0.0)
+ t = (pixman_fixed_48_16_t) ((- B) * InvA2);
+ else if (A < 0)
+ t = (pixman_fixed_48_16_t) ((- B -
sqrt(det)) * InvA2);
+ else
+ t = (pixman_fixed_48_16_t) ((- B +
sqrt(det)) * InvA2);
*(buffer) = _gradient_walker_pixel (&walker, t);
}
++buffer;
-
- rx += cx;
- ry += cy;
}
} else {
/* projective */
More information about the cairo
mailing list