[PATCH] Optimize x*x*x*x*x*x using 3 multiplications.

Discussion:

Roger Sayle

2003-07-29 17:13:44 UTC

The following patch allows gcc to optimize floating point expressions
such as x*x*x*x*x*x when compiled with -ffast-math to use only three
multiplications rather than five (as currently generated by mainline).

The secret is to reuse the new __builtin_pow{,f,l} optimizations by
adding three "term rewriting" rules to GCC's constant folding.
These are:

x*x => pow(x,2)
x*pow(x,c) => pow(x,c+1)
pow(x,c)*x => pow(x,c+1)

and just for luck, I've also added:

pow(x,c)/x => pow(x,c-1)

We already have rules for y/pow(x,z) => y*pow(x,-z) and also for
pow(x,y)*pow(x,z) => pow(x,y+z).

For the computer scientists and/or reviewers here are some invariants.
These transformations guarantee that we only use pow/powf/powl as a place
holder when we know that expand_builtin_pow will generate multiplications.
Firstly, we only convert x*x into pow(x,2) with unsafe_math_optimizations
and !optimize_size. Secondly, all of GCC's constant folding
transformations preserve the second argument as an integer, unless the
source code contains an explicit call to pow/powf/powl. Thirdly, pow
is guaranteed not to affect errno if the second argument is an integer.

One piece of ugliness is that we now have to disable the constant folding
transformation of pow(x,2) into x*x. Clearly, if GCC's constant folding
transforms both ways calling fold on the result, we'll end up with
unbounded recursion. For this reason, pow(x,-1), pow(x,0) and pow(x,1)
have "canonical" representations, 1.0/x, 1.0 and x respectively, and
all other expressions prefer the "functional" form. The patch below
therefore disables the inverse transformation *during folding*.

To compensate for disabling pow(x,2) into x*x in constant folding, which
is done even without -ffast-math or with -Os, I also do the counter-tweak
to expand_builtin_pow, so that we always expand exponents -1, 0, 1 and 2
as RTL. The only functional change should be that we no longer convert
pow(x,-2) into 1.0/(x*x) with "-ffast-math -Os". This should not be
unreasonable as 1.0/(x*x) may be larger on machines without FP insns.

To demonstrate, with "-O2 -ffast-math -fomit-frame-pointer" on x86:

/* Example input: x raised to the sixth power, written out as five
   explicit multiplications.  */
double foo(double x)
{
return x*x*x*x*x*x;
}

Before...
foo:
fldl 4(%esp)
fld %st(0)
fmul %st(1), %st
fmul %st(1), %st
fmul %st(1), %st
fmul %st(1), %st
fmulp %st, %st(1)
ret

After...
foo:
fldl 4(%esp)
fld %st(0)
fmul %st(1), %st
fmulp %st, %st(1)
fmul %st(0), %st
ret

The following patch has been tested on i686-pc-linux-gnu with a full
"make bootstrap", all languages except treelang, and regression tested
with a top-level "make -k check" with no new failures.

Ok for mainline?

2003-07-29 Roger Sayle <***@eyesopen.com>

* fold-const.c (fold <MULT_EXPR>): Optimize both x*pow(x,c) and
pow(x,c)*x as pow(x,c+1) for constant values c. Optimize x*x
as pow(x,2.0) when the latter will be expanded back into x*x.
(fold <RDIV_EXPR>): Optimize pow(x,c)/x as pow(x,c-1).
* builtins.c (expand_builtin_pow): Ignore flag_errno_math as
pow can never set errno when used with an integer exponent.
Always use expand_powi when exponent is -1, 0, 1 or 2.
(fold_builtin): Don't rewrite pow(x,2.0) as x*x nor pow(x,-2.0)
as 1.0/(x*x). This avoids unbounded recursion as we now prefer
the pow forms of these expressions.

* gcc.dg/builtins-27.c: New test case.

Index: fold-const.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/fold-const.c,v
retrieving revision 1.284
diff -c -3 -p -r1.284 fold-const.c
*** fold-const.c 22 Jul 2003 23:30:06 -0000 1.284
--- fold-const.c 28 Jul 2003 22:15:45 -0000
*************** fold (tree expr)
*** 6049,6054 ****
--- 6049,6128 ----
return build_function_call_expr (sinfn,
TREE_OPERAND (arg0, 1));
}
+
+ /* Optimize x*pow(x,c) as pow(x,c+1). */
+ if (fcode1 == BUILT_IN_POW
+ || fcode1 == BUILT_IN_POWF
+ || fcode1 == BUILT_IN_POWL)
+ {
+ tree arg10 = TREE_VALUE (TREE_OPERAND (arg1, 1));
+ tree arg11 = TREE_VALUE (TREE_CHAIN (TREE_OPERAND (arg1,
+ 1)));
+ if (TREE_CODE (arg11) == REAL_CST
+ && ! TREE_CONSTANT_OVERFLOW (arg11)
+ && operand_equal_p (arg0, arg10, 0))
+ {
+ tree powfn = TREE_OPERAND (TREE_OPERAND (arg1, 0), 0);
+ REAL_VALUE_TYPE c;
+ tree arg, arglist;
+
+ c = TREE_REAL_CST (arg11);
+ real_arithmetic (&c, PLUS_EXPR, &c, &dconst1);
+ arg = build_real (type, c);
+ arglist = build_tree_list (NULL_TREE, arg);
+ arglist = tree_cons (NULL_TREE, arg0, arglist);
+ return build_function_call_expr (powfn, arglist);
+ }
+ }
+
+ /* Optimize pow(x,c)*x as pow(x,c+1). */
+ if (fcode0 == BUILT_IN_POW
+ || fcode0 == BUILT_IN_POWF
+ || fcode0 == BUILT_IN_POWL)
+ {
+ tree arg00 = TREE_VALUE (TREE_OPERAND (arg0, 1));
+ tree arg01 = TREE_VALUE (TREE_CHAIN (TREE_OPERAND (arg0,
+ 1)));
+ if (TREE_CODE (arg01) == REAL_CST
+ && ! TREE_CONSTANT_OVERFLOW (arg01)
+ && operand_equal_p (arg1, arg00, 0))
+ {
+ tree powfn = TREE_OPERAND (TREE_OPERAND (arg0, 0), 0);
+ REAL_VALUE_TYPE c;
+ tree arg, arglist;
+
+ c = TREE_REAL_CST (arg01);
+ real_arithmetic (&c, PLUS_EXPR, &c, &dconst1);
+ arg = build_real (type, c);
+ arglist = build_tree_list (NULL_TREE, arg);
+ arglist = tree_cons (NULL_TREE, arg1, arglist);
+ return build_function_call_expr (powfn, arglist);
+ }
+ }
+
+ /* Optimize x*x as pow(x,2.0), which is expanded as x*x. */
+ if (! optimize_size
+ && operand_equal_p (arg0, arg1, 0))
+ {
+ tree powfn;
+
+ if (type == double_type_node)
+ powfn = implicit_built_in_decls[BUILT_IN_POW];
+ else if (type == float_type_node)
+ powfn = implicit_built_in_decls[BUILT_IN_POWF];
+ else if (type == long_double_type_node)
+ powfn = implicit_built_in_decls[BUILT_IN_POWL];
+ else
+ powfn = NULL_TREE;
+
+ if (powfn)
+ {
+ tree arg = build_real (type, dconst2);
+ tree arglist = build_tree_list (NULL_TREE, arg);
+ arglist = tree_cons (NULL_TREE, arg0, arglist);
+ return build_function_call_expr (powfn, arglist);
+ }
+ }
}
}
goto associate;
*************** fold (tree expr)
*** 6304,6309 ****
--- 6378,6407 ----
return fold (build (RDIV_EXPR, type,
build_real (type, dconst1),
tmp));
+ }
+ }
+
+ /* Optimize pow(x,c)/x as pow(x,c-1). */
+ if (fcode0 == BUILT_IN_POW
+ || fcode0 == BUILT_IN_POWF
+ || fcode0 == BUILT_IN_POWL)
+ {
+ tree arg00 = TREE_VALUE (TREE_OPERAND (arg0, 1));
+ tree arg01 = TREE_VALUE (TREE_CHAIN (TREE_OPERAND (arg0, 1)));
+ if (TREE_CODE (arg01) == REAL_CST
+ && ! TREE_CONSTANT_OVERFLOW (arg01)
+ && operand_equal_p (arg1, arg00, 0))
+ {
+ tree powfn = TREE_OPERAND (TREE_OPERAND (arg0, 0), 0);
+ REAL_VALUE_TYPE c;
+ tree arg, arglist;
+
+ c = TREE_REAL_CST (arg01);
+ real_arithmetic (&c, MINUS_EXPR, &c, &dconst1);
+ arg = build_real (type, c);
+ arglist = build_tree_list (NULL_TREE, arg);
+ arglist = tree_cons (NULL_TREE, arg1, arglist);
+ return build_function_call_expr (powfn, arglist);
}
}
}
Index: builtins.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/builtins.c,v
retrieving revision 1.234
diff -c -3 -p -r1.234 builtins.c
*** builtins.c 24 Jul 2003 21:04:12 -0000 1.234
--- builtins.c 28 Jul 2003 22:15:53 -0000
*************** expand_builtin_pow (tree exp, rtx target
*** 2170,2179 ****
arg0 = TREE_VALUE (arglist);
arg1 = TREE_VALUE (TREE_CHAIN (arglist));

! if (flag_unsafe_math_optimizations
! && ! flag_errno_math
! && ! optimize_size
! && TREE_CODE (arg1) == REAL_CST
&& ! TREE_CONSTANT_OVERFLOW (arg1))
{
REAL_VALUE_TYPE cint;
--- 2170,2176 ----
arg0 = TREE_VALUE (arglist);
arg1 = TREE_VALUE (TREE_CHAIN (arglist));

! if (TREE_CODE (arg1) == REAL_CST
&& ! TREE_CONSTANT_OVERFLOW (arg1))
{
REAL_VALUE_TYPE cint;
*************** expand_builtin_pow (tree exp, rtx target
*** 2183,2195 ****
c = TREE_REAL_CST (arg1);
n = real_to_integer (&c);
real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
! if (real_identical (&c, &cint)
! && powi_cost (n) <= POWI_MAX_MULTS)
{
! enum machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
! rtx op = expand_expr (arg0, subtarget, VOIDmode, 0);
! op = force_reg (mode, op);
! return expand_powi (op, mode, n);
}
}
return expand_builtin_mathfn_2 (exp, target, NULL_RTX);
--- 2180,2200 ----
c = TREE_REAL_CST (arg1);
n = real_to_integer (&c);
real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
! if (real_identical (&c, &cint))
{
! /* If the exponent is -1, 0, 1 or 2, then expand_powi is exact.
! Otherwise, check the number of multiplications required.
! Note that pow never sets errno for an integer exponent. */
! if ((n >= -1 && n <= 2)
! || (flag_unsafe_math_optimizations
! && ! optimize_size
! && powi_cost (n) <= POWI_MAX_MULTS))
! {
! enum machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
! rtx op = expand_expr (arg0, subtarget, VOIDmode, 0);
! op = force_reg (mode, op);
! return expand_powi (op, mode, n);
! }
}
}
return expand_builtin_mathfn_2 (exp, target, NULL_RTX);
*************** fold_builtin (tree exp)
*** 6244,6271 ****
return fold (build (RDIV_EXPR, type,
build_real (type, dconst1),
arg0));
-
- /* Optimize pow(x,2.0) = x*x. */
- if (REAL_VALUES_EQUAL (c, dconst2)
- && (*lang_hooks.decls.global_bindings_p) () == 0
- && ! CONTAINS_PLACEHOLDER_P (arg0))
- {
- arg0 = save_expr (arg0);
- return fold (build (MULT_EXPR, type, arg0, arg0));
- }
-
- /* Optimize pow(x,-2.0) = 1.0/(x*x). */
- if (flag_unsafe_math_optimizations
- && REAL_VALUES_EQUAL (c, dconstm2)
- && (*lang_hooks.decls.global_bindings_p) () == 0
- && ! CONTAINS_PLACEHOLDER_P (arg0))
- {
- arg0 = save_expr (arg0);
- return fold (build (RDIV_EXPR, type,
- build_real (type, dconst1),
- fold (build (MULT_EXPR, type,
- arg0, arg0))));
- }

/* Optimize pow(x,0.5) = sqrt(x). */
if (flag_unsafe_math_optimizations
--- 6249,6254 ----

/* Copyright (C) 2003 Free Software Foundation.

Check that constant folding of built-in math functions doesn't
break anything and produces the expected results.

Written by Roger Sayle, 29th July 2003. */

/* { dg-do link } */
/* { dg-options "-O2 -ffast-math" } */

extern void link_error(void);

extern double pow(double,double);

/* Compare pairs of algebraically equal spellings of x squared and x cubed,
   calling link_error whenever a pair is not equal.
   NOTE(review): link_error is only declared in this file, never defined;
   presumably any call that survives optimization makes the "dg-do link"
   step fail -- confirm against the testsuite conventions.  */
void test(double x)
{
/* pow with exponent 2.0 against the explicit product x*x.  */
if (pow(x,2.0) != x*x)
link_error ();

/* x on the left of pow(x,2.0) against pow(x,3.0).  */
if (x*pow(x,2.0) != pow(x,3.0))
link_error ();

/* x on the right of pow(x,2.0) against pow(x,3.0).  */
if (pow(x,2.0)*x != pow(x,3.0))
link_error ();

/* pow with exponent 3.0 against the explicit product x*x*x.  */
if (pow(x,3.0) != x*x*x)
link_error ();

/* Mixed pow/product forms against the explicit product x*x*x.  */
if (pow(x,2.0)*x != x*x*x)
link_error ();

if (x*pow(x,2.0) != x*x*x)
link_error ();

/* Dividing pow(x,3.0) by x against both spellings of x squared.  */
if (pow(x,3.0)/x != pow(x,2.0))
link_error ();

if (pow(x,3.0)/x != x*x)
link_error ();
}

/* Entry point: run the checks in test once with x = 2.0, then return 0.
   Declared with a (void) prototype to match the style of the link_error
   declaration in this file.  */
int main(void)
{
test (2.0);
return 0;
}

Roger
--
Roger Sayle, E-mail: ***@eyesopen.com
OpenEye Scientific Software, WWW: http://www.eyesopen.com/
Suite 1107, 3600 Cerrillos Road, Tel: (+1) 505-473-7385
Santa Fe, New Mexico, 87507. Fax: (+1) 505-473-0833

Gabriel Dos Reis

2003-07-29 17:31:27 UTC