Assembling and FFC Compiler really slow

315 views
Skip to first unread message

Mario Peutler

unread,
May 17, 2016, 8:42:32 AM5/17/16
to fenics-support
I am running my program on OSX 10.11.4

Dolfin 1.6

and I am having trouble with the assembling - in the beginning everything seems to be working fine. But after a few iterations the assembling is taking much more time.
Do I need to reset the memory or something?
I have tried varying the "form_compiler" but without success..
Any suggestions?

from dolfin import *
from time import *

# Class for interfacing with the Newton solver
class Subproblem(NonlinearProblem):
    def __init__(self, a, L):
        NonlinearProblem.__init__(self)
        self.L = L
        self.a = a
    def F(self, b, x):
        assemble(self.L, tensor=b)
    def J(self, A, x):
        assemble(self.a, tensor=A)

# Model parameters

#parameters["form_compiler"]["quadrature_degree"] = 14
parameters["form_compiler"]["optimize"] = True
parameters["form_compiler"]["representation"] = 'quadrature'
#parameters["form_compiler"]["representation"] = "uflacs"
#parameters["form_compiler"]["quadrature_degree"] = 2
parameters["form_compiler"]["cpp_optimize"] = True
#parameters['form_compiler']['cpp_optimize_flags'] = '-foo'

# Create mesh and define function space
problemit = 0
subproblemit = 0
time = time()

# set h << epsilon

h = 2 ** 7
sigma = 0.0001
#0.0001
epsln = 1/(16 * pi)
lmbda = 1 / (sigma * epsln)



mesh = UnitSquareMesh(h, h)
V = FunctionSpace(mesh, "CG", 1)
R = FunctionSpace(mesh, "R", 0)
W = V * R

# Create mesh and define function space
#mesh = RectangleMesh(-1, -1, 1, 1, 64, 64, 'left')

e_ns = Function(V)
e_ns.vector()[:] = 1.0
e_n = interpolate(e_ns, V)

# input data
a_1 = Constant(8.0)
a_2 = Constant(1.0)

#eps = 0.0000001

# phi exact - for comparision
phi_exact = Expression("(x[0]-0.5)*(x[0]-0.5)/(0.05*0.05)+(x[1]-0.5)*(x[1]-0.5)/(0.2*0.2) <=1 ? k0 : k1", k0=1, k1=-1, degree= 1, domain=mesh)
phi_e = interpolate(phi_exact, V)
a_phi_e = a_1 * (1 + phi_e)/2 + a_2 * (1 - phi_e)/2


#phi_0_e = assemble(phi_e * dx)

#phi_e = phi_e - phi_0_e



g_h = Expression("(near(x[0], 1) &&  ! near(x[1], 0))|| near(x[1], 1) ? k0 : k1", k0=-0.5, k1= 0.5, degree= 1, domain=mesh)
f = Constant(0)
print "assembling g"
print assemble(g_h*ds)

# Define variational problem
(y_obs, c) = TrialFunction(W)
(v, d) = TestFunctions(W)
a = a_phi_e*(inner(grad(y_obs), grad(v)) + c*v + y_obs*d)*dx
L = f*v*dx + g_h*v*ds

# Compute solution
w = Function(W)
solve(a == L, w)
(y_obs, c) = w.split()
#plot(y_obs, interactive=True)


file = File("output.pvd", "compressed")
file << y_obs

print "assembling y_obs"
print assemble(y_obs*dx)
# Save solution in VTK format


phi_init = Expression("(x[0]-0.5)*(x[0]-0.5)/(0.15*0.15)+(x[1]-0.5)*(x[1]-0.5)/(0.25*0.25) <=1 ? k0 : k1", k0=1, k1=-1, degree= 1, domain=mesh)
phi = interpolate(phi_init, V)
#plot(phi_e, title='phi', interactive=True, scale=0.0)
#plot(phi, title='phi',interactive=True, scale=0.0)


solving = True
while solving:
problemit = problemit + 1

a_phi = a_1 * (1 + phi)/2 + a_2 * (1 - phi)/2 

# Define variational problem
(y, c) = TrialFunction(W)
(v, d) = TestFunctions(W)
a = a_phi*(inner(grad(y), grad(v)) + c*v + y*d)*dx
L = f*v*dx + g_h*v*ds

# Compute solution
w = Function(W)
solve(a == L, w)
(y, c) = w.split()
# plot(y, interactive=True)

# solve dual problem
# Define variational problem

(p, c) = TrialFunction(W)
(v, d) = TestFunctions(W)
a = a_phi*(inner(grad(p), grad(v)) + c*v + p*d)*dx
L = (y-y_obs)*v*dx 

# Compute solution
w = Function(W)
solve(a == L, w)
(p, c) = w.split()

# Plot solution
# plot(p, interactive=True)


# Save solution in VTK format

print "using solver..."


u = Function(V)
du = TrialFunction(V)
v = TestFunction(V)
e_n = interpolate(Constant(1), V)


F_1 =  0.5 * (u) * v *dx + 0.5 * inner(nabla_grad(u), nabla_grad(v))*dx  
F_2 = lmbda * ( ( (- a_1 + a_2) * 0.5 * v ) * inner(nabla_grad(y), nabla_grad(p))) *dx 
# F_3 = lmbda *  sigma * epsln * (inner(nabla_grad(phi), nabla_grad(v)))*dx 
F_4 = - (1 + lmbda * sigma/epsln) * (phi * v)*dx 
# -a(phi,v)
# F_5 = - inner(nabla_grad(phi), nabla_grad(v))*dx 
# F_6 = - phi * v*dx 

F = F_1 + F_2  + F_4 

# F = sigma * epsln * inner(nabla_grad(u), nabla_grad(v))*dx+sigma * epsln * inner(nabla_grad(e_n -u), nabla_grad(-v))*dx  + ( - a_1 * v ) * inner(nabla_grad(y), nabla_grad(p)) *dx + sigma * epsln * (inner(nabla_grad(phi), nabla_grad(v)))*dx - sigma/epsln * (phi * v)*dx  

# test purpose
# F = inner(nabla_grad(phi - u), nabla_grad(-v)) *dx + (phi - u ) * -v *dx

# a = derivative(F, u, du)

# problem = NonlinearVariationalProblem(F, u, [], a)
# solver = NonlinearVariationalSolver(problem)
#solver.parameters["nonlinear_solver"] = "snes"
# solver.parameters["nonlinear_solver"] = "snes"
# solver.parameters["snes_solver"]["linear_solver"] = "lu"
# solver.parameters["snes_solver"]["method"] = "vinewtonrsls"
#info(solver.parameters, verbose=True)

problem = Subproblem(derivative(F, u, du), F)

solver = PETScSNESSolver()
solver.parameters["linear_solver"] = "lu"
#solver.parameters["convergence_criterion"] = "incremental"
#solver.parameters["relative_tolerance"] = 1e-6
solver.parameters['maximum_iterations']= 500

# vector for the lower and upper bound. 
lb = interpolate(Constant(-1) , V)
ub = interpolate(Constant(1) , V)

# solver.solve(lb.vector(), ub.vector())  # Bounded solve

solver.solve(problem, u.vector(), lb.vector(), ub.vector())  # Bounded solve


# plot(phi, title='u', scale=0.0, interactive=True)

u = u - phi

# plot(u, title='u', scale=0.0, interactive=True)


# u_0 = assemble(u*dx)

# plot(u, title='u', scale=0.0)

print "solver... finished"


print "calculating armijo steps..."

# u = u - u_0



alpha = 0.5
tau = 0.00001

# calculate j(phi) (need to be computed only once)
j_phi = assemble(0.5 * (y - y_obs) * (y - y_obs) * dx + sigma * epsln * 0.5 * inner(nabla_grad(phi), nabla_grad(phi)) * dx + ( sigma / epsln) * 0.5 * (e_n - phi * phi) * dx)
print "assemlbing j(phi) is done.."
# calculate j'(phi) * v  (needs to be computed only once)
F_a = assemble((( - a_1 * (1 + u)/2  + a_2 * (1 - u)/2 ) * inner(nabla_grad(y), nabla_grad(p)) *dx ))
print "assemlbing f' (1) is done.."
F_b = 0.5 * sigma * epsln * assemble(inner(nabla_grad(phi), nabla_grad(u))*dx)
print "assemlbing f' (2) is done.."
F_c = 0.5 * (- sigma/epsln) * assemble(phi * u*dx)

print "assemlbing f' (3) is done.."

max_iter = 100
for i in range (0,max_iter):
#set alpha due to armijo condition
alph_calc = alpha ** i

# Define variational problem for new phi value
phi_2 = phi + alph_calc * u
a_phi_2 = a_1 * (1 + phi_2)/2  + a_2 * (1 - phi_2)/2 

# Define variational problem
(y_2, c) = TrialFunction(W)
(v, d) = TestFunctions(W)
a = a_phi_2*(inner(grad(y_2), grad(v)) + c*v + y_2*d)*dx
L = f*v*dx + g_h*v*ds

# Compute solution
w = Function(W)
solve(a == L, w)
(y_2, c) = w.split()
print "solved variational problem for armijo steps.."

# Plot solution
# plot(y_2, interactive=True)




# calculate new j(phi + alpha * v)
#j_phi_new = assemble(0.5 * (y_2 - y_obs) * dx + sigma * epsln * 0.5 * inner(nabla_grad(phi_init_2), nabla_grad(phi_init_2)) * dx + ( sigma / epsln) * 0.5 * (e_n - phi_init_2 * phi_init_2) * dx)
j_phi_new = assemble(0.5 * (y_2 - y_obs) * (y_2 - y_obs) * dx + sigma * epsln * 0.5 * inner(nabla_grad(phi_2), nabla_grad(phi_2)) * dx + ( sigma / epsln) * 0.5 * (e_n - phi_2 * phi_2) * dx)
print "assembling j(phi_new) is done..."
# multiply with alpha
F = tau * alph_calc * (F_a + F_b + F_c)
print "alpha * f' has been set..."
#check if armijo condition is fullfilled
armijovalue = j_phi - j_phi_new + F

if armijovalue > - DOLFIN_EPS:
# plot(u, title='u', scale=0.0)
# plot(phi, title='phi', scale=0.0)
phi = phi + alph_calc * u
# print assemble(phi*dx)
# test = phi - phi_e
plot(phi_e - phi, title='phi', scale=0.0)
break

if i == 100:
print "ERROR!!!"

norm = assemble(u * u *dx + inner(nabla_grad(u),nabla_grad(u))*dx)
print "assembled norm - checking for convergence..."
print norm
print "current step"
print problemit
if(norm < 0.0001) :
time2 = time()
print "finished"
solving = False
print "after Iteration"
print problemit
print "time:"
print time2 - time

interactive()

print "End of Code..."


Jan Blechta

unread,
May 17, 2016, 10:06:14 AM5/17/16
to Mario Peutler, fenics-support
Due to purely technical reasons assembly on 'Real'/'R' space exhibits
quadratic complexity in number of DOFs. This has been partially
resolved by
https://bitbucket.org/fenics-project/dolfin/pull-requests/255/fix-quadratic-scaling-of-global-dofs/diff.
The part fixing quadratic allocation of PETSc matrix has been
postponed and will be reimplemented later.

If you really need to assemble on Real spaces of non-trivial size you
can build DOLFIN from the branch
https://bitbucket.org/fenics-project/dolfin/branch/jan/fix-slow-real
which has all the necessary fixes and exhibits linear complexity of
assembly on Real spaces.

Jan

Mario Peutler

unread,
May 19, 2016, 3:58:34 AM5/19/16
to fenics-support
Since I need my mean value to be zero,
I changed my code into solving


[...]
V = FunctionSpace(mesh, "CG", 1)
[...]
v = TestFunction(V)
a = a_phi_e*(inner(grad(y_obs), grad(v)))*dx
L = g_h*v*ds

y_obs = Function(V)
solve(a == L, y_obs)

y_obs = y_obs - assemble(y_obs*dx)


but still assembling is quite slow.

Mario Peutler

unread,
May 19, 2016, 4:07:51 AM5/19/16
to fenics-support
can you tell me which way is faster?

assemble(u*v*dx) + assemble(inner(grad(u),grad(v))*dx)

or

assemble(u*v*dx + inner(grad(u),(grad(v))*dx)

is there a difference/way to improve spped? 

Jan Blechta

unread,
May 19, 2016, 5:25:31 AM5/19/16
to Mario Peutler, fenics-support
Remove the "R" space from the code or build DOLFIN with
jan/fix-slow-real branch.

Jan

>

Jan Blechta

unread,
May 19, 2016, 5:27:20 AM5/19/16
to Mario Peutler, fenics-support
On Thu, 19 May 2016 01:07:51 -0700 (PDT)
Mario Peutler <mario....@gmail.com> wrote:

> can you tell me which way is faster?
>
> assemble(u*v*dx) + assemble(inner(grad(u),grad(v))*dx)
>
> or
>
> assemble(u*v*dx + inner(grad(u),(grad(v))*dx)
>
> is there a difference/way to improve spped?

Both are the same.

Assembly can be improved by

parameters['form_compiler']['representation'] = 'uflacs'
parameters['form_compiler']['optimize'] = True

issued in the beginning of your script.

Jan

Mario Peutler

unread,
May 19, 2016, 7:38:06 AM5/19/16
to fenics-support
I did remove the R Space. 
And I changed to 

parameters['form_compiler']['representation'] = 'uflacs' 
parameters['form_compiler']['optimize'] = True 

but the code is still pretty slow..

is it right to specify the expressions using

interpolate(Expression("(x[0]-0.5)*(x[0]-0.5)/(0.05*0.05)+(x[1]-0.5)*(x[1]-0.5)/(0.2*0.2) <=1 ? k0 : k1", k0=1, k1=-1, degree= 1, domain=mesh), V)
?
master.py

Jan Blechta

unread,
May 21, 2016, 1:56:54 PM5/21/16
to Mario Peutler, fenics-support
On Thu, 19 May 2016 04:38:05 -0700 (PDT)
Mario Peutler <mario....@gmail.com> wrote:

> I did remove the R Space.
> And I changed to
>
> parameters['form_compiler']['representation'] = 'uflacs'
> parameters['form_compiler']['optimize'] = True
>
> but the code is still pretty slow..

There's not much to say unless you provide MWE.

>
> is it right to specify the expressions using
>
> interpolate(Expression("(x[0]-0.5)*(x[0]-0.5)/(0.05*0.05)+(x[1]-0.5)*(x[1]-0.5)/(0.2*0.2)
> <=1 ? k0 : k1", k0=1, k1=-1, degree= 1, domain=mesh), V)

Expression is fine. Interpolation to function might be unnecessary.

Jan

Mario Peutler

unread,
May 23, 2016, 7:27:13 AM5/23/16
to fenics-support
I think I found the reason why my code is getting slow.

I am calculating an iterative solution by 

lb = interpolate(Constant(-1) , V)
ub = interpolate(Constant(1) , V)

solver.solve(problem, u.vector(), lb.vector(), ub.vector())  # Bounded solve

u = u - phi

phi = phi + alph_calc * u

where alpha_calc is my step length and u is my search direction.

when i print "phi = phi + alph_calc * u" line I get 

f_29 + f_54 + -1 * f_29 in the first iteration step


f_29 + f_54 + -1 * f_29 + f_2134 + -1 * (f_29 + f_54 + -1 * f_29) in the second step


f_29 + f_54 + -1 * f_29 + f_2134 + -1 * (f_29 + f_54 + -1 * f_29) + f_3062 + -1 * (f_29 + f_54 + -1 * f_29 + f_2134 + -1 * (f_29 + f_54 + -1 * f_29)) in the third step and so on..


is it possible to pass on the value, so that it does not need to be reevaluated in every step?

Jan Blechta

unread,
May 23, 2016, 7:47:11 AM5/23/16
to Mario Peutler, fenics-support
There will be a way for sure. You need to avoid blowing up complexity of
UFL coefficient and rather interpolate/project/axpy the auxiliary
variable.

Without minimal working example it is impossible to give concrete
advice. I'd recommend asking on http://fenicsproject.org/qa including
MWE.

Jan


On Mon, 23 May 2016 04:27:13 -0700 (PDT)

Marco Morandini

unread,
May 23, 2016, 8:04:07 AM5/23/16
to fenics-...@googlegroups.com
On 05/23/2016 01:27 PM, Mario Peutler wrote:
> I think I found the reason why my code is getting slow.
>
> I am calculating an iterative solution by
>
> lb = interpolate(Constant(-1) , V)
> ub = interpolate(Constant(1) , V)
>
> solver.solve(problem, u.vector(), lb.vector(), ub.vector()) # Bounded solve
>
> u = u - phi
>
> phi = phi + alph_calc * u
>

I assume here that phi is a Function. With

phi = phi + alph_calc * u

you are redefining the Function. I think you should change the value of
the stored vector instead. Something like

phi.vector()[:] += u.vector() * alph_calc

Marco


Mario Peutler

unread,
May 23, 2016, 8:18:30 AM5/23/16
to fenics-support
Thank you so much!!

You are my Hero :)

I feel pretty dumb though :D
Solved my problem.

Can be closed.
Reply all
Reply to author
Forward
0 new messages