Hi deal.II community,
I am writing today about a code I originally developed with a shared triangulation and am now porting to the parallel::distributed::Triangulation<dim> framework.
The shared version works just fine, and so does the parallel one when run with a single process. The numerics therefore seem to be fine, but since I run into problems as soon as more than one process is used, I am probably misunderstanding some fundamentals of the parallel::distributed::Triangulation<dim> framework.
In particular, it seems that a call of the form
std::vector< Vector< double > > old_solution_values ( quadrature_formula.size(), Vector< double >(dofs.GPDofs) );
fe_values.get_function_values ( locally_accumulated_displacement, old_solution_values );
returns NaNs, which propagate and make the code fail. I wonder whether this is caused by DoFs that are not locally owned but sit on cells that are locally owned.
If so, my implementation is not appropriate, and I would like to know how to do this properly.
More details follow below.
I appreciate your help, as usual.
Alberto
----
The code is a standard three-field mechanics formulation for small strains, solved with a Newton-Raphson scheme. The interesting part of the class definition is:
template <int dim>
class SmallStrainMechanicalProblem
{
public:
  SmallStrainMechanicalProblem ( const std::string, const unsigned = 1, const unsigned = 1 );
  ~SmallStrainMechanicalProblem ();
  void run ( unsigned, bool, bool = false );

private:
  // Main methods
  ...
  // quadrature point integrators
  ...

  // MPI data structure
  MPI_Comm                             mpi_communicator;
  const unsigned int                   n_mpi_processes;
  const unsigned int                   this_mpi_process;
  ConditionalOStream                   pcout;
  std::vector<types::global_dof_index> local_dofs_per_process;
  std::vector<types::global_dof_index> starting_index_per_process;
  std::vector<types::global_dof_index> ending_index_per_process;
  IndexSet                             locally_owned_dofs;
  IndexSet                             locally_relevant_dofs;
  unsigned int                         n_local_cells;

  // FEM member variables
  parallel::distributed::Triangulation<dim> triangulation;
  DoFHandler<dim>     dof_handler;
  FESystem<dim>       fe;
  ConstraintMatrix    hanging_node_constraints;
  const QGauss<dim>   quadrature_formula;
  const QGauss<dim-1> face_quadrature_formula;
  LA::MPI::SparseMatrix system_matrix;
  LA::MPI::Vector locally_relevant_solution;        // stores owned elements and also ghost entries. Initiated in setup_system().
  LA::MPI::Vector locally_incremental_displacement; // stores owned elements only. Initiated in setup_system(), in refine_initial_grid() and in create_coarse_grid()
  LA::MPI::Vector locally_accumulated_displacement; // stores owned elements only. Initiated in setup_system(), in refine_initial_grid() and in create_coarse_grid()
  LA::MPI::Vector system_rhs;

  // Constitutive laws
  ...
  // IO
  ...
  debug_parallel_IO dbgIO;
};
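As the comments indicate, only locally_relevant_solution is ghosted. For reference, the two LA::MPI::Vector::reinit variants those comments refer to are roughly the following (a sketch with a placeholder name some_vector, not my actual code):

// owned-only vector: writable, but stores no ghost entries
some_vector.reinit (locally_owned_dofs, mpi_communicator);

// ghosted vector: additionally stores the ghost entries listed in
// locally_relevant_dofs, and is meant to be read-only
some_vector.reinit (locally_owned_dofs, locally_relevant_dofs, mpi_communicator);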
Here is the constructor:
template <int dim>
SmallStrainMechanicalProblem<dim>::SmallStrainMechanicalProblem (
  const std::string pathstr,
  const unsigned    poly_degree,
  const unsigned    printEveryNSteps)
  :
  mpi_communicator (MPI_COMM_WORLD),
  n_mpi_processes (Utilities::MPI::n_mpi_processes(mpi_communicator)),
  this_mpi_process (Utilities::MPI::this_mpi_process(mpi_communicator)),
  pcout (std::cout, this_mpi_process == 0),
  starting_index_per_process( n_mpi_processes ),
  ending_index_per_process( n_mpi_processes ),
  triangulation(mpi_communicator,
                typename Triangulation<dim>::MeshSmoothing
                (Triangulation<dim>::smoothing_on_refinement |
                 Triangulation<dim>::smoothing_on_coarsening)),
  dof_handler (triangulation),
  fe(FE_Q<dim>(poly_degree), dim,              // displacement
     FE_DGPMonomial<dim>(poly_degree - 1), 1,  // pressure
     FE_DGPMonomial<dim>(poly_degree - 1), 1), // dilatation
  quadrature_formula (poly_degree + 1),
  face_quadrature_formula (4),
  PrintEveryNSteps( printEveryNSteps ),
  mmIO( pathstr, this_mpi_process ),
  IO( pathstr + "IO", this_mpi_process ),
  dbgIO( pathstr, this_mpi_process )
{
  ...
}
Here is the setup:
template <int dim>
void SmallStrainMechanicalProblem<dim>::setup_system ()
{
  dof_handler.distribute_dofs (fe);
  locally_owned_dofs = dof_handler.locally_owned_dofs();
  DoFTools::extract_locally_relevant_dofs (dof_handler, locally_relevant_dofs);
  local_dofs_per_process = dof_handler.n_locally_owned_dofs_per_processor();

  starting_index_per_process[ 0 ] = 0;
  ending_index_per_process[ 0 ]   = local_dofs_per_process[ 0 ] - 1;
  for (unsigned ii = 1; ii < n_mpi_processes; ii++)
    {
      starting_index_per_process[ ii ] = local_dofs_per_process[ ii-1 ] + starting_index_per_process[ ii-1 ];
      ending_index_per_process[ ii ]   = local_dofs_per_process[ ii ]   + ending_index_per_process[ ii-1 ];
    }

  locally_relevant_solution.reinit (locally_owned_dofs, locally_relevant_dofs, mpi_communicator);
  locally_incremental_displacement.reinit (locally_owned_dofs, mpi_communicator);
  system_rhs.reinit (locally_owned_dofs, mpi_communicator);

  // debug
  dbgIO.dbg() << "\n" << "In setup_system(): \n n_dofs() = " << dof_handler.n_dofs() << "\n" << std::flush;
  dbgIO.dbg() << " n_locally_owned_dofs() = " << dof_handler.n_locally_owned_dofs () << "\n" << std::flush;
  dbgIO.dbg() << " local_dofs_per_process = " << std::flush;
  for ( unsigned ii = 0; ii < local_dofs_per_process.size(); ii++)
    dbgIO.dbg() << local_dofs_per_process[ ii ] << ", " << std::flush;
  dbgIO.dbg() << "\n" << " starting_index_per_process = " << std::flush;
  for ( unsigned ii = 0; ii < starting_index_per_process.size(); ii++)
    dbgIO.dbg() << starting_index_per_process[ ii ] << ", " << std::flush;
  dbgIO.dbg() << "\n" << " ending_index_per_process = " << std::flush;
  for ( unsigned ii = 0; ii < ending_index_per_process.size(); ii++)
    dbgIO.dbg() << ending_index_per_process[ ii ] << ", " << std::flush;
  dbgIO.dbg() << "\n" << " locally_owned_dofs = " << std::flush;
  locally_owned_dofs.print( dbgIO.dbg() );

  hanging_node_constraints.clear ();
  hanging_node_constraints.reinit (locally_relevant_dofs);
  DoFTools::make_hanging_node_constraints (dof_handler,
                                           hanging_node_constraints);
  hanging_node_constraints.close ();

  DynamicSparsityPattern sparsity_pattern (locally_relevant_dofs);
  DoFTools::make_sparsity_pattern (dof_handler, sparsity_pattern,
                                   hanging_node_constraints, /*keep constrained dofs*/ false);
  SparsityTools::distribute_sparsity_pattern (sparsity_pattern,
                                              local_dofs_per_process,
                                              mpi_communicator,
                                              locally_relevant_dofs);
  system_matrix.reinit (locally_owned_dofs,
                        locally_owned_dofs,
                        sparsity_pattern,
                        mpi_communicator);
}
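If I understand the tutorial programs (e.g. step-40) correctly, a ghosted vector such as locally_relevant_solution is then filled from an owned-only vector by plain assignment, which also imports the ghost values from the other processes; something like this (a sketch of my understanding, not a copy of my solve step):

// after the solve: copy the owned-only result into the ghosted vector;
// the assignment also fetches the ghost entries from the other processes
locally_relevant_solution = locally_incremental_displacement;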
And here is where the issue lies:
template <int dim>
void SmallStrainMechanicalProblem<dim>::assemble_system_nr (
  const TimeIntegrationDataManager* TIDM,
  const unsigned NRit )
//! system assembled for Newton-Raphson
{
  // FE data and structures
  FEValues<dim> fe_values (fe, quadrature_formula,
                           update_values | update_gradients |
                           update_quadrature_points | update_JxW_values);
  FEFaceValues<dim> fe_face_values (fe, face_quadrature_formula,
                                    update_values | update_quadrature_points |
                                    update_normal_vectors | update_JxW_values);

  const unsigned int dofs_per_cell   = fe.dofs_per_cell;
  const unsigned int n_q_points      = quadrature_formula.size();
  const unsigned int n_face_q_points = face_quadrature_formula.size();

  FullMatrix<double> cell_matrix (dofs_per_cell, dofs_per_cell);
  Vector<double>     cell_rhs (dofs_per_cell);
  std::vector<types::global_dof_index> local_dof_indices (dofs_per_cell);

  const FEValuesExtractors::Vector displacements (dofs.first_u_component);
  const FEValuesExtractors::Scalar pressure (dofs.p_component);
  const FEValuesExtractors::Scalar dilatation (dofs.J_component);
  ...

  // Evaluation
  typename DoFHandler<dim>::active_cell_iterator cell = dof_handler.begin_active(),
                                                 endc = dof_handler.end();
  int cellit = -1;
  for (; cell != endc; ++cell)
    if (cell->is_locally_owned())
      {
        ++cellit;
        cell_matrix = 0;
        cell_rhs = 0;

        // u,p,J from the former solution. fe_values must be reinitialized beforehand
        // fe_values reinitialization
        fe_values.reinit (cell);
        std::vector< Vector< double > > old_solution_values ( quadrature_formula.size(), Vector< double >(dofs.GPDofs) );
        fe_values.get_function_values ( locally_accumulated_displacement, old_solution_values );
        // fe_values.get_function_values (accumulated_displacement, old_solution_values);

        // debug
        dbgIO.dbg() << "In assemble_system_nr \n locally_accumulated_displacement =" << std::flush;
        for ( unsigned ii = starting_index_per_process[ this_mpi_process ]; ii <= ending_index_per_process[ this_mpi_process ]; ii++)
          dbgIO.dbg() << std::setw(8) << std::setprecision(4) << locally_accumulated_displacement[ ii ] << ", " << std::flush;
        dbgIO.dbg() << "\n" << std::flush;
        dbgIO.dbg() << "\n" << "old_solution_values =" << std::flush;
        for (unsigned ii = 0; ii < n_q_points; ii++ )
          dbgIO.dbg() << old_solution_values[ii] << std::flush;
        ...
      }
}
The output for process 1 is:
Debugger for the SmallStrainMechanicalProblem class defined on processor 1.
In setup_system():
n_dofs() = 82
n_locally_owned_dofs() = 36
local_dofs_per_process = 46, 36,
starting_index_per_process = 0, 46,
ending_index_per_process = 45, 81,
locally_owned_dofs = {[46,81]}
locally_accumulated_displacement = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
old_solution_values =nan 1.320e-314 0.000e+00 0.000e+00
nan 3.537e-315 0.000e+00 0.000e+00
nan 3.537e-315 0.000e+00 0.000e+00
nan 9.476e-316 0.000e+00 0.000e+00
----
Given the code snippet above, I wonder whether
locally_accumulated_displacement is a vector that only has the locally owned
dofs, or whether it is a vector that has at least the locally active elements
(or maybe also the ghost/locally relevant elements). If you call
fe_values.get_function_values() on a locally owned cell, the
locally_accumulated_displacement vector needs to have at least the locally
active vector elements.
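In other words, the usual pattern is to keep a writable, owned-only vector for the solver and a separate ghosted copy for everything that reads the solution cell by cell, as get_function_values() does. An untested sketch using your names, with a hypothetical new member ghosted_accumulated_displacement:

// in the class: an additional, ghosted copy used only for evaluation
LA::MPI::Vector ghosted_accumulated_displacement;

// in setup_system(): give it the ghost entries as well
ghosted_accumulated_displacement.reinit (locally_owned_dofs,
                                         locally_relevant_dofs,
                                         mpi_communicator);

// whenever locally_accumulated_displacement has been updated, e.g. right
// before calling assemble_system_nr(): copy the owned values; the assignment
// also fetches the ghost entries from the other processes
ghosted_accumulated_displacement = locally_accumulated_displacement;

// in assemble_system_nr(): evaluate the ghosted copy
fe_values.get_function_values (ghosted_accumulated_displacement,
                               old_solution_values);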