$extrastylesheet
00001 // The libMesh Finite Element Library. 00002 // Copyright (C) 2002-2014 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner 00003 00004 // This library is free software; you can redistribute it and/or 00005 // modify it under the terms of the GNU Lesser General Public 00006 // License as published by the Free Software Foundation; either 00007 // version 2.1 of the License, or (at your option) any later version. 00008 00009 // This library is distributed in the hope that it will be useful, 00010 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00012 // Lesser General Public License for more details. 00013 00014 // You should have received a copy of the GNU Lesser General Public 00015 // License along with this library; if not, write to the Free Software 00016 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00017 00018 00019 00020 // C++ Includes ----------------------------------- 00021 00022 // Local Includes ----------------------------------- 00023 #include "libmesh/libmesh_config.h" 00024 #include "libmesh/mesh_base.h" 00025 #include "libmesh/parallel.h" // also includes mpi.h 00026 #include "libmesh/mesh_serializer.h" 00027 #include "libmesh/mesh_tools.h" 00028 #include "libmesh/mesh_communication.h" 00029 #include "libmesh/parmetis_partitioner.h" 00030 #include "libmesh/metis_partitioner.h" 00031 #include "libmesh/libmesh_logging.h" 00032 #include "libmesh/elem.h" 00033 00034 #ifdef LIBMESH_HAVE_PARMETIS 00035 00036 // Include the ParMETIS header files 00037 namespace Parmetis { 00038 extern "C" { 00039 # include "libmesh/ignore_warnings.h" 00040 # include "parmetis.h" 00041 # include "libmesh/restore_warnings.h" 00042 } 00043 } 00044 00045 #endif // #ifdef LIBMESH_HAVE_PARMETIS ... else ... 00046 00047 namespace libMesh 00048 { 00049 00050 // Minimum elements on each processor required for us to choose 00051 // Parmetis over Metis. 00052 const unsigned int MIN_ELEM_PER_PROC = 4; 00053 00054 00055 // ------------------------------------------------------------ 00056 // ParmetisPartitioner implementation 00057 void ParmetisPartitioner::_do_partition (MeshBase& mesh, 00058 const unsigned int n_sbdmns) 00059 { 00060 this->_do_repartition (mesh, n_sbdmns); 00061 } 00062 00063 00064 00065 void ParmetisPartitioner::_do_repartition (MeshBase& mesh, 00066 const unsigned int n_sbdmns) 00067 { 00068 libmesh_assert_greater (n_sbdmns, 0); 00069 00070 // Check for an easy return 00071 if (n_sbdmns == 1) 00072 { 00073 this->single_partition(mesh); 00074 return; 00075 } 00076 00077 // This function must be run on all processors at once 00078 libmesh_parallel_only(mesh.comm()); 00079 00080 // What to do if the Parmetis library IS NOT present 00081 #ifndef LIBMESH_HAVE_PARMETIS 00082 00083 libmesh_here(); 00084 libMesh::err << "ERROR: The library has been built without" << std::endl 00085 << "Parmetis support. Using a Metis" << std::endl 00086 << "partitioner instead!" << std::endl; 00087 00088 MetisPartitioner mp; 00089 00090 mp.partition (mesh, n_sbdmns); 00091 00092 // What to do if the Parmetis library IS present 00093 #else 00094 00095 // Revert to METIS on one processor. 00096 if (mesh.n_processors() == 1) 00097 { 00098 MetisPartitioner mp; 00099 mp.partition (mesh, n_sbdmns); 00100 return; 00101 } 00102 00103 START_LOG("repartition()", "ParmetisPartitioner"); 00104 00105 // Initialize the data structures required by ParMETIS 00106 this->initialize (mesh, n_sbdmns); 00107 00108 // Make sure all processors have enough active local elements. 00109 // Parmetis tends to crash when it's given only a couple elements 00110 // per partition. 00111 { 00112 bool all_have_enough_elements = true; 00113 for (processor_id_type pid=0; pid<_n_active_elem_on_proc.size(); pid++) 00114 if (_n_active_elem_on_proc[pid] < MIN_ELEM_PER_PROC) 00115 all_have_enough_elements = false; 00116 00117 // Parmetis will not work unless each processor has some 00118 // elements. Specifically, it will abort when passed a NULL 00119 // partition array on *any* of the processors. 00120 if (!all_have_enough_elements) 00121 { 00122 // FIXME: revert to METIS, although this requires a serial mesh 00123 MeshSerializer serialize(mesh); 00124 00125 STOP_LOG ("repartition()", "ParmetisPartitioner"); 00126 00127 MetisPartitioner mp; 00128 mp.partition (mesh, n_sbdmns); 00129 00130 return; 00131 } 00132 } 00133 00134 // build the graph corresponding to the mesh 00135 this->build_graph (mesh); 00136 00137 00138 // Partition the graph 00139 std::vector<int> vsize(_vwgt.size(), 1); 00140 float itr = 1000000.0; 00141 MPI_Comm mpi_comm = mesh.comm().get(); 00142 00143 // Call the ParMETIS adaptive repartitioning method. This respects the 00144 // original partitioning when computing the new partitioning so as to 00145 // minimize the required data redistribution. 00146 Parmetis::ParMETIS_V3_AdaptiveRepart(_vtxdist.empty() ? NULL : &_vtxdist[0], 00147 _xadj.empty() ? NULL : &_xadj[0], 00148 _adjncy.empty() ? NULL : &_adjncy[0], 00149 _vwgt.empty() ? NULL : &_vwgt[0], 00150 vsize.empty() ? NULL : &vsize[0], 00151 NULL, 00152 &_wgtflag, 00153 &_numflag, 00154 &_ncon, 00155 &_nparts, 00156 _tpwgts.empty() ? NULL : &_tpwgts[0], 00157 _ubvec.empty() ? NULL : &_ubvec[0], 00158 &itr, 00159 &_options[0], 00160 &_edgecut, 00161 _part.empty() ? NULL : &_part[0], 00162 &mpi_comm); 00163 00164 // Assign the returned processor ids 00165 this->assign_partitioning (mesh); 00166 00167 00168 STOP_LOG ("repartition()", "ParmetisPartitioner"); 00169 00170 #endif // #ifndef LIBMESH_HAVE_PARMETIS ... else ... 00171 00172 } 00173 00174 00175 00176 // Only need to compile these methods if ParMETIS is present 00177 #ifdef LIBMESH_HAVE_PARMETIS 00178 00179 void ParmetisPartitioner::initialize (const MeshBase& mesh, 00180 const unsigned int n_sbdmns) 00181 { 00182 const dof_id_type n_active_local_elem = mesh.n_active_local_elem(); 00183 00184 // Set parameters. 00185 _wgtflag = 2; // weights on vertices only 00186 _ncon = 1; // one weight per vertex 00187 _numflag = 0; // C-style 0-based numbering 00188 _nparts = static_cast<int>(n_sbdmns); // number of subdomains to create 00189 _edgecut = 0; // the numbers of edges cut by the 00190 // partition 00191 00192 // Initialize data structures for ParMETIS 00193 _vtxdist.resize (mesh.n_processors()+1); std::fill (_vtxdist.begin(), _vtxdist.end(), 0); 00194 _tpwgts.resize (_nparts); std::fill (_tpwgts.begin(), _tpwgts.end(), 1./_nparts); 00195 _ubvec.resize (_ncon); std::fill (_ubvec.begin(), _ubvec.end(), 1.05); 00196 _part.resize (n_active_local_elem); std::fill (_part.begin(), _part.end(), 0); 00197 _options.resize (5); 00198 _vwgt.resize (n_active_local_elem); 00199 00200 // Set the options 00201 _options[0] = 1; // don't use default options 00202 _options[1] = 0; // default (level of timing) 00203 _options[2] = 15; // random seed (default) 00204 _options[3] = 2; // processor distribution and subdomain distribution are decoupled 00205 00206 // Find the number of active elements on each processor. We cannot use 00207 // mesh.n_active_elem_on_proc(pid) since that only returns the number of 00208 // elements assigned to pid which are currently stored on the calling 00209 // processor. This will not in general be correct for parallel meshes 00210 // when (pid!=mesh.processor_id()). 00211 _n_active_elem_on_proc.resize(mesh.n_processors()); 00212 mesh.comm().allgather(n_active_local_elem, _n_active_elem_on_proc); 00213 00214 // count the total number of active elements in the mesh. Note we cannot 00215 // use mesh.n_active_elem() in general since this only returns the number 00216 // of active elements which are stored on the calling processor. 00217 // We should not use n_active_elem for any allocation because that will 00218 // be inheritly unscalable, but it can be useful for libmesh_assertions. 00219 dof_id_type n_active_elem=0; 00220 00221 // Set up the vtxdist array. This will be the same on each processor. 00222 // ***** Consult the Parmetis documentation. ***** 00223 libmesh_assert_equal_to (_vtxdist.size(), 00224 cast_int<std::size_t>(mesh.n_processors()+1)); 00225 libmesh_assert_equal_to (_vtxdist[0], 0); 00226 00227 for (processor_id_type pid=0; pid<mesh.n_processors(); pid++) 00228 { 00229 _vtxdist[pid+1] = _vtxdist[pid] + _n_active_elem_on_proc[pid]; 00230 n_active_elem += _n_active_elem_on_proc[pid]; 00231 } 00232 libmesh_assert_equal_to (_vtxdist.back(), static_cast<int>(n_active_elem)); 00233 00234 // ParMetis expects the elements to be numbered in contiguous blocks 00235 // by processor, i.e. [0, ne0), [ne0, ne0+ne1), ... 00236 // Since we only partition active elements we should have no expectation 00237 // that we currently have such a distribution. So we need to create it. 00238 // Also, at the same time we are going to map all the active elements into a globally 00239 // unique range [0,n_active_elem) which is *independent* of the current partitioning. 00240 // This can be fed to ParMetis as the initial partitioning of the subdomains (decoupled 00241 // from the partitioning of the objects themselves). This allows us to get the same 00242 // resultant partitioning independed of the input partitioning. 00243 MeshTools::BoundingBox bbox = 00244 MeshTools::bounding_box(mesh); 00245 00246 _global_index_by_pid_map.clear(); 00247 00248 // Maps active element ids into a contiguous range independent of partitioning. 00249 // (only needs local scope) 00250 vectormap<dof_id_type, dof_id_type> global_index_map; 00251 00252 { 00253 std::vector<dof_id_type> global_index; 00254 00255 // create the mapping which is contiguous by processor 00256 dof_id_type pid_offset=0; 00257 for (processor_id_type pid=0; pid<mesh.n_processors(); pid++) 00258 { 00259 MeshBase::const_element_iterator it = mesh.active_pid_elements_begin(pid); 00260 const MeshBase::const_element_iterator end = mesh.active_pid_elements_end(pid); 00261 00262 // note that we may not have all (or any!) the active elements which belong on this processor, 00263 // but by calling this on all processors a unique range in [0,_n_active_elem_on_proc[pid]) 00264 // is constructed. Only the indices for the elements we pass in are returned in the array. 00265 MeshCommunication().find_global_indices (mesh.comm(), 00266 bbox, it, end, 00267 global_index); 00268 00269 for (dof_id_type cnt=0; it != end; ++it) 00270 { 00271 const Elem *elem = *it; 00272 libmesh_assert (!_global_index_by_pid_map.count(elem->id())); 00273 libmesh_assert_less (cnt, global_index.size()); 00274 libmesh_assert_less (global_index[cnt], _n_active_elem_on_proc[pid]); 00275 00276 _global_index_by_pid_map.insert(std::make_pair(elem->id(), global_index[cnt++] + pid_offset)); 00277 } 00278 00279 pid_offset += _n_active_elem_on_proc[pid]; 00280 } 00281 00282 // create the unique mapping for all active elements independent of partitioning 00283 { 00284 MeshBase::const_element_iterator it = mesh.active_elements_begin(); 00285 const MeshBase::const_element_iterator end = mesh.active_elements_end(); 00286 00287 // Calling this on all processors a unique range in [0,n_active_elem) is constructed. 00288 // Only the indices for the elements we pass in are returned in the array. 00289 MeshCommunication().find_global_indices (mesh.comm(), 00290 bbox, it, end, 00291 global_index); 00292 00293 for (dof_id_type cnt=0; it != end; ++it) 00294 { 00295 const Elem *elem = *it; 00296 libmesh_assert (!global_index_map.count(elem->id())); 00297 libmesh_assert_less (cnt, global_index.size()); 00298 libmesh_assert_less (global_index[cnt], n_active_elem); 00299 00300 global_index_map.insert(std::make_pair(elem->id(), global_index[cnt++])); 00301 } 00302 } 00303 // really, shouldn't be close! 00304 libmesh_assert_less_equal (global_index_map.size(), n_active_elem); 00305 libmesh_assert_less_equal (_global_index_by_pid_map.size(), n_active_elem); 00306 00307 // At this point the two maps should be the same size. If they are not 00308 // then the number of active elements is not the same as the sum over all 00309 // processors of the number of active elements per processor, which means 00310 // there must be some unpartitioned objects out there. 00311 if (global_index_map.size() != _global_index_by_pid_map.size()) 00312 libmesh_error_msg("ERROR: ParmetisPartitioner cannot handle unpartitioned objects!"); 00313 } 00314 00315 // Finally, we need to initialize the vertex (partition) weights and the initial subdomain 00316 // mapping. The subdomain mapping will be independent of the processor mapping, and is 00317 // defined by a simple mapping of the global indices we just found. 00318 { 00319 std::vector<dof_id_type> subdomain_bounds(mesh.n_processors()); 00320 00321 const dof_id_type first_local_elem = _vtxdist[mesh.processor_id()]; 00322 00323 for (processor_id_type pid=0; pid<mesh.n_processors(); pid++) 00324 { 00325 dof_id_type tgt_subdomain_size = 0; 00326 00327 // watch out for the case that n_subdomains < n_processors 00328 if (pid < static_cast<unsigned int>(_nparts)) 00329 { 00330 tgt_subdomain_size = n_active_elem/std::min 00331 (cast_int<int>(mesh.n_processors()), _nparts); 00332 00333 if (pid < n_active_elem%_nparts) 00334 tgt_subdomain_size++; 00335 } 00336 if (pid == 0) 00337 subdomain_bounds[0] = tgt_subdomain_size; 00338 else 00339 subdomain_bounds[pid] = subdomain_bounds[pid-1] + tgt_subdomain_size; 00340 } 00341 00342 libmesh_assert_equal_to (subdomain_bounds.back(), n_active_elem); 00343 00344 MeshBase::const_element_iterator elem_it = mesh.active_local_elements_begin(); 00345 const MeshBase::const_element_iterator elem_end = mesh.active_local_elements_end(); 00346 00347 for (; elem_it != elem_end; ++elem_it) 00348 { 00349 const Elem *elem = *elem_it; 00350 00351 libmesh_assert (_global_index_by_pid_map.count(elem->id())); 00352 const dof_id_type global_index_by_pid = 00353 _global_index_by_pid_map[elem->id()]; 00354 libmesh_assert_less (global_index_by_pid, n_active_elem); 00355 00356 const dof_id_type local_index = 00357 global_index_by_pid - first_local_elem; 00358 00359 libmesh_assert_less (local_index, n_active_local_elem); 00360 libmesh_assert_less (local_index, _vwgt.size()); 00361 00362 // TODO:[BSK] maybe there is a better weight? 00363 _vwgt[local_index] = elem->n_nodes(); 00364 00365 // find the subdomain this element belongs in 00366 libmesh_assert (global_index_map.count(elem->id())); 00367 const dof_id_type global_index = 00368 global_index_map[elem->id()]; 00369 00370 libmesh_assert_less (global_index, subdomain_bounds.back()); 00371 00372 const unsigned int subdomain_id = 00373 std::distance(subdomain_bounds.begin(), 00374 std::lower_bound(subdomain_bounds.begin(), 00375 subdomain_bounds.end(), 00376 global_index)); 00377 libmesh_assert_less (subdomain_id, static_cast<unsigned int>(_nparts)); 00378 libmesh_assert_less (local_index, _part.size()); 00379 00380 _part[local_index] = subdomain_id; 00381 } 00382 } 00383 } 00384 00385 00386 00387 void ParmetisPartitioner::build_graph (const MeshBase& mesh) 00388 { 00389 // build the graph in distributed CSR format. Note that 00390 // the edges in the graph will correspond to 00391 // face neighbors 00392 const dof_id_type n_active_local_elem = mesh.n_active_local_elem(); 00393 00394 #ifdef LIBMESH_ENABLE_AMR 00395 std::vector<const Elem*> neighbors_offspring; 00396 #endif 00397 00398 std::vector<std::vector<dof_id_type> > graph(n_active_local_elem); 00399 dof_id_type graph_size=0; 00400 00401 const dof_id_type first_local_elem = _vtxdist[mesh.processor_id()]; 00402 00403 MeshBase::const_element_iterator elem_it = mesh.active_local_elements_begin(); 00404 const MeshBase::const_element_iterator elem_end = mesh.active_local_elements_end(); 00405 00406 for (; elem_it != elem_end; ++elem_it) 00407 { 00408 const Elem* elem = *elem_it; 00409 00410 libmesh_assert (_global_index_by_pid_map.count(elem->id())); 00411 const dof_id_type global_index_by_pid = 00412 _global_index_by_pid_map[elem->id()]; 00413 00414 const dof_id_type local_index = 00415 global_index_by_pid - first_local_elem; 00416 libmesh_assert_less (local_index, n_active_local_elem); 00417 00418 std::vector<dof_id_type> &graph_row = graph[local_index]; 00419 00420 // Loop over the element's neighbors. An element 00421 // adjacency corresponds to a face neighbor 00422 for (unsigned int ms=0; ms<elem->n_neighbors(); ms++) 00423 { 00424 const Elem* neighbor = elem->neighbor(ms); 00425 00426 if (neighbor != NULL) 00427 { 00428 // If the neighbor is active treat it 00429 // as a connection 00430 if (neighbor->active()) 00431 { 00432 libmesh_assert(_global_index_by_pid_map.count(neighbor->id())); 00433 const dof_id_type neighbor_global_index_by_pid = 00434 _global_index_by_pid_map[neighbor->id()]; 00435 00436 graph_row.push_back(neighbor_global_index_by_pid); 00437 graph_size++; 00438 } 00439 00440 #ifdef LIBMESH_ENABLE_AMR 00441 00442 // Otherwise we need to find all of the 00443 // neighbor's children that are connected to 00444 // us and add them 00445 else 00446 { 00447 // The side of the neighbor to which 00448 // we are connected 00449 const unsigned int ns = 00450 neighbor->which_neighbor_am_i (elem); 00451 libmesh_assert_less (ns, neighbor->n_neighbors()); 00452 00453 // Get all the active children (& grandchildren, etc...) 00454 // of the neighbor. 00455 neighbor->active_family_tree (neighbors_offspring); 00456 00457 // Get all the neighbor's children that 00458 // live on that side and are thus connected 00459 // to us 00460 for (unsigned int nc=0; nc<neighbors_offspring.size(); nc++) 00461 { 00462 const Elem* child = 00463 neighbors_offspring[nc]; 00464 00465 // This does not assume a level-1 mesh. 00466 // Note that since children have sides numbered 00467 // coincident with the parent then this is a sufficient test. 00468 if (child->neighbor(ns) == elem) 00469 { 00470 libmesh_assert (child->active()); 00471 libmesh_assert (_global_index_by_pid_map.count(child->id())); 00472 const dof_id_type child_global_index_by_pid = 00473 _global_index_by_pid_map[child->id()]; 00474 00475 graph_row.push_back(child_global_index_by_pid); 00476 graph_size++; 00477 } 00478 } 00479 } 00480 00481 #endif /* ifdef LIBMESH_ENABLE_AMR */ 00482 00483 00484 } 00485 } 00486 } 00487 00488 // Reserve space in the adjacency array 00489 _xadj.clear(); 00490 _xadj.reserve (n_active_local_elem + 1); 00491 _adjncy.clear(); 00492 _adjncy.reserve (graph_size); 00493 00494 for (std::size_t r=0; r<graph.size(); r++) 00495 { 00496 _xadj.push_back(_adjncy.size()); 00497 std::vector<dof_id_type> graph_row; // build this emtpy 00498 graph_row.swap(graph[r]); // this will deallocate at the end of scope 00499 _adjncy.insert(_adjncy.end(), 00500 graph_row.begin(), 00501 graph_row.end()); 00502 } 00503 00504 // The end of the adjacency array for the last elem 00505 _xadj.push_back(_adjncy.size()); 00506 00507 libmesh_assert_equal_to (_xadj.size(), n_active_local_elem+1); 00508 libmesh_assert_equal_to (_adjncy.size(), graph_size); 00509 } 00510 00511 00512 00513 void ParmetisPartitioner::assign_partitioning (MeshBase& mesh) 00514 { 00515 // This function must be run on all processors at once 00516 libmesh_parallel_only(mesh.comm()); 00517 00518 const dof_id_type 00519 first_local_elem = _vtxdist[mesh.processor_id()]; 00520 00521 std::vector<std::vector<dof_id_type> > 00522 requested_ids(mesh.n_processors()), 00523 requests_to_fill(mesh.n_processors()); 00524 00525 MeshBase::element_iterator elem_it = mesh.active_elements_begin(); 00526 MeshBase::element_iterator elem_end = mesh.active_elements_end(); 00527 00528 for (; elem_it != elem_end; ++elem_it) 00529 { 00530 Elem *elem = *elem_it; 00531 00532 // we need to get the index from the owning processor 00533 // (note we cannot assign it now -- we are iterating 00534 // over elements again and this will be bad!) 00535 libmesh_assert_less (elem->processor_id(), requested_ids.size()); 00536 requested_ids[elem->processor_id()].push_back(elem->id()); 00537 } 00538 00539 // Trade with all processors (including self) to get their indices 00540 for (processor_id_type pid=0; pid<mesh.n_processors(); pid++) 00541 { 00542 // Trade my requests with processor procup and procdown 00543 const processor_id_type procup = (mesh.processor_id() + pid) % 00544 mesh.n_processors(); 00545 const processor_id_type procdown = (mesh.n_processors() + 00546 mesh.processor_id() - pid) % 00547 mesh.n_processors(); 00548 00549 mesh.comm().send_receive (procup, requested_ids[procup], 00550 procdown, requests_to_fill[procdown]); 00551 00552 // we can overwrite these requested ids in-place. 00553 for (std::size_t i=0; i<requests_to_fill[procdown].size(); i++) 00554 { 00555 const dof_id_type requested_elem_index = 00556 requests_to_fill[procdown][i]; 00557 00558 libmesh_assert(_global_index_by_pid_map.count(requested_elem_index)); 00559 00560 const dof_id_type global_index_by_pid = 00561 _global_index_by_pid_map[requested_elem_index]; 00562 00563 const dof_id_type local_index = 00564 global_index_by_pid - first_local_elem; 00565 00566 libmesh_assert_less (local_index, _part.size()); 00567 libmesh_assert_less (local_index, mesh.n_active_local_elem()); 00568 00569 const unsigned int elem_procid = 00570 static_cast<unsigned int>(_part[local_index]); 00571 00572 libmesh_assert_less (elem_procid, static_cast<unsigned int>(_nparts)); 00573 00574 requests_to_fill[procdown][i] = elem_procid; 00575 } 00576 00577 // Trade back 00578 mesh.comm().send_receive (procdown, requests_to_fill[procdown], 00579 procup, requested_ids[procup]); 00580 } 00581 00582 // and finally assign the partitioning. 00583 // note we are iterating in exactly the same order 00584 // used to build up the request, so we can expect the 00585 // required entries to be in the proper sequence. 00586 elem_it = mesh.active_elements_begin(); 00587 elem_end = mesh.active_elements_end(); 00588 00589 for (std::vector<unsigned int> counters(mesh.n_processors(), 0); 00590 elem_it != elem_end; ++elem_it) 00591 { 00592 Elem *elem = *elem_it; 00593 00594 const processor_id_type current_pid = elem->processor_id(); 00595 00596 libmesh_assert_less (counters[current_pid], requested_ids[current_pid].size()); 00597 00598 const processor_id_type elem_procid = 00599 requested_ids[current_pid][counters[current_pid]++]; 00600 00601 libmesh_assert_less (elem_procid, static_cast<unsigned int>(_nparts)); 00602 elem->processor_id() = elem_procid; 00603 } 00604 } 00605 00606 #endif // #ifdef LIBMESH_HAVE_PARMETIS 00607 00608 } // namespace libMesh