$extrastylesheet
parallel_implementation.h
Go to the documentation of this file.
00001 // The libMesh Finite Element Library.
00002 // Copyright (C) 2002-2014 Benjamin S. Kirk, John W. Peterson, Roy H. Stogner
00003 
00004 // This library is free software; you can redistribute it and/or
00005 // modify it under the terms of the GNU Lesser General Public
00006 // License as published by the Free Software Foundation; either
00007 // version 2.1 of the License, or (at your option) any later version.
00008 
00009 // This library is distributed in the hope that it will be useful,
00010 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00011 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012 // Lesser General Public License for more details.
00013 
00014 // You should have received a copy of the GNU Lesser General Public
00015 // License along with this library; if not, write to the Free Software
00016 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00017 
00018 
00019 #ifndef LIBMESH_PARALLEL_IMPLEMENTATION_H
00020 #define LIBMESH_PARALLEL_IMPLEMENTATION_H
00021 
00022 // Local includes
00023 #include "parallel.h"
00024 #include "libmesh_logging.h"
00025 
00026 // C++ includes
00027 #include <iterator> // iterator_traits
00028 
00029 namespace libMesh {
00030 namespace Parallel {
00031 
00032 // First declare StandardType specializations so we can use them in anonymous
00033 // helper functions later
00034 
00035 #ifdef LIBMESH_HAVE_MPI
00036 
00037 #define STANDARD_TYPE(cxxtype,mpitype)                                  \
00038   template<>                                                            \
00039   class StandardType<cxxtype> : public DataType                         \
00040   {                                                                     \
00041   public:                                                               \
00042     explicit                                                            \
00043       StandardType(const cxxtype* = NULL) : DataType(mpitype) {}        \
00044   }
00045 
00046 #else
00047 
00048 #define STANDARD_TYPE(cxxtype,mpitype)                          \
00049   template<>                                                    \
00050   class StandardType<cxxtype> : public DataType                 \
00051   {                                                             \
00052   public:                                                       \
00053     explicit                                                    \
00054       StandardType(const cxxtype* = NULL) : DataType() {}       \
00055   }
00056 
00057 #endif
00058 
00059 #define INT_TYPE(cxxtype,mpitype)                                       \
00060   STANDARD_TYPE(cxxtype,mpitype);                                       \
00061                                                                         \
00062   template<>                                                            \
00063   struct Attributes<cxxtype>                                            \
00064   {                                                                     \
00065     static const bool has_min_max = true;                               \
00066     static void set_lowest(cxxtype& x) { x = std::numeric_limits<cxxtype>::min(); } \
00067     static void set_highest(cxxtype& x) { x = std::numeric_limits<cxxtype>::max(); } \
00068   }
00069 
00070 #define FLOAT_TYPE(cxxtype,mpitype)                                     \
00071   STANDARD_TYPE(cxxtype,mpitype);                                       \
00072                                                                         \
00073   template<>                                                            \
00074   struct Attributes<cxxtype>                                            \
00075   {                                                                     \
00076     static const bool has_min_max = true;                               \
00077     static void set_lowest(cxxtype& x) { x = -std::numeric_limits<cxxtype>::max(); } \
00078     static void set_highest(cxxtype& x) { x = std::numeric_limits<cxxtype>::max(); } \
00079   }
00080 
00081 #define CONTAINER_TYPE(cxxtype)                                         \
00082   template<typename T>                                                  \
00083   struct Attributes<cxxtype<T> >                                        \
00084   {                                                                     \
00085     static const bool has_min_max = Attributes<T>::has_min_max;         \
00086     static void set_lowest(cxxtype<T>& x) {                             \
00087       for (typename cxxtype<T>::iterator i = x.begin(); i != x.end(); ++i) \
00088         Attributes<T>::set_lowest(*i); }                                \
00089     static void set_highest(cxxtype<T>& x) {                            \
00090       for (typename cxxtype<T>::iterator i = x.begin(); i != x.end(); ++i) \
00091         Attributes<T>::set_highest(*i); }                               \
00092   }
00093 
00094 
00095 INT_TYPE(char,MPI_CHAR);
00096 #if MPI_VERSION > 1
00097 INT_TYPE(signed char,MPI_SIGNED_CHAR);
00098 #endif
00099 INT_TYPE(unsigned char,MPI_UNSIGNED_CHAR);
00100 INT_TYPE(short int,MPI_SHORT);
00101 INT_TYPE(unsigned short int,MPI_UNSIGNED_SHORT);
00102 INT_TYPE(int,MPI_INT);
00103 INT_TYPE(unsigned int,MPI_UNSIGNED);
00104 INT_TYPE(long,MPI_LONG);
00105 INT_TYPE(unsigned long,MPI_UNSIGNED_LONG);
00106 INT_TYPE(unsigned long long,MPI_LONG_LONG_INT);
00107 FLOAT_TYPE(float,MPI_FLOAT);
00108 FLOAT_TYPE(double,MPI_DOUBLE);
00109 FLOAT_TYPE(long double,MPI_LONG_DOUBLE);
00110 CONTAINER_TYPE(std::set);
00111 CONTAINER_TYPE(std::vector);
00112 
00113 // We'd love to do a singleton pattern on derived data types, rather
00114 // than commit, free, commit, free, ad infinitum... but it's a
00115 // little tricky when our T1 and T2 are undefined.
00116 template<typename T1, typename T2>
00117 class StandardType<std::pair<T1, T2> > : public DataType
00118 {
00119 public:
00120   explicit
00121   StandardType(const std::pair<T1, T2> *example = NULL) {
00122     // We need an example for MPI_Address to use
00123     libmesh_assert(example);
00124 
00125 #ifdef LIBMESH_HAVE_MPI
00126     // Get the sub-data-types, and make sure they live long enough
00127     // to construct the derived type
00128     StandardType<T1> d1(&example->first);
00129     StandardType<T2> d2(&example->second);
00130     MPI_Datatype types[] = { (data_type)d1, (data_type)d2 };
00131     int blocklengths[] = {1,1};
00132 
00133     MPI_Aint displs[2];
00134 #if MPI_VERSION > 1
00135     MPI_Get_address (const_cast<T1*>(&example->first), &displs[0]);
00136     MPI_Get_address (const_cast<T2*>(&example->second), &displs[1]);
00137 #else
00138     MPI_Address (const_cast<T1*>(&example->first), &displs[0]);
00139     MPI_Address (const_cast<T2*>(&example->second), &displs[1]);
00140 #endif
00141     displs[1] -= displs[0];
00142     displs[0] = 0;
00143 
00144 #if MPI_VERSION > 1
00145     MPI_Type_create_struct (2, blocklengths, displs, types, &_datatype);
00146 #else
00147     MPI_Type_struct (2, blocklengths, displs, types, &_datatype);
00148 #endif // #if MPI_VERSION > 1
00149     MPI_Type_commit (&_datatype);
00150 #endif // LIBMESH_HAVE_MPI
00151   }
00152 
00153   ~StandardType() { this->free(); }
00154 };
00155 
00156 template<typename T>
00157 class StandardType<std::complex<T> > : public DataType
00158 {
00159 public:
00160   explicit
00161   StandardType(const std::complex<T> * /*example*/ = NULL) :
00162     DataType(StandardType<T>(NULL), 2) {}
00163 
00164   ~StandardType() { this->free(); }
00165 };
00166 
00167 } // namespace Parallel
00168 
00169 } // namespace libMesh
00170 
00171 
00172 // Anonymous namespace for helper functions
00173 namespace {
00174 
00175 // Internal helper function to create vector<something_useable> from
00176 // vector<bool> for compatibility with MPI bitwise operations
00177 template <typename T>
00178 inline void pack_vector_bool(const std::vector<bool> &in,
00179                              std::vector<T> &out)
00180 {
00181   unsigned int data_bits = 8*sizeof(T);
00182   std::size_t in_size = in.size();
00183   std::size_t out_size = in_size/data_bits + ((in_size%data_bits)?1:0);
00184   out.clear();
00185   out.resize(out_size);
00186   for (std::size_t i=0; i != in_size; ++i)
00187     {
00188       std::size_t index = i/data_bits;
00189       std::size_t offset = i%data_bits;
00190       out[index] += (in[i]?1:0) << offset;
00191     }
00192 }
00193 
00194 // Internal helper function to create vector<bool> from
00195 // vector<something usable> for compatibility with MPI byte
00196 // operations
00197 template <typename T>
00198 inline void unpack_vector_bool(const std::vector<T> &in,
00199                                std::vector<bool> &out)
00200 {
00201   unsigned int data_bits = 8*sizeof(T);
00202   // We need the output vector to already be properly sized
00203   std::size_t out_size = out.size();
00204   libmesh_assert_equal_to (out_size/data_bits + (out_size%data_bits?1:0), in.size());
00205 
00206   for (std::size_t i=0; i != out_size; ++i)
00207     {
00208       std::size_t index = i/data_bits;
00209       std::size_t offset = i%data_bits;
00210       out[i] = in[index] << (data_bits-1-offset) >> (data_bits-1);
00211     }
00212 }
00213 
00214 
00215 #ifdef LIBMESH_HAVE_MPI
00216 // We use a helper function here to avoid ambiguity when calling
00217 // send_receive of (vector<vector<T>>,vector<vector<T>>)
00218 template <typename T1, typename T2>
00219 inline void send_receive_vec_of_vec
00220 (const unsigned int dest_processor_id,
00221  std::vector<std::vector<T1> > &send,
00222  const unsigned int source_processor_id,
00223  std::vector<std::vector<T2> > &recv,
00224  const libMesh::Parallel::MessageTag &send_tag,
00225  const libMesh::Parallel::MessageTag &recv_tag,
00226  const libMesh::Parallel::Communicator &comm)
00227 {
00228   START_LOG("send_receive()", "Parallel");
00229 
00230   if (dest_processor_id   == comm.rank() &&
00231       source_processor_id == comm.rank())
00232     {
00233       recv = send;
00234       STOP_LOG("send_receive()", "Parallel");
00235       return;
00236     }
00237 
00238   // temporary buffers - these will be sized in bytes
00239   // and manipulated with MPI_Pack and friends
00240   std::vector<char> sendbuf, recvbuf;
00241 
00242   // figure out how many bytes we need to pack all the data
00243   int packedsize=0, sendsize=0;
00244 
00245   // The outer buffer size
00246   MPI_Pack_size (1,
00247                  libMesh::Parallel::StandardType<unsigned int>(),
00248                  comm.get(),
00249                  &packedsize);
00250   sendsize += packedsize;
00251 
00252   for (std::size_t i=0; i<send.size(); i++)
00253     {
00254       // The size of the ith inner buffer
00255       MPI_Pack_size (1,
00256                      libMesh::Parallel::StandardType<unsigned int>(),
00257                      comm.get(),
00258                      &packedsize);
00259       sendsize += packedsize;
00260 
00261       // The data for each inner buffer
00262       MPI_Pack_size (libMesh::cast_int<int>(send[i].size()),
00263                      libMesh::Parallel::StandardType<T1>
00264                      (send[i].empty() ? NULL : &send[i][0]),
00265                      comm.get(),
00266                      &packedsize);
00267       sendsize += packedsize;
00268     }
00269 
00270   libmesh_assert (sendsize /* should at least be 1! */);
00271   sendbuf.resize (sendsize);
00272 
00273   // Pack the send buffer
00274   int pos=0;
00275 
00276   // ... the size of the outer buffer
00277   sendsize = libMesh::cast_int<int>(send.size());
00278   MPI_Pack (&sendsize, 1, libMesh::Parallel::StandardType<unsigned int>(),
00279             &sendbuf[0], libMesh::cast_int<int>(sendbuf.size()), &pos,
00280             comm.get());
00281 
00282   for (std::size_t i=0; i<send.size(); i++)
00283     {
00284       // ... the size of the ith inner buffer
00285       sendsize = libMesh::cast_int<int>(send[i].size());
00286       MPI_Pack (&sendsize, 1, libMesh::Parallel::StandardType<unsigned int>(),
00287                 &sendbuf[0], libMesh::cast_int<int>(sendbuf.size()), &pos,
00288                 comm.get());
00289 
00290       // ... the contents of the ith inner buffer
00291       if (!send[i].empty())
00292         MPI_Pack (&send[i][0], libMesh::cast_int<int>(send[i].size()),
00293                   libMesh::Parallel::StandardType<T1>(&send[i][0]),
00294                   &sendbuf[0], libMesh::cast_int<int>(sendbuf.size()), &pos,
00295                   comm.get());
00296     }
00297 
00298   libmesh_assert_equal_to (static_cast<unsigned int>(pos), sendbuf.size());
00299 
00300   libMesh::Parallel::Request request;
00301 
00302   comm.send (dest_processor_id, sendbuf, MPI_PACKED, request, send_tag);
00303 
00304   comm.receive (source_processor_id, recvbuf, MPI_PACKED, recv_tag);
00305 
00306   // Unpack the received buffer
00307   libmesh_assert (!recvbuf.empty());
00308   pos=0;
00309   MPI_Unpack (&recvbuf[0], libMesh::cast_int<int>(recvbuf.size()), &pos,
00310               &sendsize, 1, libMesh::Parallel::StandardType<unsigned int>(),
00311               comm.get());
00312 
00313   // ... size the outer buffer
00314   recv.resize (sendsize);
00315 
00316   for (std::size_t i=0; i<recv.size(); i++)
00317     {
00318       MPI_Unpack (&recvbuf[0], libMesh::cast_int<int>(recvbuf.size()), &pos,
00319                   &sendsize, 1, libMesh::Parallel::StandardType<unsigned int>(),
00320                   comm.get());
00321 
00322       // ... size the inner buffer
00323       recv[i].resize (sendsize);
00324 
00325       // ... unpack the inner buffer if it is not empty
00326       if (!recv[i].empty())
00327         MPI_Unpack (&recvbuf[0], libMesh::cast_int<int>(recvbuf.size()), &pos,
00328                     &recv[i][0], libMesh::cast_int<int>(recv[i].size()),
00329                     libMesh::Parallel::StandardType<T2>(&recv[i][0]),
00330                     comm.get());
00331     }
00332 
00333   request.wait();
00334 
00335   STOP_LOG("send_receive()", "Parallel");
00336 }
00337 
00338 #endif // LIBMESH_HAVE_MPI
00339 
00340 } // Anonymous namespace
00341 
00342 
00343 
00344 namespace libMesh
00345 {
00346 
00347 namespace Parallel
00348 {
00349 
00350 /*
00351  * A reference to the default libMesh communicator.  This is now
00352  * deprecated - instead of libMesh::Parallel::Communicator_World use
00353  * libMesh::CommWorld
00354  */
00355 #ifdef LIBMESH_DISABLE_COMMWORLD
00356 extern FakeCommunicator& Communicator_World;
00357 #else
00358 extern Communicator& Communicator_World;
00359 #endif
00360 
00361 
00365 template <typename Context, typename Iter>
00366 inline std::size_t packed_range_size (const Context *context,
00367                                       Iter range_begin,
00368                                       const Iter range_end)
00369 {
00370   std::size_t buffer_size = 0;
00371   for (Iter range_count = range_begin;
00372        range_count != range_end;
00373        ++range_count)
00374     {
00375       buffer_size += Parallel::packable_size(*range_count, context);
00376     }
00377   return buffer_size;
00378 }
00379 
00380 
00384 template <typename Context, typename buffertype, typename Iter>
00385 inline Iter pack_range (const Context *context,
00386                         Iter range_begin,
00387                         const Iter range_end,
00388                         std::vector<buffertype>& buffer)
00389 {
00390   // When we serialize into buffers, we need to use large buffers to optimize MPI
00391   // bandwidth, but not so large as to risk allocation failures.  max_buffer_size
00392   // is measured in number of buffer type entries; number of bytes may be 4 or 8
00393   // times larger depending on configuration.
00394 
00395   static const std::size_t max_buffer_size = 1000000;
00396   // static const std::size_t max_buffer_size = std::size_t(-1);
00397 
00398   // Count the total size of and preallocate buffer for efficiency.
00399   // Prepare to stop early if the buffer would be too large.
00400   std::size_t buffer_size = 0;
00401   Iter range_stop = range_begin;
00402   for (; range_stop != range_end; ++range_stop)
00403     {
00404       std::size_t next_buffer_size =
00405         Parallel::packable_size(*range_stop, context);
00406       if (buffer_size + next_buffer_size >= max_buffer_size)
00407         break;
00408       else
00409         buffer_size += next_buffer_size;
00410     }
00411   buffer.reserve(buffer.size() + buffer_size);
00412 
00413   // Pack the objects into the buffer
00414   for (; range_begin != range_stop; ++range_begin)
00415     {
00416 #ifndef NDEBUG
00417       std::size_t old_size = buffer.size();
00418 #endif
00419 
00420       Parallel::pack(*range_begin, buffer, context);
00421 
00422 #ifndef NDEBUG
00423       unsigned int my_packable_size =
00424         Parallel::packable_size(*range_begin, context);
00425       unsigned int my_packed_size =
00426         Parallel::packed_size (*range_begin, buffer.begin() +
00427                                old_size);
00428       libmesh_assert_equal_to (my_packable_size, my_packed_size);
00429       libmesh_assert_equal_to (buffer.size(), old_size + my_packable_size);
00430 #endif
00431     }
00432 
00433   return range_stop;
00434 }
00435 
00436 
00437 
00441 template <typename Context, typename buffertype, typename OutputIter>
00442 inline void unpack_range (const std::vector<buffertype>& buffer,
00443                           Context *context,
00444                           OutputIter out)
00445 {
00446   // Our objects should be of the correct type to be assigned to the
00447   // output iterator
00448   typedef typename std::iterator_traits<OutputIter>::value_type T;
00449 
00450   // Loop through the buffer and unpack each object, returning the
00451   // object pointer via the output iterator
00452   typename std::vector<buffertype>::const_iterator
00453     next_object_start = buffer.begin();
00454 
00455   while (next_object_start < buffer.end())
00456     {
00457       T* obj;
00458       Parallel::unpack(next_object_start, &obj, context);
00459       libmesh_assert(obj);
00460       next_object_start += Parallel::packed_size(obj, next_object_start);
00461       *out++ = obj;
00462     }
00463 
00464   // We should have used up the exact amount of data in the buffer
00465   libmesh_assert (next_object_start == buffer.end());
00466 }
00467 
00468 
00469 inline Communicator::Communicator () :
00470 #ifdef LIBMESH_HAVE_MPI
00471   _communicator(MPI_COMM_SELF),
00472 #endif
00473   _rank(0),
00474   _size(1),
00475   _send_mode(DEFAULT),
00476   used_tag_values(),
00477   _I_duped_it(false) {}
00478 
00479 inline Communicator::Communicator (const communicator &comm) :
00480 #ifdef LIBMESH_HAVE_MPI
00481   _communicator(MPI_COMM_SELF),
00482 #endif
00483   _rank(0),
00484   _size(1),
00485   _send_mode(DEFAULT),
00486   used_tag_values(),
00487   _I_duped_it(false)
00488 {
00489   this->assign(comm);
00490 }
00491 
00492 inline Communicator::~Communicator () {
00493   this->clear();
00494 }
00495 
00496 #ifdef LIBMESH_HAVE_MPI
00497 inline void Communicator::split(int color, int key, Communicator &target) const {
00498   target.clear();
00499   MPI_Comm newcomm;
00500   MPI_Comm_split(this->get(), color, key, &newcomm);
00501   target.assign(newcomm);
00502   target.send_mode(this->send_mode());
00503 }
00504 #else
00505 inline void Communicator::split(int, int, Communicator &target) const {
00506   target.assign(this->get());
00507 }
00508 #endif
00509 
00510 inline void Communicator::duplicate(const Communicator &comm) {
00511   this->duplicate(comm._communicator);
00512   this->send_mode(comm.send_mode());
00513 }
00514 
00515 #ifdef LIBMESH_HAVE_MPI
00516 inline void Communicator::duplicate(const communicator &comm) {
00517   if (_communicator != MPI_COMM_NULL)
00518     {
00519       MPI_Comm_dup(comm, &_communicator);
00520       _I_duped_it = true;
00521     }
00522   this->assign(_communicator);
00523 }
00524 #else
00525 inline void Communicator::duplicate(const communicator &) { }
00526 #endif
00527 
00528 inline void Communicator::clear() {
00529 #ifdef LIBMESH_HAVE_MPI
00530   if (_I_duped_it)
00531     {
00532       libmesh_assert (_communicator != MPI_COMM_NULL);
00533       MPI_Comm_free(&_communicator);
00534       _communicator = MPI_COMM_NULL;
00535     }
00536   _I_duped_it = false;
00537 #endif
00538 }
00539 
00540 inline Communicator& Communicator::operator= (const communicator &comm) {
00541   this->clear();
00542   this->assign(comm);
00543   return *this;
00544 }
00545 
00546 // Disallowed copy constructor
00547 inline Communicator::Communicator (const Communicator &) :
00548 #ifdef LIBMESH_HAVE_MPI
00549   _communicator(MPI_COMM_NULL),
00550 #endif
00551   _rank(0),
00552   _size(1),
00553   _send_mode(DEFAULT),
00554   used_tag_values(),
00555   _I_duped_it(false)
00556 {
00557   libmesh_not_implemented();
00558 }
00559 
00560 inline void Communicator::assign(const communicator &comm)
00561 {
00562   _communicator = comm;
00563 #ifdef LIBMESH_HAVE_MPI
00564   if (_communicator != MPI_COMM_NULL)
00565     {
00566       int i;
00567       MPI_Comm_size(_communicator, &i);
00568       libmesh_assert_greater_equal (i, 0);
00569       _size = static_cast<unsigned int>(i);
00570 
00571       MPI_Comm_rank(_communicator, &i);
00572       libmesh_assert_greater_equal (i, 0);
00573       _rank = static_cast<unsigned int>(i);
00574     }
00575   else
00576     {
00577       _rank = 0;
00578       _size = 1;
00579     }
00580 #endif
00581   _send_mode = DEFAULT;
00582 }
00583 
00584 
00585 
00586 inline Status::Status () :
00587   _status(),
00588   _datatype()
00589 {}
00590 
00591 inline Status::Status (const data_type &type) :
00592   _status(),
00593   _datatype(type)
00594 {}
00595 
00596 inline Status::Status (const status &stat) :
00597   _status(stat),
00598   _datatype()
00599 {}
00600 
00601 inline Status::Status (const status &stat,
00602                        const data_type &type) :
00603   _status(stat),
00604   _datatype(type)
00605 {}
00606 
00607 inline Status::Status (const Status &stat) :
00608   _status(stat._status),
00609   _datatype(stat._datatype)
00610 {}
00611 
00612 inline Status::Status (const Status    &stat,
00613                        const data_type &type) :
00614   _status(stat._status),
00615   _datatype(type)
00616 {}
00617 
00618 inline int Status::source () const
00619 {
00620 #ifdef LIBMESH_HAVE_MPI
00621   return _status.MPI_SOURCE;
00622 #else
00623   return 0;
00624 #endif
00625 }
00626 
00627 inline int Status::tag () const
00628 {
00629 #ifdef LIBMESH_HAVE_MPI
00630   return _status.MPI_TAG;
00631 #else
00632   libmesh_not_implemented();
00633   return 0;
00634 #endif
00635 }
00636 
00637 #ifdef LIBMESH_HAVE_MPI
00638 inline unsigned int Status::size (const data_type &type) const
00639 {
00640   int msg_size;
00641   MPI_Get_count (const_cast<MPI_Status*>(&_status), type, &msg_size);
00642   libmesh_assert_greater_equal (msg_size, 0);
00643   return msg_size;
00644 }
00645 #else
00646 inline unsigned int Status::size (const data_type &) const
00647 {
00648   libmesh_not_implemented();
00649   return 0;
00650 }
00651 #endif
00652 
00653 inline unsigned int Status::size () const
00654 { return this->size (this->datatype()); }
00655 
00656 
00657 
00658 inline Request::Request () :
00659 #ifdef LIBMESH_HAVE_MPI
00660   _request(MPI_REQUEST_NULL),
00661 #else
00662   _request(),
00663 #endif
00664   post_wait_work(NULL)
00665 {}
00666 
00667 inline Request::Request (const request &r) :
00668   _request(r),
00669   post_wait_work(NULL)
00670 {}
00671 
00672 inline Request::Request (const Request &other) :
00673   _request(other._request),
00674   post_wait_work(other.post_wait_work)
00675 {
00676   if (other._prior_request.get())
00677     _prior_request = UniquePtr<Request>
00678       (new Request(*other._prior_request.get()));
00679 
00680   // operator= should behave like a shared pointer
00681   if (post_wait_work)
00682     post_wait_work->second++;
00683 }
00684 
00685 inline void Request::cleanup()
00686 {
00687   if (post_wait_work)
00688     {
00689       // Decrement the use count
00690       post_wait_work->second--;
00691 
00692       if (!post_wait_work->second)
00693         {
00694 #ifdef DEBUG
00695           // If we're done using this request, then we'd better have
00696           // done the work we waited for
00697           for (std::vector<PostWaitWork*>::iterator i =
00698                  post_wait_work->first.begin();
00699                i != post_wait_work->first.end(); ++i)
00700             libmesh_assert(!(*i));
00701 #endif
00702           delete post_wait_work;
00703           post_wait_work = NULL;
00704         }
00705     }
00706 }
00707 
00708 inline Request& Request::operator = (const Request &other)
00709 {
00710   this->cleanup();
00711   _request = other._request;
00712   post_wait_work = other.post_wait_work;
00713 
00714   if (other._prior_request.get())
00715     _prior_request = UniquePtr<Request>
00716       (new Request(*other._prior_request.get()));
00717 
00718   // operator= should behave like a shared pointer
00719   if (post_wait_work)
00720     post_wait_work->second++;
00721 
00722   return *this;
00723 }
00724 
00725 inline Request& Request::operator = (const request &r)
00726 {
00727   this->cleanup();
00728   _request = r;
00729   post_wait_work = NULL;
00730   return *this;
00731 }
00732 
00733 inline Request::~Request () {
00734   this->cleanup();
00735 }
00736 
00737 inline Status Request::wait ()
00738 {
00739   START_LOG("wait()", "Parallel::Request");
00740 
00741   if (_prior_request.get())
00742     _prior_request->wait();
00743 
00744   Status stat;
00745 #ifdef LIBMESH_HAVE_MPI
00746   MPI_Wait (&_request, stat.get());
00747 #endif
00748   if (post_wait_work)
00749     for (std::vector<PostWaitWork*>::iterator i =
00750            post_wait_work->first.begin();
00751          i != post_wait_work->first.end(); ++i)
00752       {
00753         // The user should never try to give us NULL work or try
00754         // to wait() twice.
00755         libmesh_assert (*i);
00756         (*i)->run();
00757         delete (*i);
00758         *i = NULL;
00759       }
00760 
00761   STOP_LOG("wait()", "Parallel::Request");
00762   return stat;
00763 }
00764 
00765 inline bool Request::test ()
00766 {
00767 #ifdef LIBMESH_HAVE_MPI
00768   int val=0;
00769 
00770   MPI_Test (&_request,
00771             &val,
00772             MPI_STATUS_IGNORE);
00773   if (val)
00774     {
00775       libmesh_assert          (_request == MPI_REQUEST_NULL);
00776       libmesh_assert_equal_to (val, 1);
00777     }
00778 
00779   return val;
00780 #else
00781   return true;
00782 #endif
00783 }
00784 
00785 #ifdef LIBMESH_HAVE_MPI
00786 inline bool Request::test (status &stat)
00787 {
00788   int val=0;
00789 
00790   MPI_Test (&_request,
00791             &val,
00792             &stat);
00793 
00794   return val;
00795 }
00796 #else
00797 inline bool Request::test (status &)
00798 {
00799   return true;
00800 }
00801 #endif
00802 
00803 inline void Request::add_prior_request(const Request& req)
00804 {
00805   // We're making a chain of prior requests, not a tree
00806   libmesh_assert(!req._prior_request.get());
00807 
00808   Request *new_prior_req = new Request(req);
00809 
00810   // new_prior_req takes ownership of our existing _prior_request
00811   new_prior_req->_prior_request.reset(this->_prior_request.release());
00812 
00813   // Our _prior_request now manages the new resource we just set up
00814   this->_prior_request.reset(new_prior_req);
00815 }
00816 
00817 inline void Request::add_post_wait_work(PostWaitWork* work)
00818 {
00819   if (!post_wait_work)
00820     post_wait_work = new
00821       std::pair<std::vector <PostWaitWork* >, unsigned int>
00822       (std::vector <PostWaitWork* >(), 1);
00823   post_wait_work->first.push_back(work);
00824 }
00825 
00826 
00827 
00831 #ifdef LIBMESH_HAVE_MPI
00832 inline void Communicator::barrier () const
00833 {
00834   if (this->size() > 1)
00835     {
00836       START_LOG("barrier()", "Parallel");
00837 
00838       MPI_Barrier (this->get());
00839 
00840       STOP_LOG("barrier()", "Parallel");
00841     }
00842 }
00843 #else
00844 inline void Communicator::barrier () const {}
00845 #endif
00846 
00847 
00848 // legacy e.g. Paralell::send() methods, requires
00849 // Communicator_World
00850 #ifndef LIBMESH_DISABLE_COMMWORLD
00851 inline void barrier (const Communicator &comm = Communicator_World)
00852 {
00853   comm.barrier();
00854 }
00855 
00856 template <typename T>
00857 inline bool verify(const T &r,
00858                    const Communicator &comm = Communicator_World)
00859 { return comm.verify(r); }
00860 
00861 template <typename T>
00862 inline void min(T &r,
00863                 const Communicator &comm = Communicator_World)
00864 { comm.min(r); }
00865 
00866 template <typename T, typename U>
00867 inline void minloc(T &r,
00868                    U &min_id,
00869                    const Communicator &comm = Communicator_World)
00870 { comm.minloc(r, min_id); }
00871 
00872 template <typename T>
00873 inline void max(T &r,
00874                 const Communicator &comm = Communicator_World)
00875 { comm.max(r); }
00876 
00877 template <typename T, typename U>
00878 inline void maxloc(T &r,
00879                    U &max_id,
00880                    const Communicator &comm = Communicator_World)
00881 { comm.maxloc(r, max_id); }
00882 
00883 template <typename T>
00884 inline void sum(T &r,
00885                 const Communicator &comm = Communicator_World)
00886 { comm.sum(r); }
00887 
00888 template <typename T>
00889 inline void set_union(T &data, const unsigned int root_id,
00890                       const Communicator &comm = Communicator_World)
00891 { comm.set_union(data, root_id); }
00892 
00893 template <typename T>
00894 inline void set_union(T &data,
00895                       const Communicator &comm = Communicator_World)
00896 { comm.set_union(data); }
00897 
00898 inline status probe (const unsigned int src_processor_id,
00899                      const MessageTag &tag=any_tag,
00900                      const Communicator &comm = Communicator_World)
00901 { return comm.probe(src_processor_id, tag); }
00902 
00903 template <typename T>
00904 inline void send (const unsigned int dest_processor_id,
00905                   T &data,
00906                   const MessageTag &tag=no_tag,
00907                   const Communicator &comm = Communicator_World)
00908 { comm.send(dest_processor_id, data, tag); }
00909 
00910 template <typename T>
00911 inline void send (const unsigned int dest_processor_id,
00912                   T &data,
00913                   Request &req,
00914                   const MessageTag &tag=no_tag,
00915                   const Communicator &comm = Communicator_World)
00916 { comm.send(dest_processor_id, data, req, tag); }
00917 
00918 template <typename T>
00919 inline void send (const unsigned int dest_processor_id,
00920                   T &data,
00921                   const DataType &type,
00922                   const MessageTag &tag=no_tag,
00923                   const Communicator &comm = Communicator_World)
00924 { comm.send(dest_processor_id, data, type, tag); }
00925 
00926 template <typename T>
00927 inline void send (const unsigned int dest_processor_id,
00928                   T &data,
00929                   const DataType &type,
00930                   Request &req,
00931                   const MessageTag &tag=no_tag,
00932                   const Communicator &comm = Communicator_World)
00933 { comm.send(dest_processor_id, data, type, req, tag); }
00934 
00935 
00936 template <typename Context, typename Iter>
00937 inline void send_packed_range (const unsigned int dest_processor_id,
00938                                const Context *context,
00939                                Iter range_begin,
00940                                const Iter range_end,
00941                                const MessageTag &tag=no_tag,
00942                                const Communicator &comm = Communicator_World)
00943 { comm.send_packed_range(dest_processor_id, context, range_begin, range_end, tag); }
00944 
00945 
00946 template <typename Context, typename Iter>
00947 inline void send_packed_range (const unsigned int dest_processor_id,
00948                                const Context *context,
00949                                Iter range_begin,
00950                                const Iter range_end,
00951                                Request &req,
00952                                const MessageTag &tag=no_tag,
00953                                const Communicator &comm = Communicator_World)
00954 { comm.send_packed_range(dest_processor_id, context, range_begin, range_end, req, tag); }
00955 
00956 
00957 template <typename T>
00958 inline void nonblocking_send (const unsigned int dest_processor_id,
00959                               T &buf,
00960                               const DataType &type,
00961                               Request &r,
00962                               const MessageTag &tag=no_tag,
00963                               const Communicator &comm = Communicator_World)
00964 { comm.send (dest_processor_id, buf, type, r, tag); }
00965 
00966 template <typename T>
00967 inline void nonblocking_send (const unsigned int dest_processor_id,
00968                               T &buf,
00969                               Request &r,
00970                               const MessageTag &tag=no_tag,
00971                               const Communicator &comm = Communicator_World)
00972 { comm.send (dest_processor_id, buf, r, tag); }
00973 
00974 template <typename T>
00975 inline Status receive (const unsigned int src_processor_id,
00976                        T &buf,
00977                        const MessageTag &tag=any_tag,
00978                        const Communicator &comm = Communicator_World)
00979 { return comm.receive (src_processor_id, buf, tag); }
00980 
00981 template <typename T>
00982 inline void receive (const unsigned int src_processor_id,
00983                      T &buf,
00984                      Request &req,
00985                      const MessageTag &tag=any_tag,
00986                      const Communicator &comm = Communicator_World)
00987 { comm.receive (src_processor_id, buf, req, tag); }
00988 
00989 template <typename T>
00990 inline Status receive (const unsigned int src_processor_id,
00991                        T &buf,
00992                        const DataType &type,
00993                        const MessageTag &tag=any_tag,
00994                        const Communicator &comm = Communicator_World)
00995 { return comm.receive (src_processor_id, buf, type, tag); }
00996 
00997 template <typename T>
00998 inline void receive (const unsigned int src_processor_id,
00999                      T &buf,
01000                      const DataType &type,
01001                      Request &req,
01002                      const MessageTag &tag=any_tag,
01003                      const Communicator &comm = Communicator_World)
01004 { comm.receive (src_processor_id, buf, type, req, tag); }
01005 
01006 template <typename Context, typename OutputIter>
01007 inline void receive_packed_range (const unsigned int src_processor_id,
01008                                   Context *context,
01009                                   OutputIter out,
01010                                   const MessageTag &tag=any_tag,
01011                                   const Communicator &comm = Communicator_World)
01012 { comm.receive_packed_range (src_processor_id, context, out, tag); }
01013 
01014 // template <typename Context, typename OutputIter>
01015 // inline void receive_packed_range (const unsigned int src_processor_id,
01016 //                                   Context *context,
01017 //                                   OutputIter out,
01018 //                                   Request &req,
01019 //                                   const MessageTag &tag=any_tag,
01020 //                                   const Communicator &comm = Communicator_World)
01021 // { comm.receive_packed_range (src_processor_id, context, out, req, tag); }
01022 
01023 template <typename T>
01024 inline void nonblocking_receive (const unsigned int src_processor_id,
01025                                  T &buf,
01026                                  const DataType &type,
01027                                  Request &r,
01028                                  const MessageTag &tag=any_tag,
01029                                  const Communicator &comm = Communicator_World)
01030 { comm.receive (src_processor_id, buf, type, r, tag); }
01031 
01032 template <typename T>
01033 inline void nonblocking_receive (const unsigned int src_processor_id,
01034                                  T &buf,
01035                                  Request &r,
01036                                  const MessageTag &tag=any_tag,
01037                                  const Communicator &comm = Communicator_World)
01038 { comm.receive (src_processor_id, buf, r, tag); }
01039 
01040 template <typename T1, typename T2>
01041 inline void send_receive(const unsigned int dest_processor_id,
01042                          T1 &send,
01043                          const unsigned int source_processor_id,
01044                          T2 &recv,
01045                          const MessageTag &send_tag = no_tag,
01046                          const MessageTag &recv_tag = any_tag,
01047                          const Communicator &comm = Communicator_World)
01048 { comm.send_receive(dest_processor_id, send, source_processor_id, recv,
01049                     send_tag, recv_tag); }
01050 
01051 template <typename Context1, typename RangeIter, typename Context2, typename OutputIter>
01052 inline void send_receive_packed_range(const unsigned int dest_processor_id,
01053                                       const Context1* context1,
01054                                       RangeIter send_begin,
01055                                       const RangeIter send_end,
01056                                       const unsigned int source_processor_id,
01057                                       Context2* context2,
01058                                       OutputIter out,
01059                                       const MessageTag &send_tag = no_tag,
01060                                       const MessageTag &recv_tag = any_tag,
01061                                       const Communicator &comm = Communicator_World)
01062 { comm.send_receive_packed_range(dest_processor_id, context1, send_begin, send_end,
01063                                  source_processor_id, context2, out, send_tag, recv_tag); }
01064 
01065 template <typename T1, typename T2>
01066 inline void send_receive(const unsigned int dest_processor_id,
01067                          T1 &send,
01068                          const DataType &type1,
01069                          const unsigned int source_processor_id,
01070                          T2 &recv,
01071                          const DataType &type2,
01072                          const MessageTag &send_tag = no_tag,
01073                          const MessageTag &recv_tag = any_tag,
01074                          const Communicator &comm = Communicator_World)
01075 { comm.send_receive(dest_processor_id, send, type1, source_processor_id,
01076                     recv, type2, send_tag, recv_tag); }
01077 
01078 template <typename T>
01079 inline void gather(const unsigned int root_id,
01080                    T send,
01081                    std::vector<T> &recv,
01082                    const Communicator &comm = Communicator_World)
01083 { comm.gather(root_id, send, recv); }
01084 
01085 template <typename T>
01086 inline void gather(const unsigned int root_id,
01087                    std::vector<T> &r,
01088                    const Communicator &comm = Communicator_World)
01089 { comm.gather(root_id, r); }
01090 
01091 template <typename T>
01092 inline void allgather(T send,
01093                       std::vector<T> &recv,
01094                       const Communicator &comm = Communicator_World)
01095 { comm.allgather(send, recv); }
01096 
01097 template <typename T>
01098 inline void allgather(std::vector<T> &r,
01099                       const bool identical_buffer_sizes = false,
01100                       const Communicator &comm = Communicator_World)
01101 { comm.allgather(r, identical_buffer_sizes); }
01102 
01103 template <typename Context, typename Iter, typename OutputIter>
01104 inline void gather_packed_range (const unsigned int root_id,
01105                                  Context *context,
01106                                  Iter range_begin,
01107                                  const Iter range_end,
01108                                  OutputIter out,
01109                                  const Communicator &comm = Communicator_World)
01110 { comm.gather_packed_range(root_id, context, range_begin, range_end, out); }
01111 
01112 template <typename Context, typename Iter, typename OutputIter>
01113 inline void allgather_packed_range (Context *context,
01114                                     Iter range_begin,
01115                                     const Iter range_end,
01116                                     OutputIter out,
01117                                     const Communicator &comm = Communicator_World)
01118 { comm.allgather_packed_range(context, range_begin, range_end, out); }
01119 
01120 template <typename T>
01121 inline void alltoall(std::vector<T> &r,
01122                      const Communicator &comm = Communicator_World)
01123 { comm.alltoall(r); }
01124 
01125 template <typename T>
01126 inline void broadcast(T &data, const unsigned int root_id=0,
01127                       const Communicator &comm = Communicator_World)
01128 { comm.broadcast(data, root_id); }
01129 
01130 template <typename Context, typename OutputContext, typename Iter, typename OutputIter>
01131 inline void broadcast_packed_range (const Context *context1,
01132                                     Iter range_begin,
01133                                     const Iter range_end,
01134                                     OutputContext *context2,
01135                                     OutputIter out,
01136                                     const unsigned int root_id = 0,
01137                                     const Communicator &comm = Communicator_World)
01138 { comm.broadcast_packed_range(context1, range_begin, range_end, context2, out, root_id); }
01139 
01140 #endif // #ifndef LIBMESH_DISABLE_COMMWORLD
01141 
01142 //-----------------------------------------------------------------------
01143 // Parallel members
01144 
01145 inline
01146 MessageTag::~MessageTag()
01147 {
01148   if (_comm)
01149     _comm->dereference_unique_tag(_tagvalue);
01150 }
01151 
01152 
01153 inline
01154 MessageTag::MessageTag(const MessageTag &other)
01155   : _tagvalue(other._tagvalue), _comm(other._comm)
01156 {
01157   if (_comm)
01158     _comm->reference_unique_tag(_tagvalue);
01159 }
01160 
01161 
01162 inline
01163 MessageTag Communicator::get_unique_tag(int tagvalue) const
01164 {
01165   if (used_tag_values.count(tagvalue))
01166     {
01167       // Get the largest value in the used values, and pick one
01168       // larger
01169       tagvalue = used_tag_values.rbegin()->first+1;
01170       libmesh_assert(!used_tag_values.count(tagvalue));
01171     }
01172   used_tag_values[tagvalue] = 1;
01173 
01174   // #ifndef NDEBUG
01175   //   // Make sure everyone called get_unique_tag and make sure
01176   //   // everyone got the same value
01177   //   int maxval = tagvalue;
01178   //   this->max(maxval);
01179   //   libmesh_assert_equal_to (tagvalue, maxval);
01180   // #endif
01181 
01182   return MessageTag(tagvalue, this);
01183 }
01184 
01185 
01186 inline
01187 void Communicator::reference_unique_tag(int tagvalue) const
01188 {
01189   // This has better be an already-acquired tag.
01190   libmesh_assert(used_tag_values.count(tagvalue));
01191 
01192   used_tag_values[tagvalue]++;
01193 }
01194 
01195 
01196 inline
01197 void Communicator::dereference_unique_tag(int tagvalue) const
01198 {
01199   // This has better be an already-acquired tag.
01200   libmesh_assert(used_tag_values.count(tagvalue));
01201 
01202   used_tag_values[tagvalue]--;
01203   // If we don't have any more outstanding references, we
01204   // don't even need to keep this tag in our "used" set.
01205   if (!used_tag_values[tagvalue])
01206     used_tag_values.erase(tagvalue);
01207 }
01208 
01209 
01210 #ifdef LIBMESH_HAVE_MPI
01211 template<>
01212 inline data_type dataplusint_type<short int>() { return MPI_SHORT_INT; }
01213 
01214 template<>
01215 inline data_type dataplusint_type<int>() { return MPI_2INT; }
01216 
01217 template<>
01218 inline data_type dataplusint_type<long>() { return MPI_LONG_INT; }
01219 
01220 template<>
01221 inline data_type dataplusint_type<float>() { return MPI_FLOAT_INT; }
01222 
01223 template<>
01224 inline data_type dataplusint_type<double>() { return MPI_DOUBLE_INT; }
01225 
01226 template<>
01227 inline data_type dataplusint_type<long double>() { return MPI_LONG_DOUBLE_INT; }
01228 
01229 template <typename T>
01230 inline bool Communicator::verify(const T &r) const
01231 {
01232   if (this->size() > 1 && Attributes<T>::has_min_max == true)
01233     {
01234       T tempmin = r, tempmax = r;
01235       this->min(tempmin);
01236       this->max(tempmax);
01237       bool verified = (r == tempmin) &&
01238         (r == tempmax);
01239       this->min(verified);
01240       return verified;
01241     }
01242   return true;
01243 }
01244 
01245 
01246 
01247 template <typename T>
01248 inline bool Communicator::semiverify(const T *r) const
01249 {
01250   if (this->size() > 1 && Attributes<T>::has_min_max == true)
01251     {
01252       T tempmin, tempmax;
01253       if (r)
01254         tempmin = tempmax = *r;
01255       else
01256         {
01257           Attributes<T>::set_highest(tempmin);
01258           Attributes<T>::set_lowest(tempmax);
01259         }
01260       this->min(tempmin);
01261       this->max(tempmax);
01262       bool invalid = r && ((*r != tempmin) &&
01263                            (*r != tempmax));
01264       this->max(invalid);
01265       return !invalid;
01266     }
01267   return true;
01268 }
01269 
01270 
01271 
01272 template <typename T>
01273 inline bool Communicator::semiverify(const std::vector<T> *r) const
01274 {
01275   if (this->size() > 1 && Attributes<T>::has_min_max == true)
01276     {
01277       std::size_t rsize = r ? r->size() : 0;
01278       std::size_t *psize = r ? &rsize : NULL;
01279 
01280       if (!this->semiverify(psize))
01281         return false;
01282 
01283       this->max(rsize);
01284 
01285       std::vector<T> tempmin, tempmax;
01286       if (r)
01287         {
01288           tempmin = tempmax = *r;
01289         }
01290       else
01291         {
01292           tempmin.resize(rsize);
01293           tempmax.resize(rsize);
01294           Attributes<std::vector<T> >::set_highest(tempmin);
01295           Attributes<std::vector<T> >::set_lowest(tempmax);
01296         }
01297       this->min(tempmin);
01298       this->max(tempmax);
01299       bool invalid = r && ((*r != tempmin) &&
01300                            (*r != tempmax));
01301       this->max(invalid);
01302       return !invalid;
01303     }
01304   return true;
01305 }
01306 
01307 
01308 
01309 inline bool Communicator::verify(const std::string & r) const
01310 {
01311   if (this->size() > 1)
01312     {
01313       // Cannot use <char> since MPI_MIN is not
01314       // strictly defined for chars!
01315       std::vector<short int> temp; temp.reserve(r.size());
01316       for (std::size_t i=0; i != r.size(); ++i)
01317         temp.push_back(r[i]);
01318       return this->verify(temp);
01319     }
01320   return true;
01321 }
01322 
01323 
01324 
01325 inline bool Communicator::semiverify(const std::string * r) const
01326 {
01327   if (this->size() > 1)
01328     {
01329       std::size_t rsize = r ? r->size() : 0;
01330       std::size_t *psize = r ? &rsize : NULL;
01331 
01332       if (!this->semiverify(psize))
01333         return false;
01334 
01335       this->max(rsize);
01336 
01337       // Cannot use <char> since MPI_MIN is not
01338       // strictly defined for chars!
01339       std::vector<short int> temp (rsize);
01340       if (r)
01341         {
01342           temp.reserve(rsize);
01343           for (std::size_t i=0; i != rsize; ++i)
01344             temp.push_back((*r)[i]);
01345         }
01346 
01347       std::vector<short int> *ptemp = r ? &temp: NULL;
01348 
01349       return this->semiverify(ptemp);
01350     }
01351   return true;
01352 }
01353 
01354 
01355 
01356 template <typename T>
01357 inline void Communicator::min(T &r) const
01358 {
01359   if (this->size() > 1)
01360     {
01361       START_LOG("min(scalar)", "Parallel");
01362 
01363       T temp = r;
01364       MPI_Allreduce (&temp,
01365                      &r,
01366                      1,
01367                      StandardType<T>(&temp),
01368                      MPI_MIN,
01369                      this->get());
01370 
01371       STOP_LOG("min(scalar)", "Parallel");
01372     }
01373 }
01374 
01375 
01376 inline void Communicator::min(bool &r) const
01377 {
01378   if (this->size() > 1)
01379     {
01380       START_LOG("min(bool)", "Parallel");
01381 
01382       unsigned int tempsend = r;
01383       unsigned int temp;
01384       MPI_Allreduce (&tempsend,
01385                      &temp,
01386                      1,
01387                      StandardType<unsigned int>(),
01388                      MPI_MIN,
01389                      this->get());
01390       r = temp;
01391 
01392       STOP_LOG("min(bool)", "Parallel");
01393     }
01394 }
01395 
01396 
01397 template <typename T>
01398 inline void Communicator::min(std::vector<T> &r) const
01399 {
01400   if (this->size() > 1 && !r.empty())
01401     {
01402       START_LOG("min(vector)", "Parallel");
01403 
01404       libmesh_assert(this->verify(r.size()));
01405 
01406       std::vector<T> temp(r);
01407       MPI_Allreduce (&temp[0],
01408                      &r[0],
01409                      cast_int<int>(r.size()),
01410                      StandardType<T>(&temp[0]),
01411                      MPI_MIN,
01412                      this->get());
01413 
01414       STOP_LOG("min(vector)", "Parallel");
01415     }
01416 }
01417 
01418 
01419 inline void Communicator::min(std::vector<bool> &r) const
01420 {
01421   if (this->size() > 1 && !r.empty())
01422     {
01423       START_LOG("min(vector<bool>)", "Parallel");
01424 
01425       libmesh_assert(this->verify(r.size()));
01426 
01427       std::vector<unsigned int> ruint;
01428       pack_vector_bool(r, ruint);
01429       std::vector<unsigned int> temp(ruint.size());
01430       MPI_Allreduce (&ruint[0],
01431                      &temp[0],
01432                      cast_int<int>(ruint.size()),
01433                      StandardType<unsigned int>(),
01434                      MPI_BAND,
01435                      this->get());
01436       unpack_vector_bool(temp, r);
01437 
01438       STOP_LOG("min(vector<bool>)", "Parallel");
01439     }
01440 }
01441 
01442 
01443 template <typename T>
01444 inline void Communicator::minloc(T &r,
01445                                  unsigned int &min_id) const
01446 {
01447   if (this->size() > 1)
01448     {
01449       START_LOG("minloc(scalar)", "Parallel");
01450 
01451       DataPlusInt<T> in;
01452       in.val = r;
01453       in.rank = this->rank();
01454       DataPlusInt<T> out;
01455       MPI_Allreduce (&in,
01456                      &out,
01457                      1,
01458                      dataplusint_type<T>(),
01459                      MPI_MINLOC,
01460                      this->get());
01461       r = out.val;
01462       min_id = out.rank;
01463 
01464       STOP_LOG("minloc(scalar)", "Parallel");
01465     }
01466   else
01467     min_id = this->rank();
01468 }
01469 
01470 
01471 inline void Communicator::minloc(bool &r,
01472                                  unsigned int &min_id) const
01473 {
01474   if (this->size() > 1)
01475     {
01476       START_LOG("minloc(bool)", "Parallel");
01477 
01478       DataPlusInt<int> in;
01479       in.val = r;
01480       in.rank = this->rank();
01481       DataPlusInt<int> out;
01482       MPI_Allreduce (&in,
01483                      &out,
01484                      1,
01485                      dataplusint_type<int>(),
01486                      MPI_MINLOC,
01487                      this->get());
01488       r = out.val;
01489       min_id = out.rank;
01490 
01491       STOP_LOG("minloc(bool)", "Parallel");
01492     }
01493   else
01494     min_id = this->rank();
01495 }
01496 
01497 
01498 template <typename T>
01499 inline void Communicator::minloc(std::vector<T> &r,
01500                                  std::vector<unsigned int> &min_id) const
01501 {
01502   if (this->size() > 1 && !r.empty())
01503     {
01504       START_LOG("minloc(vector)", "Parallel");
01505 
01506       libmesh_assert(this->verify(r.size()));
01507 
01508       std::vector<DataPlusInt<T> > in(r.size());
01509       for (std::size_t i=0; i != r.size(); ++i)
01510         {
01511           in[i].val  = r[i];
01512           in[i].rank = this->rank();
01513         }
01514       std::vector<DataPlusInt<T> > out(r.size());
01515       MPI_Allreduce (&in[0],
01516                      &out[0],
01517                      cast_int<int>(r.size()),
01518                      dataplusint_type<T>(),
01519                      MPI_MINLOC,
01520                      this->get());
01521       for (std::size_t i=0; i != r.size(); ++i)
01522         {
01523           r[i]      = out[i].val;
01524           min_id[i] = out[i].rank;
01525         }
01526 
01527       STOP_LOG("minloc(vector)", "Parallel");
01528     }
01529   else if (!r.empty())
01530     {
01531       for (std::size_t i=0; i != r.size(); ++i)
01532         min_id[i] = this->rank();
01533     }
01534 }
01535 
01536 
01537 inline void Communicator::minloc(std::vector<bool> &r,
01538                                  std::vector<unsigned int> &min_id) const
01539 {
01540   if (this->size() > 1 && !r.empty())
01541     {
01542       START_LOG("minloc(vector<bool>)", "Parallel");
01543 
01544       libmesh_assert(this->verify(r.size()));
01545 
01546       std::vector<DataPlusInt<int> > in(r.size());
01547       for (std::size_t i=0; i != r.size(); ++i)
01548         {
01549           in[i].val  = r[i];
01550           in[i].rank = this->rank();
01551         }
01552       std::vector<DataPlusInt<int> > out(r.size());
01553       MPI_Allreduce (&in[0],
01554                      &out[0],
01555                      cast_int<int>(r.size()),
01556                      StandardType<int>(),
01557                      MPI_MINLOC,
01558                      this->get());
01559       for (std::size_t i=0; i != r.size(); ++i)
01560         {
01561           r[i]      = out[i].val;
01562           min_id[i] = out[i].rank;
01563         }
01564 
01565       STOP_LOG("minloc(vector<bool>)", "Parallel");
01566     }
01567   else if (!r.empty())
01568     {
01569       for (std::size_t i=0; i != r.size(); ++i)
01570         min_id[i] = this->rank();
01571     }
01572 }
01573 
01574 
01575 template <typename T>
01576 inline void Communicator::max(T &r) const
01577 {
01578   if (this->size() > 1)
01579     {
01580       START_LOG("max(scalar)", "Parallel");
01581 
01582       T temp;
01583       MPI_Allreduce (&r,
01584                      &temp,
01585                      1,
01586                      StandardType<T>(&r),
01587                      MPI_MAX,
01588                      this->get());
01589       r = temp;
01590 
01591       STOP_LOG("max(scalar)", "Parallel");
01592     }
01593 }
01594 
01595 
01596 inline void Communicator::max(bool &r) const
01597 {
01598   if (this->size() > 1)
01599     {
01600       START_LOG("max(bool)", "Parallel");
01601 
01602       unsigned int tempsend = r;
01603       unsigned int temp;
01604       MPI_Allreduce (&tempsend,
01605                      &temp,
01606                      1,
01607                      StandardType<unsigned int>(),
01608                      MPI_MAX,
01609                      this->get());
01610       r = temp;
01611 
01612       STOP_LOG("max(bool)", "Parallel");
01613     }
01614 }
01615 
01616 
01617 template <typename T>
01618 inline void Communicator::max(std::vector<T> &r) const
01619 {
01620   if (this->size() > 1 && !r.empty())
01621     {
01622       START_LOG("max(vector)", "Parallel");
01623 
01624       libmesh_assert(this->verify(r.size()));
01625 
01626       std::vector<T> temp(r);
01627       MPI_Allreduce (&temp[0],
01628                      &r[0],
01629                      cast_int<int>(r.size()),
01630                      StandardType<T>(&temp[0]),
01631                      MPI_MAX,
01632                      this->get());
01633 
01634       STOP_LOG("max(vector)", "Parallel");
01635     }
01636 }
01637 
01638 
01639 inline void Communicator::max(std::vector<bool> &r) const
01640 {
01641   if (this->size() > 1 && !r.empty())
01642     {
01643       START_LOG("max(vector<bool>)", "Parallel");
01644 
01645       libmesh_assert(this->verify(r.size()));
01646 
01647       std::vector<unsigned int> ruint;
01648       pack_vector_bool(r, ruint);
01649       std::vector<unsigned int> temp(ruint.size());
01650       MPI_Allreduce (&ruint[0],
01651                      &temp[0],
01652                      cast_int<int>(ruint.size()),
01653                      StandardType<unsigned int>(),
01654                      MPI_BOR,
01655                      this->get());
01656       unpack_vector_bool(temp, r);
01657 
01658       STOP_LOG("max(vector<bool>)", "Parallel");
01659     }
01660 }
01661 
01662 
01663 template <typename T>
01664 inline void Communicator::maxloc(T &r,
01665                                  unsigned int &max_id) const
01666 {
01667   if (this->size() > 1)
01668     {
01669       START_LOG("maxloc(scalar)", "Parallel");
01670 
01671       DataPlusInt<T> in;
01672       in.val = r;
01673       in.rank = this->rank();
01674       DataPlusInt<T> out;
01675       MPI_Allreduce (&in,
01676                      &out,
01677                      1,
01678                      dataplusint_type<T>(),
01679                      MPI_MAXLOC,
01680                      this->get());
01681       r = out.val;
01682       max_id = out.rank;
01683 
01684       STOP_LOG("maxloc(scalar)", "Parallel");
01685     }
01686   else
01687     max_id = this->rank();
01688 }
01689 
01690 
01691 inline void Communicator::maxloc(bool &r,
01692                                  unsigned int &max_id) const
01693 {
01694   if (this->size() > 1)
01695     {
01696       START_LOG("maxloc(bool)", "Parallel");
01697 
01698       DataPlusInt<int> in;
01699       in.val = r;
01700       in.rank = this->rank();
01701       DataPlusInt<int> out;
01702       MPI_Allreduce (&in,
01703                      &out,
01704                      1,
01705                      dataplusint_type<int>(),
01706                      MPI_MAXLOC,
01707                      this->get());
01708       r = out.val;
01709       max_id = out.rank;
01710 
01711       STOP_LOG("maxloc(bool)", "Parallel");
01712     }
01713   else
01714     max_id = this->rank();
01715 }
01716 
01717 
01718 template <typename T>
01719 inline void Communicator::maxloc(std::vector<T> &r,
01720                                  std::vector<unsigned int> &max_id) const
01721 {
01722   if (this->size() > 1 && !r.empty())
01723     {
01724       START_LOG("maxloc(vector)", "Parallel");
01725 
01726       libmesh_assert(this->verify(r.size()));
01727 
01728       std::vector<DataPlusInt<T> > in(r.size());
01729       for (std::size_t i=0; i != r.size(); ++i)
01730         {
01731           in[i].val  = r[i];
01732           in[i].rank = this->rank();
01733         }
01734       std::vector<DataPlusInt<T> > out(r.size());
01735       MPI_Allreduce (&in[0],
01736                      &out[0],
01737                      cast_int<int>(r.size()),
01738                      dataplusint_type<T>(),
01739                      MPI_MAXLOC,
01740                      this->get());
01741       for (std::size_t i=0; i != r.size(); ++i)
01742         {
01743           r[i]      = out[i].val;
01744           max_id[i] = out[i].rank;
01745         }
01746 
01747       STOP_LOG("maxloc(vector)", "Parallel");
01748     }
01749   else if (!r.empty())
01750     {
01751       for (std::size_t i=0; i != r.size(); ++i)
01752         max_id[i] = this->rank();
01753     }
01754 }
01755 
01756 
01757 inline void Communicator::maxloc(std::vector<bool> &r,
01758                                  std::vector<unsigned int> &max_id) const
01759 {
01760   if (this->size() > 1 && !r.empty())
01761     {
01762       START_LOG("maxloc(vector<bool>)", "Parallel");
01763 
01764       libmesh_assert(this->verify(r.size()));
01765 
01766       std::vector<DataPlusInt<int> > in(r.size());
01767       for (std::size_t i=0; i != r.size(); ++i)
01768         {
01769           in[i].val  = r[i];
01770           in[i].rank = this->rank();
01771         }
01772       std::vector<DataPlusInt<int> > out(r.size());
01773       MPI_Allreduce (&in[0],
01774                      &out[0],
01775                      cast_int<int>(r.size()),
01776                      StandardType<int>(),
01777                      MPI_MAXLOC,
01778                      this->get());
01779       for (std::size_t i=0; i != r.size(); ++i)
01780         {
01781           r[i]      = out[i].val;
01782           max_id[i] = out[i].rank;
01783         }
01784 
01785       STOP_LOG("maxloc(vector<bool>)", "Parallel");
01786     }
01787   else if (!r.empty())
01788     {
01789       for (std::size_t i=0; i != r.size(); ++i)
01790         max_id[i] = this->rank();
01791     }
01792 }
01793 
01794 
01795 template <typename T>
01796 inline void Communicator::sum(T &r) const
01797 {
01798   if (this->size() > 1)
01799     {
01800       START_LOG("sum()", "Parallel");
01801 
01802       T temp = r;
01803       MPI_Allreduce (&temp,
01804                      &r,
01805                      1,
01806                      StandardType<T>(&temp),
01807                      MPI_SUM,
01808                      this->get());
01809 
01810       STOP_LOG("sum()", "Parallel");
01811     }
01812 }
01813 
01814 
01815 template <typename T>
01816 inline void Communicator::sum(std::vector<T> &r) const
01817 {
01818   if (this->size() > 1 && !r.empty())
01819     {
01820       START_LOG("sum()", "Parallel");
01821 
01822       libmesh_assert(this->verify(r.size()));
01823 
01824       std::vector<T> temp(r);
01825       MPI_Allreduce (&temp[0],
01826                      &r[0],
01827                      cast_int<int>(r.size()),
01828                      StandardType<T>(&temp[0]),
01829                      MPI_SUM,
01830                      this->get());
01831 
01832       STOP_LOG("sum()", "Parallel");
01833     }
01834 }
01835 
01836 
01837 // We still do function overloading for complex sums - in a perfect
01838 // world we'd have a StandardSumOp to go along with StandardType...
01839 template <typename T>
01840 inline void Communicator::sum(std::complex<T> &r) const
01841 {
01842   if (this->size() > 1)
01843     {
01844       START_LOG("sum()", "Parallel");
01845 
01846       std::complex<T> temp(r);
01847       MPI_Allreduce (&temp,
01848                      &r,
01849                      2,
01850                      StandardType<T>(),
01851                      MPI_SUM,
01852                      this->get());
01853 
01854       STOP_LOG("sum()", "Parallel");
01855     }
01856 }
01857 
01858 
01859 template <typename T>
01860 inline void Communicator::sum(std::vector<std::complex<T> > &r) const
01861 {
01862   if (this->size() > 1 && !r.empty())
01863     {
01864       START_LOG("sum()", "Parallel");
01865 
01866       libmesh_assert(this->verify(r.size()));
01867 
01868       std::vector<std::complex<T> > temp(r);
01869       MPI_Allreduce (&temp[0],
01870                      &r[0],
01871                      cast_int<int>(r.size() * 2),
01872                      StandardType<T>(NULL),
01873                      MPI_SUM,
01874                      this->get());
01875 
01876       STOP_LOG("sum()", "Parallel");
01877     }
01878 }
01879 
01880 
01881 template <typename T>
01882 inline void Communicator::set_union(std::set<T> &data,
01883                                     const unsigned int root_id) const
01884 {
01885   std::vector<T> vecdata(data.begin(), data.end());
01886   this->gather(root_id, vecdata);
01887   if (this->rank() == root_id)
01888     data.insert(vecdata.begin(), vecdata.end());
01889 }
01890 
01891 
01892 
01893 template <typename T>
01894 inline void Communicator::set_union(std::set<T> &data) const
01895 {
01896   std::vector<T> vecdata(data.begin(), data.end());
01897   this->allgather(vecdata, false);
01898   data.insert(vecdata.begin(), vecdata.end());
01899 }
01900 
01901 
01902 
01903 template <typename T1, typename T2>
01904 inline void Communicator::set_union(std::map<T1,T2> &data,
01905                                     const unsigned int root_id) const
01906 {
01907   std::vector<std::pair<T1,T2> > vecdata(data.begin(), data.end());
01908   this->gather(root_id, vecdata);
01909   if (this->rank() == root_id)
01910     data.insert(vecdata.begin(), vecdata.end());
01911 }
01912 
01913 
01914 
01915 template <typename T1, typename T2>
01916 inline void Communicator::set_union(std::map<T1,T2> &data) const
01917 {
01918   std::vector<std::pair<T1,T2> > vecdata(data.begin(), data.end());
01919   this->allgather(vecdata, false);
01920   data.insert(vecdata.begin(), vecdata.end());
01921 }
01922 
01923 
01924 
01925 inline status Communicator::probe (const unsigned int src_processor_id,
01926                                    const MessageTag &tag) const
01927 {
01928   START_LOG("probe()", "Parallel");
01929 
01930   status stat;
01931 
01932   MPI_Probe (src_processor_id,
01933              tag.value(),
01934              this->get(),
01935              &stat);
01936 
01937   STOP_LOG("probe()", "Parallel");
01938 
01939   return stat;
01940 }
01941 
01942 
01943 
01944 template<typename T>
01945 inline void Communicator::send (const unsigned int dest_processor_id,
01946                                 std::basic_string<T> &buf,
01947                                 const MessageTag &tag) const
01948 {
01949   START_LOG("send()", "Parallel");
01950 
01951   T* dataptr = buf.empty() ? NULL : const_cast<T*>(buf.data());
01952 
01953 #ifndef NDEBUG
01954   // Only catch the return value when asserts are active.
01955   const int ierr =
01956 #endif
01957     ((this->send_mode() == SYNCHRONOUS) ?
01958      MPI_Ssend : MPI_Send) (dataptr,
01959                             cast_int<int>(buf.size()),
01960                             StandardType<T>(dataptr),
01961                             dest_processor_id,
01962                             tag.value(),
01963                             this->get());
01964 
01965   libmesh_assert (ierr == MPI_SUCCESS);
01966 
01967   STOP_LOG("send()", "Parallel");
01968 }
01969 
01970 
01971 
01972 template <typename T>
01973 inline void Communicator::send (const unsigned int dest_processor_id,
01974                                 std::basic_string<T> &buf,
01975                                 Request &req,
01976                                 const MessageTag &tag) const
01977 {
01978   START_LOG("send()", "Parallel");
01979 
01980   T* dataptr = buf.empty() ? NULL : const_cast<T*>(buf.data());
01981 
01982 #ifndef NDEBUG
01983   // Only catch the return value when asserts are active.
01984   const int ierr =
01985 #endif
01986     ((this->send_mode() == SYNCHRONOUS) ?
01987      MPI_Issend : MPI_Isend) (dataptr,
01988                               cast_int<int>(buf.size()),
01989                               StandardType<T>(dataptr),
01990                               dest_processor_id,
01991                               tag.value(),
01992                               this->get(),
01993                               req.get());
01994 
01995   libmesh_assert (ierr == MPI_SUCCESS);
01996 
01997   STOP_LOG("send()", "Parallel");
01998 }
01999 
02000 
02001 
02002 template <typename T>
02003 inline void Communicator::send (const unsigned int dest_processor_id,
02004                                 T &buf,
02005                                 const MessageTag &tag) const
02006 {
02007   START_LOG("send()", "Parallel");
02008 
02009   T* dataptr = &buf;
02010 
02011 #ifndef NDEBUG
02012   // Only catch the return value when asserts are active.
02013   const int ierr =
02014 #endif
02015     ((this->send_mode() == SYNCHRONOUS) ?
02016      MPI_Ssend : MPI_Send) (dataptr,
02017                             1,
02018                             StandardType<T>(dataptr),
02019                             dest_processor_id,
02020                             tag.value(),
02021                             this->get());
02022 
02023   libmesh_assert (ierr == MPI_SUCCESS);
02024 
02025   STOP_LOG("send()", "Parallel");
02026 }
02027 
02028 
02029 
02030 template <typename T>
02031 inline void Communicator::send (const unsigned int dest_processor_id,
02032                                 T &buf,
02033                                 Request &req,
02034                                 const MessageTag &tag) const
02035 {
02036   START_LOG("send()", "Parallel");
02037 
02038   T* dataptr = &buf;
02039 
02040 #ifndef NDEBUG
02041   // Only catch the return value when asserts are active.
02042   const int ierr =
02043 #endif
02044     ((this->send_mode() == SYNCHRONOUS) ?
02045      MPI_Issend : MPI_Isend) (dataptr,
02046                               1,
02047                               StandardType<T>(dataptr),
02048                               dest_processor_id,
02049                               tag.value(),
02050                               this->get(),
02051                               req.get());
02052 
02053   libmesh_assert (ierr == MPI_SUCCESS);
02054 
02055   STOP_LOG("send()", "Parallel");
02056 }
02057 
02058 
02059 
02060 template <typename T>
02061 inline void Communicator::send (const unsigned int dest_processor_id,
02062                                 std::set<T> &buf,
02063                                 const MessageTag &tag) const
02064 {
02065   this->send(dest_processor_id,
02066              StandardType<T>(buf.empty() ? NULL : &buf.front()), tag);
02067 }
02068 
02069 
02070 
02071 template <typename T>
02072 inline void Communicator::send (const unsigned int dest_processor_id,
02073                                 std::set<T> &buf,
02074                                 Request &req,
02075                                 const MessageTag &tag) const
02076 {
02077   this->send(dest_processor_id,
02078              StandardType<T>(buf.empty() ? NULL : &buf.front()), req, tag);
02079 }
02080 
02081 
02082 
02083 template <typename T>
02084 inline void Communicator::send (const unsigned int dest_processor_id,
02085                                 std::set<T> &buf,
02086                                 const DataType &type,
02087                                 const MessageTag &tag) const
02088 {
02089   START_LOG("send()", "Parallel");
02090 
02091   std::vector<T> vecbuf(buf.begin(), buf.end());
02092   this->send(dest_processor_id, vecbuf, type, tag);
02093 
02094   STOP_LOG("send()", "Parallel");
02095 }
02096 
02097 
02098 
02099 template <typename T>
02100 inline void Communicator::send (const unsigned int dest_processor_id,
02101                                 std::set<T> &buf,
02102                                 const DataType &type,
02103                                 Request &req,
02104                                 const MessageTag &tag) const
02105 {
02106   START_LOG("send()", "Parallel");
02107 
02108   // Allocate temporary buffer on the heap so it lives until after
02109   // the non-blocking send completes
02110   std::vector<T> *vecbuf =
02111     new std::vector<T>(buf.begin(), buf.end());
02112 
02113   // Make the Request::wait() handle deleting the buffer
02114   req.add_post_wait_work
02115     (new Parallel::PostWaitDeleteBuffer<std::vector<T> >(vecbuf));
02116 
02117   this->send(dest_processor_id, *vecbuf, type, req, tag);
02118 
02119   STOP_LOG("send()", "Parallel");
02120 }
02121 
02122 
02123 
02124 template <typename T>
02125 inline void Communicator::send (const unsigned int dest_processor_id,
02126                                 std::vector<T> &buf,
02127                                 const MessageTag &tag) const
02128 {
02129   this->send(dest_processor_id, buf,
02130              StandardType<T>(buf.empty() ? NULL : &buf.front()), tag);
02131 }
02132 
02133 
02134 
02135 template <typename T>
02136 inline void Communicator::send (const unsigned int dest_processor_id,
02137                                 std::vector<T> &buf,
02138                                 Request &req,
02139                                 const MessageTag &tag) const
02140 {
02141   this->send(dest_processor_id, buf,
02142              StandardType<T>(buf.empty() ? NULL : &buf.front()), req, tag);
02143 }
02144 
02145 
02146 
02147 template <typename T>
02148 inline void Communicator::send (const unsigned int dest_processor_id,
02149                                 std::vector<T> &buf,
02150                                 const DataType &type,
02151                                 const MessageTag &tag) const
02152 {
02153   START_LOG("send()", "Parallel");
02154 
02155 #ifndef NDEBUG
02156   // Only catch the return value when asserts are active.
02157   const int ierr =
02158 #endif
02159     ((this->send_mode() == SYNCHRONOUS) ?
02160      MPI_Ssend : MPI_Send) (buf.empty() ? NULL : &buf[0],
02161                             cast_int<int>(buf.size()),
02162                             type,
02163                             dest_processor_id,
02164                             tag.value(),
02165                             this->get());
02166 
02167   libmesh_assert (ierr == MPI_SUCCESS);
02168 
02169   STOP_LOG("send()", "Parallel");
02170 }
02171 
02172 
02173 
02174 template <typename T>
02175 inline void Communicator::send (const unsigned int dest_processor_id,
02176                                 std::vector<T> &buf,
02177                                 const DataType &type,
02178                                 Request &req,
02179                                 const MessageTag &tag) const
02180 {
02181   START_LOG("send()", "Parallel");
02182 
02183 #ifndef NDEBUG
02184   // Only catch the return value when asserts are active.
02185   const int ierr =
02186 #endif
02187     ((this->send_mode() == SYNCHRONOUS) ?
02188      MPI_Issend : MPI_Isend) (buf.empty() ? NULL : &buf[0],
02189                               cast_int<int>(buf.size()),
02190                               type,
02191                               dest_processor_id,
02192                               tag.value(),
02193                               this->get(),
02194                               req.get());
02195 
02196   libmesh_assert (ierr == MPI_SUCCESS);
02197 
02198   STOP_LOG("send()", "Parallel");
02199 }
02200 
02201 
02202 template <typename Context, typename Iter>
02203 inline void Communicator::send_packed_range (const unsigned int dest_processor_id,
02204                                              const Context *context,
02205                                              Iter range_begin,
02206                                              const Iter range_end,
02207                                              const MessageTag &tag) const
02208 {
02209   // We will serialize variable size objects from *range_begin to
02210   // *range_end as a sequence of plain data (e.g. ints) in this buffer
02211   typedef typename std::iterator_traits<Iter>::value_type T;
02212 
02213   std::size_t total_buffer_size =
02214     Parallel::packed_range_size(context, range_begin, range_end);
02215 
02216   this->send(dest_processor_id, total_buffer_size, tag);
02217 
02218 #ifdef DEBUG
02219   std::size_t used_buffer_size = 0;
02220 #endif
02221 
02222   while (range_begin != range_end)
02223     {
02224       std::vector<typename Parallel::BufferType<T>::type> buffer;
02225 
02226       range_begin = Parallel::pack_range(context, range_begin, range_end, buffer);
02227 
02228 #ifdef DEBUG
02229       used_buffer_size += buffer.size();
02230 #endif
02231 
02232       // Blocking send of the buffer
02233       this->send(dest_processor_id, buffer, tag);
02234     }
02235 
02236 #ifdef DEBUG
02237   libmesh_assert_equal_to(used_buffer_size, total_buffer_size);
02238 #endif
02239 }
02240 
02241 
02242 template <typename Context, typename Iter>
02243 inline void Communicator::send_packed_range (const unsigned int dest_processor_id,
02244                                              const Context *context,
02245                                              Iter range_begin,
02246                                              const Iter range_end,
02247                                              Request &req,
02248                                              const MessageTag &tag) const
02249 {
02250   // Allocate a buffer on the heap so we don't have to free it until
02251   // after the Request::wait()
02252   typedef typename std::iterator_traits<Iter>::value_type T;
02253   typedef typename Parallel::BufferType<T>::type buffer_t;
02254 
02255   std::size_t total_buffer_size =
02256     Parallel::packed_range_size(context, range_begin, range_end);
02257 
02258   Request intermediate_req = request();
02259   this->send(dest_processor_id, total_buffer_size, intermediate_req, tag);
02260 
02261   req.add_prior_request(intermediate_req);
02262 
02263 #ifdef DEBUG
02264   std::size_t used_buffer_size = 0;
02265 #endif
02266 
02267   while (range_begin != range_end)
02268     {
02269       std::vector<buffer_t> *buffer = new std::vector<buffer_t>();
02270 
02271       range_begin = Parallel::pack_range(context, range_begin, range_end, *buffer);
02272 
02273 #ifdef DEBUG
02274       used_buffer_size += buffer->size();
02275 #endif
02276 
02277       Request next_intermediate_req;
02278 
02279       Request *my_req = (range_begin == range_end) ? &req : &next_intermediate_req;
02280 
02281       // Make the Request::wait() handle deleting the buffer
02282       my_req->add_post_wait_work
02283         (new Parallel::PostWaitDeleteBuffer<std::vector<buffer_t> >
02284          (buffer));
02285 
02286       // Non-blocking send of the buffer
02287       this->send(dest_processor_id, *buffer, *my_req, tag);
02288 
02289       if (range_begin != range_end)
02290         req.add_prior_request(*my_req);
02291     }
02292 }
02293 
02294 
02295 
02296 template <typename T>
02297 inline Status Communicator::receive (const unsigned int src_processor_id,
02298                                      std::basic_string<T> &buf,
02299                                      const MessageTag &tag) const
02300 {
02301   std::vector<T> tempbuf;  // Officially C++ won't let us get a
02302                            // modifiable array from a string
02303 
02304   Status stat = this->receive(src_processor_id, tempbuf, tag);
02305   buf.assign(tempbuf.begin(), tempbuf.end());
02306   return stat;
02307 }
02308 
02309 
02310 
02311 template <typename T>
02312 inline void Communicator::receive (const unsigned int src_processor_id,
02313                                    std::basic_string<T> &buf,
02314                                    Request &req,
02315                                    const MessageTag &tag) const
02316 {
02317   // Officially C++ won't let us get a modifiable array from a
02318   // string, and we can't even put one on the stack for the
02319   // non-blocking case.
02320   std::vector<T> *tempbuf = new std::vector<T>();
02321 
02322   // We can clear the string, but the Request::wait() will need to
02323   // handle copying our temporary buffer to it
02324   buf.clear();
02325 
02326   req.add_post_wait_work
02327     (new Parallel::PostWaitCopyBuffer<std::vector<T>,
02328      std::back_insert_iterator<std::basic_string<T> > >
02329      (tempbuf, std::back_inserter(buf)));
02330 
02331   // Make the Request::wait() then handle deleting the buffer
02332   req.add_post_wait_work
02333     (new Parallel::PostWaitDeleteBuffer<std::vector<T> >(tempbuf));
02334 
02335   this->receive(src_processor_id, tempbuf, req, tag);
02336 }
02337 
02338 
02339 
02340 template <typename T>
02341 inline Status Communicator::receive (const unsigned int src_processor_id,
02342                                      T &buf,
02343                                      const MessageTag &tag) const
02344 {
02345   START_LOG("receive()", "Parallel");
02346 
02347   // Get the status of the message, explicitly provide the
02348   // datatype so we can later query the size
02349   Status stat(this->probe(src_processor_id, tag), StandardType<T>(&buf));
02350 
02351 #ifndef NDEBUG
02352   // Only catch the return value when asserts are active.
02353   const int ierr =
02354 #endif
02355     MPI_Recv (&buf,
02356               1,
02357               StandardType<T>(&buf),
02358               src_processor_id,
02359               tag.value(),
02360               this->get(),
02361               stat.get());
02362   libmesh_assert (ierr == MPI_SUCCESS);
02363 
02364   STOP_LOG("receive()", "Parallel");
02365 
02366   return stat;
02367 }
02368 
02369 
02370 
02371 template <typename T>
02372 inline void Communicator::receive (const unsigned int src_processor_id,
02373                                    T &buf,
02374                                    Request &req,
02375                                    const MessageTag &tag) const
02376 {
02377   START_LOG("receive()", "Parallel");
02378 
02379 #ifndef NDEBUG
02380   // Only catch the return value when asserts are active.
02381   const int ierr =
02382 #endif
02383     MPI_Irecv (&buf,
02384                1,
02385                StandardType<T>(&buf),
02386                src_processor_id,
02387                tag.value(),
02388                this->get(),
02389                req.get());
02390   libmesh_assert (ierr == MPI_SUCCESS);
02391 
02392   STOP_LOG("receive()", "Parallel");
02393 }
02394 
02395 
02396 
02397 template <typename T>
02398 inline Status Communicator::receive (const unsigned int src_processor_id,
02399                                      std::set<T> &buf,
02400                                      const MessageTag &tag) const
02401 {
02402   return this->receive
02403     (src_processor_id, buf,
02404      StandardType<T>(buf.empty() ? NULL : &buf.front()), tag);
02405 }
02406 
02407 
02408 
02409 template <typename T>
02410 inline void Communicator::receive (const unsigned int src_processor_id,
02411                                    std::set<T> &buf,
02412                                    Request &req,
02413                                    const MessageTag &tag) const
02414 {
02415   this->receive (src_processor_id, buf,
02416                  StandardType<T>(buf.empty() ? NULL : &buf.front()), req, tag);
02417 }
02418 
02419 
02420 
02421 template <typename T>
02422 inline Status Communicator::receive (const unsigned int src_processor_id,
02423                                      std::set<T> &buf,
02424                                      const DataType &type,
02425                                      const MessageTag &tag) const
02426 {
02427   START_LOG("receive()", "Parallel");
02428 
02429   std::vector<T> vecbuf;
02430   Status stat = this->receive(src_processor_id, vecbuf, type, tag);
02431   buf.clear();
02432   buf.insert(vecbuf.begin(), vecbuf.end());
02433 
02434   STOP_LOG("receive()", "Parallel");
02435 
02436   return stat;
02437 }
02438 
02439 
02440 
02441 template <typename T>
02442 inline void Communicator::receive (const unsigned int src_processor_id,
02443                                    std::set<T> &buf,
02444                                    const DataType &type,
02445                                    Request &req,
02446                                    const MessageTag &tag) const
02447 {
02448   START_LOG("receive()", "Parallel");
02449 
02450   // Allocate temporary buffer on the heap so it lives until after
02451   // the non-blocking send completes
02452   std::vector<T> *vecbuf = new std::vector<T>();
02453 
02454   // We can clear the set, but the Request::wait() will need to
02455   // handle copying our temporary buffer to it
02456   buf.clear();
02457 
02458   req.add_post_wait_work
02459     (new Parallel::PostWaitCopyBuffer<std::vector<T>,
02460      std::back_insert_iterator<std::set<T> > >
02461      (vecbuf, std::back_inserter(buf)));
02462 
02463   // Make the Request::wait() then handle deleting the buffer
02464   req.add_post_wait_work
02465     (new Parallel::PostWaitDeleteBuffer<std::vector<T> >(vecbuf));
02466 
02467   this->receive(src_processor_id, *vecbuf, type, req, tag);
02468 
02469   STOP_LOG("receive()", "Parallel");
02470 }
02471 
02472 
02473 
02474 template <typename T>
02475 inline Status Communicator::receive (const unsigned int src_processor_id,
02476                                      std::vector<T> &buf,
02477                                      const MessageTag &tag) const
02478 {
02479   return this->receive
02480     (src_processor_id, buf,
02481      StandardType<T>(buf.empty() ? NULL : &buf.front()), tag);
02482 }
02483 
02484 
02485 
02486 template <typename T>
02487 inline void Communicator::receive (const unsigned int src_processor_id,
02488                                    std::vector<T> &buf,
02489                                    Request &req,
02490                                    const MessageTag &tag) const
02491 {
02492   this->receive (src_processor_id, buf,
02493                  StandardType<T>(buf.empty() ? NULL : &buf.front()), req, tag);
02494 }
02495 
02496 
02497 
02498 template <typename T>
02499 inline Status Communicator::receive (const unsigned int src_processor_id,
02500                                      std::vector<T> &buf,
02501                                      const DataType &type,
02502                                      const MessageTag &tag) const
02503 {
02504   START_LOG("receive()", "Parallel");
02505 
02506   // Get the status of the message, explicitly provide the
02507   // datatype so we can later query the size
02508   Status stat(this->probe(src_processor_id, tag), type);
02509 
02510   buf.resize(stat.size());
02511 
02512 #ifndef NDEBUG
02513   // Only catch the return value when asserts are active.
02514   const int ierr =
02515 #endif
02516     MPI_Recv (buf.empty() ? NULL : &buf[0],
02517               cast_int<int>(buf.size()),
02518               type,
02519               src_processor_id,
02520               tag.value(),
02521               this->get(),
02522               stat.get());
02523   libmesh_assert (ierr == MPI_SUCCESS);
02524 
02525   STOP_LOG("receive()", "Parallel");
02526 
02527   return stat;
02528 }
02529 
02530 
02531 
02532 template <typename T>
02533 inline void Communicator::receive (const unsigned int src_processor_id,
02534                                    std::vector<T> &buf,
02535                                    const DataType &type,
02536                                    Request &req,
02537                                    const MessageTag &tag) const
02538 {
02539   START_LOG("receive()", "Parallel");
02540 
02541 #ifndef NDEBUG
02542   // Only catch the return value when asserts are active.
02543   const int ierr =
02544 #endif
02545     MPI_Irecv (buf.empty() ? NULL : &buf[0],
02546                cast_int<int>(buf.size()),
02547                type,
02548                src_processor_id,
02549                tag.value(),
02550                this->get(),
02551                req.get());
02552   libmesh_assert (ierr == MPI_SUCCESS);
02553 
02554   STOP_LOG("receive()", "Parallel");
02555 }
02556 
02557 
02558 template <typename Context, typename OutputIter>
02559 inline void Communicator::receive_packed_range (const unsigned int src_processor_id,
02560                                                 Context *context,
02561                                                 OutputIter out,
02562                                                 const MessageTag &tag) const
02563 {
02564   typedef typename std::iterator_traits<OutputIter>::value_type T;
02565   typedef typename Parallel::BufferType<T>::type buffer_t;
02566 
02567   // Receive serialized variable size objects as sequences of buffer_t
02568   std::size_t total_buffer_size = 0;
02569   this->receive(src_processor_id, total_buffer_size, tag);
02570 
02571   std::size_t received_buffer_size = 0;
02572   while (received_buffer_size < total_buffer_size)
02573     {
02574       std::vector<buffer_t> buffer;
02575       this->receive(src_processor_id, buffer, tag);
02576       received_buffer_size += buffer.size();
02577       Parallel::unpack_range(buffer, context, out);
02578     }
02579 }
02580 
02581 
02582 
02583 // template <typename Context, typename OutputIter>
02584 // inline void Communicator::receive_packed_range (const unsigned int src_processor_id,
02585 //                                                 Context *context,
02586 //                                                 OutputIter out,
02587 //                                                 Request &req,
02588 //                                                 const MessageTag &tag) const
02589 // {
02590 //   typedef typename std::iterator_traits<OutputIter>::value_type T;
02591 //   typedef typename Parallel::BufferType<T>::type buffer_t;
02592 //
02593 //   // Receive serialized variable size objects as a sequence of
02594 //   // buffer_t.
02595 //   // Allocate a buffer on the heap so we don't have to free it until
02596 //   // after the Request::wait()
02597 //   std::vector<buffer_t> *buffer = new std::vector<buffer_t>();
02598 //   this->receive(src_processor_id, *buffer, req, tag);
02599 //
02600 //   // Make the Request::wait() handle unpacking the buffer
02601 //   req.add_post_wait_work
02602 //     (new Parallel::PostWaitUnpackBuffer<std::vector<buffer_t>, Context, OutputIter>
02603 //      (buffer, context, out));
02604 //
02605 //   // Make the Request::wait() then handle deleting the buffer
02606 //   req.add_post_wait_work
02607 //     (new Parallel::PostWaitDeleteBuffer<std::vector<buffer_t> >(buffer));
02608 // }
02609 
02610 
02611 
02612 template <typename T1, typename T2>
02613 inline void Communicator::send_receive(const unsigned int dest_processor_id,
02614                                        std::vector<T1> &sendvec,
02615                                        const DataType &type1,
02616                                        const unsigned int source_processor_id,
02617                                        std::vector<T2> &recv,
02618                                        const DataType &type2,
02619                                        const MessageTag &send_tag,
02620                                        const MessageTag &recv_tag) const
02621 {
02622   START_LOG("send_receive()", "Parallel");
02623 
02624   if (dest_processor_id   == this->rank() &&
02625       source_processor_id == this->rank())
02626     {
02627       recv = sendvec;
02628       STOP_LOG("send_receive()", "Parallel");
02629       return;
02630     }
02631 
02632   Parallel::Request req;
02633 
02634   this->send (dest_processor_id, sendvec, type1, req, send_tag);
02635 
02636   this->receive (source_processor_id, recv, type2, recv_tag);
02637 
02638   req.wait();
02639 
02640   STOP_LOG("send_receive()", "Parallel");
02641 }
02642 
02643 
02644 
02645 template <typename T1, typename T2>
02646 inline void Communicator::send_receive(const unsigned int dest_processor_id,
02647                                        T1 &sendvec,
02648                                        const unsigned int source_processor_id,
02649                                        T2 &recv,
02650                                        const MessageTag &send_tag,
02651                                        const MessageTag &recv_tag) const
02652 {
02653   START_LOG("send_receive()", "Parallel");
02654 
02655   if (dest_processor_id   == this->rank() &&
02656       source_processor_id == this->rank())
02657     {
02658       recv = sendvec;
02659       STOP_LOG("send_receive()", "Parallel");
02660       return;
02661     }
02662 
02663   MPI_Sendrecv(&sendvec, 1, StandardType<T1>(&sendvec),
02664                dest_processor_id, send_tag.value(),
02665                &recv, 1, StandardType<T2>(&recv),
02666                source_processor_id, recv_tag.value(),
02667                this->get(),
02668                MPI_STATUS_IGNORE);
02669 
02670   STOP_LOG("send_receive()", "Parallel");
02671 }
02672 
02673 
02674 
02675 // This is both a declaration and definition for a new overloaded
02676 // function template, so we have to re-specify the default
02677 // arguments.
02678 //
02679 // We specialize on the T1==T2 case so that we can handle
02680 // send_receive-to-self with a plain copy rather than going through
02681 // MPI.
02682 template <typename T>
02683 inline void Communicator::send_receive(const unsigned int dest_processor_id,
02684                                        std::vector<T> &sendvec,
02685                                        const unsigned int source_processor_id,
02686                                        std::vector<T> &recv,
02687                                        const MessageTag &send_tag,
02688                                        const MessageTag &recv_tag) const
02689 {
02690   if (dest_processor_id   == this->rank() &&
02691       source_processor_id == this->rank())
02692     {
02693       START_LOG("send_receive()", "Parallel");
02694       recv = sendvec;
02695       STOP_LOG("send_receive()", "Parallel");
02696       return;
02697     }
02698 
02699   // Call the user-defined type version with automatic
02700   // type conversion based on template argument:
02701   this->send_receive (dest_processor_id, sendvec,
02702                       StandardType<T>(sendvec.empty() ? NULL : &sendvec[0]),
02703                       source_processor_id, recv,
02704                       StandardType<T>(recv.empty() ? NULL : &recv[0]),
02705                       send_tag, recv_tag);
02706 }
02707 
02708 
02709 // This is both a declaration and definition for a new overloaded
02710 // function template, so we have to re-specify the default arguments
02711 template <typename T1, typename T2>
02712 inline void Communicator::send_receive(const unsigned int dest_processor_id,
02713                                        std::vector<T1> &sendvec,
02714                                        const unsigned int source_processor_id,
02715                                        std::vector<T2> &recv,
02716                                        const MessageTag &send_tag,
02717                                        const MessageTag &recv_tag) const
02718 {
02719   // Call the user-defined type version with automatic
02720   // type conversion based on template argument:
02721   this->send_receive (dest_processor_id, sendvec,
02722                       StandardType<T1>(sendvec.empty() ? NULL : &sendvec[0]),
02723                       source_processor_id, recv,
02724                       StandardType<T2>(recv.empty() ? NULL : &recv[0]),
02725                       send_tag, recv_tag);
02726 }
02727 
02728 
02729 
02730 
02731 template <typename T1, typename T2>
02732 inline void Communicator::send_receive(const unsigned int dest_processor_id,
02733                                        std::vector<std::vector<T1> > &sendvec,
02734                                        const unsigned int source_processor_id,
02735                                        std::vector<std::vector<T2> > &recv,
02736                                        const MessageTag & /* send_tag */,
02737                                        const MessageTag & /* recv_tag */) const
02738 {
02739   // FIXME - why aren't we honoring send_tag and recv_tag here?
02740   send_receive_vec_of_vec
02741     (dest_processor_id, sendvec, source_processor_id, recv,
02742      no_tag, any_tag, *this);
02743 }
02744 
02745 
02746 
02747 // This is both a declaration and definition for a new overloaded
02748 // function template, so we have to re-specify the default arguments
02749 template <typename T>
02750 inline void Communicator::send_receive(const unsigned int dest_processor_id,
02751                                        std::vector<std::vector<T> > &sendvec,
02752                                        const unsigned int source_processor_id,
02753                                        std::vector<std::vector<T> > &recv,
02754                                        const MessageTag & /* send_tag */,
02755                                        const MessageTag & /* recv_tag */) const
02756 {
02757   // FIXME - why aren't we honoring send_tag and recv_tag here?
02758   send_receive_vec_of_vec
02759     (dest_processor_id, sendvec, source_processor_id, recv,
02760      no_tag, any_tag, *this);
02761 }
02762 
02763 
02764 
02765 
02766 template <typename Context1, typename RangeIter, typename Context2, typename OutputIter>
02767 inline void Communicator::send_receive_packed_range
02768 (const unsigned int dest_processor_id,
02769  const Context1* context1,
02770  RangeIter send_begin,
02771  const RangeIter send_end,
02772  const unsigned int source_processor_id,
02773  Context2* context2,
02774  OutputIter out,
02775  const MessageTag &send_tag,
02776  const MessageTag &recv_tag) const
02777 {
02778   START_LOG("send_receive()", "Parallel");
02779 
02780   Parallel::Request req;
02781 
02782   this->send_packed_range (dest_processor_id, context1, send_begin, send_end,
02783                            req, send_tag);
02784 
02785   this->receive_packed_range (source_processor_id, context2, out, recv_tag);
02786 
02787   req.wait();
02788 
02789   STOP_LOG("send_receive()", "Parallel");
02790 
02791 }
02792 
02793 
02794 
02795 template <typename T>
02796 inline void Communicator::gather(const unsigned int root_id,
02797                                  T sendval,
02798                                  std::vector<T> &recv) const
02799 {
02800   libmesh_assert_less (root_id, this->size());
02801 
02802   if (this->rank() == root_id)
02803     recv.resize(this->size());
02804 
02805   if (this->size() > 1)
02806     {
02807       START_LOG("gather()", "Parallel");
02808 
02809       StandardType<T> send_type(&sendval);
02810 
02811       MPI_Gather(&sendval,
02812                  1,
02813                  send_type,
02814                  recv.empty() ? NULL : &recv[0],
02815                  1,
02816                  send_type,
02817                  root_id,
02818                  this->get());
02819 
02820       STOP_LOG("gather()", "Parallel");
02821     }
02822   else
02823     recv[0] = sendval;
02824 }
02825 
02826 
02827 
02828 template <typename T>
02829 inline void Communicator::gather(const unsigned int root_id,
02830                                  std::vector<T> &r) const
02831 {
02832   if (this->size() == 1)
02833     {
02834       libmesh_assert (!this->rank());
02835       libmesh_assert (!root_id);
02836       return;
02837     }
02838 
02839   libmesh_assert_less (root_id, this->size());
02840 
02841   std::vector<int>
02842     sendlengths  (this->size(), 0),
02843     displacements(this->size(), 0);
02844 
02845   const int mysize = static_cast<int>(r.size());
02846   this->allgather(mysize, sendlengths);
02847 
02848   START_LOG("gather()", "Parallel");
02849 
02850   // Find the total size of the final array and
02851   // set up the displacement offsets for each processor.
02852   unsigned int globalsize = 0;
02853   for (unsigned int i=0; i != this->size(); ++i)
02854     {
02855       displacements[i] = globalsize;
02856       globalsize += sendlengths[i];
02857     }
02858 
02859   // Check for quick return
02860   if (globalsize == 0)
02861     {
02862       STOP_LOG("gather()", "Parallel");
02863       return;
02864     }
02865 
02866   // copy the input buffer
02867   std::vector<T> r_src(r);
02868 
02869   // now resize it to hold the global data
02870   // on the receiving processor
02871   if (root_id == this->rank())
02872     r.resize(globalsize);
02873 
02874   // and get the data from the remote processors
02875 #ifndef NDEBUG
02876   // Only catch the return value when asserts are active.
02877   const int ierr =
02878 #endif
02879     MPI_Gatherv (r_src.empty() ? NULL : &r_src[0], mysize, StandardType<T>(),
02880                  r.empty() ? NULL : &r[0], &sendlengths[0],
02881                  &displacements[0], StandardType<T>(),
02882                  root_id,
02883                  this->get());
02884 
02885   libmesh_assert (ierr == MPI_SUCCESS);
02886 
02887   STOP_LOG("gather()", "Parallel");
02888 }
02889 
02890 
02891 template <typename T>
02892 inline void Communicator::allgather(T sendval,
02893                                     std::vector<T> &recv) const
02894 {
02895   START_LOG ("allgather()","Parallel");
02896 
02897   libmesh_assert(this->size());
02898   recv.resize(this->size());
02899 
02900   unsigned int comm_size = this->size();
02901   if (comm_size > 1)
02902     {
02903       StandardType<T> send_type(&sendval);
02904 
02905       MPI_Allgather (&sendval,
02906                      1,
02907                      send_type,
02908                      &recv[0],
02909                      1,
02910                      send_type,
02911                      this->get());
02912     }
02913   else if (comm_size > 0)
02914     recv[0] = sendval;
02915 
02916   STOP_LOG ("allgather()","Parallel");
02917 }
02918 
02919 
02920 
02921 template <typename T>
02922 inline void Communicator::allgather
02923 (std::vector<T> &r,
02924  const bool identical_buffer_sizes) const
02925 {
02926   if (this->size() < 2)
02927     return;
02928 
02929   START_LOG("allgather()", "Parallel");
02930 
02931   if (identical_buffer_sizes)
02932     {
02933       if (r.empty())
02934         return;
02935 
02936       libmesh_assert(this->verify(r.size()));
02937 
02938       std::vector<T> r_src(r.size()*this->size());
02939       r_src.swap(r);
02940       StandardType<T> send_type(&r_src[0]);
02941 
02942       MPI_Allgather (&r_src[0],
02943                      cast_int<int>(r_src.size()),
02944                      send_type,
02945                      &r[0],
02946                      cast_int<int>(r_src.size()),
02947                      send_type,
02948                      this->get());
02949       libmesh_assert(this->verify(r));
02950       STOP_LOG("allgather()", "Parallel");
02951       return;
02952     }
02953 
02954   std::vector<int>
02955     sendlengths  (this->size(), 0),
02956     displacements(this->size(), 0);
02957 
02958   const int mysize = static_cast<int>(r.size());
02959   this->allgather(mysize, sendlengths);
02960 
02961   // Find the total size of the final array and
02962   // set up the displacement offsets for each processor.
02963   unsigned int globalsize = 0;
02964   for (unsigned int i=0; i != this->size(); ++i)
02965     {
02966       displacements[i] = globalsize;
02967       globalsize += sendlengths[i];
02968     }
02969 
02970   // Check for quick return
02971   if (globalsize == 0)
02972     {
02973       STOP_LOG("allgather()", "Parallel");
02974       return;
02975     }
02976 
02977   // copy the input buffer
02978   std::vector<T> r_src(globalsize);
02979   r_src.swap(r);
02980 
02981   StandardType<T> send_type(&r[0]);
02982 
02983   // and get the data from the remote processors.
02984   // Pass NULL if our vector is empty.
02985 #ifndef NDEBUG
02986   // Only catch the return value when asserts are active.
02987   const int ierr =
02988 #endif
02989     MPI_Allgatherv (r_src.empty() ? NULL : &r_src[0], mysize, send_type,
02990                     &r[0], &sendlengths[0],
02991                     &displacements[0], send_type, this->get());
02992 
02993   libmesh_assert (ierr == MPI_SUCCESS);
02994 
02995   STOP_LOG("allgather()", "Parallel");
02996 }
02997 
02998 
02999 template <typename Context, typename Iter, typename OutputIter>
03000 inline void Communicator::gather_packed_range
03001 (const unsigned int root_id,
03002  Context *context,
03003  Iter range_begin,
03004  const Iter range_end,
03005  OutputIter out) const
03006 {
03007   typedef typename std::iterator_traits<Iter>::value_type T;
03008   typedef typename Parallel::BufferType<T>::type buffer_t;
03009 
03010   bool nonempty_range = (range_begin != range_end);
03011   this->max(nonempty_range);
03012 
03013   while (nonempty_range)
03014     {
03015       // We will serialize variable size objects from *range_begin to
03016       // *range_end as a sequence of ints in this buffer
03017       std::vector<buffer_t> buffer;
03018 
03019       range_begin = Parallel::pack_range(context, range_begin, range_end, buffer);
03020 
03021       this->gather(root_id, buffer);
03022 
03023       Parallel::unpack_range(buffer, context, out);
03024 
03025       nonempty_range = (range_begin != range_end);
03026       this->max(nonempty_range);
03027     }
03028 }
03029 
03030 
03031 template <typename Context, typename Iter, typename OutputIter>
03032 inline void Communicator::allgather_packed_range
03033 (Context *context,
03034  Iter range_begin,
03035  const Iter range_end,
03036  OutputIter out) const
03037 {
03038   typedef typename std::iterator_traits<Iter>::value_type T;
03039   typedef typename Parallel::BufferType<T>::type buffer_t;
03040 
03041   bool nonempty_range = (range_begin != range_end);
03042   this->max(nonempty_range);
03043 
03044   while (nonempty_range)
03045     {
03046       // We will serialize variable size objects from *range_begin to
03047       // *range_end as a sequence of ints in this buffer
03048       std::vector<buffer_t> buffer;
03049 
03050       range_begin = Parallel::pack_range(context, range_begin, range_end, buffer);
03051 
03052       this->allgather(buffer, false);
03053 
03054       libmesh_assert(buffer.size());
03055 
03056       Parallel::unpack_range(buffer, context, out);
03057 
03058       nonempty_range = (range_begin != range_end);
03059       this->max(nonempty_range);
03060     }
03061 }
03062 
03063 
03064 template <typename T>
03065 inline void Communicator::alltoall(std::vector<T> &buf) const
03066 {
03067   if (this->size() < 2 || buf.empty())
03068     return;
03069 
03070   START_LOG("alltoall()", "Parallel");
03071 
03072   // the per-processor size.  this is the same for all
03073   // processors using MPI_Alltoall, could be variable
03074   // using MPI_Alltoallv
03075   const int size_per_proc =
03076     cast_int<int>(buf.size()/this->size());
03077 
03078   libmesh_assert_equal_to (buf.size()%this->size(), 0);
03079 
03080   libmesh_assert(this->verify(size_per_proc));
03081 
03082   std::vector<T> tmp(buf);
03083 
03084   StandardType<T> send_type(&tmp[0]);
03085 
03086 #ifndef NDEBUG
03087   // Only catch the return value when asserts are active.
03088   const int ierr =
03089 #endif
03090     MPI_Alltoall (&tmp[0],
03091                   size_per_proc,
03092                   send_type,
03093                   &buf[0],
03094                   size_per_proc,
03095                   send_type,
03096                   this->get());
03097   libmesh_assert (ierr == MPI_SUCCESS);
03098 
03099   STOP_LOG("alltoall()", "Parallel");
03100 }
03101 
03102 
03103 
03104 template <typename T>
03105 inline void Communicator::broadcast (T &data, const unsigned int root_id) const
03106 {
03107   if (this->size() == 1)
03108     {
03109       libmesh_assert (!this->rank());
03110       libmesh_assert (!root_id);
03111       return;
03112     }
03113 
03114   libmesh_assert_less (root_id, this->size());
03115 
03116   START_LOG("broadcast()", "Parallel");
03117 
03118   // Spread data to remote processors.
03119 #ifndef NDEBUG
03120   // Only catch the return value when asserts are active.
03121   const int ierr =
03122 #endif
03123     MPI_Bcast (&data, 1, StandardType<T>(&data), root_id, this->get());
03124 
03125   libmesh_assert (ierr == MPI_SUCCESS);
03126 
03127   STOP_LOG("broadcast()", "Parallel");
03128 }
03129 
03130 
03131 template <typename T>
03132 inline void Communicator::broadcast (std::basic_string<T> &data,
03133                                      const unsigned int root_id) const
03134 {
03135   if (this->size() == 1)
03136     {
03137       libmesh_assert (!this->rank());
03138       libmesh_assert (!root_id);
03139       return;
03140     }
03141 
03142   libmesh_assert_less (root_id, this->size());
03143 
03144   START_LOG("broadcast()", "Parallel");
03145 
03146   std::size_t data_size = data.size();
03147   this->broadcast(data_size, root_id);
03148 
03149   std::vector<T> data_c(data_size);
03150 #ifndef NDEBUG
03151   std::string orig(data);
03152 #endif
03153 
03154   if (this->rank() == root_id)
03155     for(std::size_t i=0; i<data.size(); i++)
03156       data_c[i] = data[i];
03157 
03158   this->broadcast (data_c, root_id);
03159 
03160   data.assign(data_c.begin(), data_c.end());
03161 
03162 #ifndef NDEBUG
03163   if (this->rank() == root_id)
03164     libmesh_assert_equal_to (data, orig);
03165 #endif
03166 
03167   STOP_LOG("broadcast()", "Parallel");
03168 }
03169 
03170 
03171 
03172 template <typename T>
03173 inline void Communicator::broadcast (std::vector<T> &data,
03174                                      const unsigned int root_id) const
03175 {
03176   if (this->size() == 1)
03177     {
03178       libmesh_assert (!this->rank());
03179       libmesh_assert (!root_id);
03180       return;
03181     }
03182 
03183   libmesh_assert_less (root_id, this->size());
03184 
03185   START_LOG("broadcast()", "Parallel");
03186 
03187   // and get the data from the remote processors.
03188   // Pass NULL if our vector is empty.
03189   T *data_ptr = data.empty() ? NULL : &data[0];
03190 
03191 #ifndef NDEBUG
03192   // Only catch the return value when asserts are active.
03193   const int ierr =
03194 #endif
03195     MPI_Bcast (data_ptr, cast_int<int>(data.size()),
03196                StandardType<T>(data_ptr), root_id, this->get());
03197 
03198   libmesh_assert (ierr == MPI_SUCCESS);
03199 
03200   STOP_LOG("broadcast()", "Parallel");
03201 }
03202 
03203 
03204 template <typename T>
03205 inline void Communicator::broadcast (std::vector<std::basic_string<T> > &data,
03206                                      const unsigned int root_id) const
03207 {
03208   if (this->size() == 1)
03209     {
03210       libmesh_assert (!this->rank());
03211       libmesh_assert (!root_id);
03212       return;
03213     }
03214 
03215   libmesh_assert_less (root_id, this->size());
03216 
03217   START_LOG("broadcast()", "Parallel");
03218 
03219   std::size_t bufsize=0;
03220   if (root_id == this->rank())
03221     {
03222       for (std::size_t i=0; i<data.size(); ++i)
03223         bufsize += data[i].size() + 1;  // Add one for the string length word
03224     }
03225   this->broadcast(bufsize, root_id);
03226 
03227   // Here we use unsigned int to store up to 32-bit characters
03228   std::vector<unsigned int> temp; temp.reserve(bufsize);
03229   // Pack the strings
03230   if (root_id == this->rank())
03231     {
03232       for (unsigned int i=0; i<data.size(); ++i)
03233         {
03234           temp.push_back(cast_int<unsigned int>(data[i].size()));
03235           for (std::size_t j=0; j != data[i].size(); ++j)
03240             temp.push_back(data[i][j]);
03241         }
03242     }
03243   else
03244     temp.resize(bufsize);
03245 
03246   // broad cast the packed strings
03247   this->broadcast(temp, root_id);
03248 
03249   // Unpack the strings
03250   if (root_id != this->rank())
03251     {
03252       data.clear();
03253       std::vector<unsigned int>::const_iterator iter = temp.begin();
03254       while (iter != temp.end())
03255         {
03256           std::size_t curr_len = *iter++;
03257           data.push_back(std::string(iter, iter+curr_len));
03258           iter += curr_len;
03259         }
03260     }
03261 
03262   STOP_LOG("broadcast()", "Parallel");
03263 }
03264 
03265 
03266 
03267 
03268 template <typename T>
03269 inline void Communicator::broadcast (std::set<T> &data,
03270                                      const unsigned int root_id) const
03271 {
03272   if (this->size() == 1)
03273     {
03274       libmesh_assert (!this->rank());
03275       libmesh_assert (!root_id);
03276       return;
03277     }
03278 
03279   libmesh_assert_less (root_id, this->size());
03280 
03281   START_LOG("broadcast()", "Parallel");
03282 
03283   std::vector<T> vecdata;
03284   if (this->rank() == root_id)
03285     vecdata.assign(data.begin(), data.end());
03286 
03287   std::size_t vecsize = vecdata.size();
03288   this->broadcast(vecsize, root_id);
03289   if (this->rank() != root_id)
03290     vecdata.resize(vecsize);
03291 
03292   this->broadcast(vecdata, root_id);
03293   if (this->rank() != root_id)
03294     {
03295       data.clear();
03296       data.insert(vecdata.begin(), vecdata.end());
03297     }
03298 
03299   STOP_LOG("broadcast()", "Parallel");
03300 }
03301 
03302 
03303 
03304 template <typename T1, typename T2>
03305 inline void Communicator::broadcast(std::map<T1, T2> &data,
03306                                     const unsigned int root_id) const
03307 {
03308   if (this->size() == 1)
03309     {
03310       libmesh_assert (!this->rank());
03311       libmesh_assert (!root_id);
03312       return;
03313     }
03314 
03315   libmesh_assert_less (root_id, this->size());
03316 
03317   START_LOG("broadcast()", "Parallel");
03318 
03319   std::size_t data_size=data.size();
03320   this->broadcast(data_size, root_id);
03321 
03322   std::vector<T1> pair_first; pair_first.reserve(data_size);
03323   std::vector<T2> pair_second; pair_first.reserve(data_size);
03324 
03325   if (root_id == this->rank())
03326     {
03327       for (typename std::map<T1, T2>::const_iterator it = data.begin();
03328            it != data.end(); ++it)
03329         {
03330           pair_first.push_back(it->first);
03331           pair_second.push_back(it->second);
03332         }
03333     }
03334   else
03335     {
03336       pair_first.resize(data_size);
03337       pair_second.resize(data_size);
03338     }
03339 
03340   this->broadcast(pair_first, root_id);
03341   this->broadcast(pair_second, root_id);
03342 
03343   libmesh_assert(pair_first.size() == pair_first.size());
03344 
03345   if (this->rank() != root_id)
03346     {
03347       data.clear();
03348       for (std::size_t i=0; i<pair_first.size(); ++i)
03349         data[pair_first[i]] = pair_second[i];
03350     }
03351   STOP_LOG("broadcast()", "Parallel");
03352 }
03353 
03354 
03355 
03356 template <typename Context, typename OutputContext,
03357           typename Iter, typename OutputIter>
03358 inline void Communicator::broadcast_packed_range
03359 (const Context *context1,
03360  Iter range_begin,
03361  const Iter range_end,
03362  OutputContext *context2,
03363  OutputIter out,
03364  const unsigned int root_id) const
03365 {
03366   typedef typename std::iterator_traits<Iter>::value_type T;
03367   typedef typename Parallel::BufferType<T>::type buffer_t;
03368 
03369   do
03370     {
03371       // We will serialize variable size objects from *range_begin to
03372       // *range_end as a sequence of ints in this buffer
03373       std::vector<buffer_t> buffer;
03374 
03375       if (this->rank() == root_id)
03376         range_begin = Parallel::pack_range(context1, range_begin, range_end, buffer);
03377 
03378       // this->broadcast(vector) requires the receiving vectors to
03379       // already be the appropriate size
03380       std::size_t buffer_size = buffer.size();
03381       this->broadcast (buffer_size, root_id);
03382 
03383       // We continue until there's nothing left to broadcast
03384       if (!buffer_size)
03385         break;
03386 
03387       buffer.resize(buffer_size);
03388 
03389       // Broadcast the packed data
03390       this->broadcast (buffer, root_id);
03391 
03392       if (this->rank() != root_id)
03393         Parallel::unpack_range(buffer, context2, out);
03394     } while (true);  // break above when we reach buffer_size==0
03395 }
03396 
03397 
03398 #else // LIBMESH_HAVE_MPI
03399 
03400 template <typename T>
03401 inline bool Communicator::verify(const T &) const { return true; }
03402 
03403 template <typename T>
03404 inline bool Communicator::semiverify(const T *) const { return true; }
03405 
03406 template <typename T>
03407 inline void Communicator::min(T &) const {}
03408 
03409 template <typename T>
03410 inline void Communicator::minloc(T &, unsigned int &min_id) const { min_id = 0; }
03411 
03412 template <typename T>
03413 inline void Communicator::minloc
03414 (std::vector<T> &r, std::vector<unsigned int> &min_id) const
03415 { for (std::size_t i=0; i!= r.size(); ++i) min_id[i] = 0; }
03416 
03417 template <typename T>
03418 inline void Communicator::max(T &) const {}
03419 
03420 template <typename T>
03421 inline void Communicator::maxloc(T &, unsigned int &max_id) const { max_id = 0; }
03422 
03423 template <typename T>
03424 inline void Communicator::maxloc
03425 (std::vector<T> &r, std::vector<unsigned int> &max_id) const
03426 { for (std::size_t i=0; i!= r.size(); ++i) max_id[i] = 0; }
03427 
03428 template <typename T>
03429 inline void Communicator::sum(T &) const {}
03430 
03431 template <typename T>
03432 inline void Communicator::set_union(T&) const {}
03433 
03434 template <typename T>
03435 inline void Communicator::set_union(T&, const unsigned int root_id) const
03436 { libmesh_assert_equal_to(root_id, 0); }
03437 
03441 inline status Communicator::probe (const unsigned int,
03442                                    const MessageTag&) const
03443 { libmesh_not_implemented(); status s; return s; }
03444 
03448 template <typename T>
03449 inline void Communicator::send (const unsigned int, T&, const MessageTag &) const
03450 { libmesh_not_implemented(); }
03451 
03452 template <typename T>
03453 inline void Communicator::send (const unsigned int, T&, Request&,
03454                                 const MessageTag&) const
03455 { libmesh_not_implemented(); }
03456 
03457 template <typename T>
03458 inline void Communicator::send (const unsigned int, T&, const DataType&,
03459                                 const MessageTag &) const
03460 { libmesh_not_implemented(); }
03461 
03462 template <typename T>
03463 inline void Communicator::send (const unsigned int, T&, const DataType&, Request&,
03464                                 const MessageTag &) const
03465 { libmesh_not_implemented(); }
03466 
03467 template <typename Context, typename Iter>
03468 inline void Communicator::send_packed_range
03469 (const unsigned int, const Context*, Iter, const Iter, const MessageTag&) const
03470 { libmesh_not_implemented(); }
03471 
03472 template <typename Context, typename Iter>
03473 inline void Communicator::send_packed_range
03474 (const unsigned int, const Context*, Iter, const Iter, Request&,
03475  const MessageTag&) const
03476 { libmesh_not_implemented(); }
03477 
03481 template <typename T>
03482 inline Status Communicator::receive (const unsigned int, T&, const MessageTag&) const
03483 { libmesh_not_implemented(); return Status(); }
03484 
03485 template <typename T>
03486 inline void Communicator::receive
03487 (const unsigned int, T&, Request&, const MessageTag&) const
03488 { libmesh_not_implemented(); }
03489 
03490 template <typename T>
03491 inline Status Communicator::receive
03492 (const unsigned int, T&, const DataType&, const MessageTag&) const
03493 { libmesh_not_implemented(); return Status(); }
03494 
03495 template <typename T>
03496 inline void Communicator::receive
03497 (const unsigned int, T&, const DataType&, Request&, const MessageTag&) const
03498 { libmesh_not_implemented(); }
03499 
03500 template <typename Context, typename OutputIter>
03501 inline void Communicator::receive_packed_range
03502 (const unsigned int, Context*, OutputIter, const MessageTag&) const
03503 { libmesh_not_implemented(); }
03504 
03505 // template <typename Context, typename OutputIter>
03506 // inline void Communicator::receive_packed_range
03507 // (const unsigned int, Context*, OutputIter, Request&, const MessageTag&) const
03508 // { libmesh_not_implemented(); }
03509 
03513 template <typename T1, typename T2>
03514 inline void Communicator::send_receive (const unsigned int send_tgt,
03515                                         T1 &send_val,
03516                                         const unsigned int recv_source,
03517                                         T2 &recv_val,
03518                                         const MessageTag &,
03519                                         const MessageTag &) const
03520 {
03521   libmesh_assert_equal_to (send_tgt, 0);
03522   libmesh_assert_equal_to (recv_source, 0);
03523   recv_val = send_val;
03524 }
03525 
03531 template <typename Context1, typename RangeIter,
03532           typename Context2, typename OutputIter>
03533 inline void Communicator::send_receive_packed_range
03534 (const unsigned int /* dest_processor_id */, const Context1*,
03535  RangeIter /* send_begin */, const RangeIter /* send_end */,
03536  const unsigned int /* source_processor_id */, Context2*,
03537  OutputIter /* out */, const MessageTag &, const MessageTag &) const
03538 { libmesh_not_implemented(); }
03539 
03543 template <typename T>
03544 inline void Communicator::gather(const unsigned int root_id,
03545                                  T send_val,
03546                                  std::vector<T> &recv_val) const
03547 {
03548   libmesh_assert_equal_to (root_id, 0);
03549   recv_val.resize(1);
03550   recv_val[0] = send_val;
03551 }
03552 
03553 template <typename T>
03554 inline void Communicator::gather(const unsigned int root_id, std::vector<T>&) const
03555 { libmesh_assert_equal_to(root_id, 0); }
03556 
03557 template <typename T>
03558 inline void Communicator::allgather(T send_val, std::vector<T> &recv_val) const
03559 {
03560   recv_val.resize(1);
03561   recv_val[0] = send_val;
03562 }
03563 
03564 template <typename T>
03565 inline void Communicator::allgather(std::vector<T> &, const bool) const {}
03566 
03567 template <typename T>
03568 inline void Communicator::alltoall(std::vector<T> &) const {}
03569 
03570 template <typename T>
03571 inline void Communicator::broadcast (T &, const unsigned int root_id) const
03572 { libmesh_assert_equal_to(root_id, 0); }
03573 
03574 #endif // LIBMESH_HAVE_MPI
03575 
03576 } // namespace Parallel
03577 
03578 } // namespace libMesh
03579 
03580 #endif // LIBMESH_PARALLEL_IMPLEMENTATION_H