
#include <mpi.h>

#include <iostream>
#include <iomanip>

#include <cstdlib>

#include <vector>
#include <numeric>
#include <algorithm>

/**
  author: Olexandr Popov
  mail to: nevrazlyvy@gmail.com
*/

template< typename T >
 class dynamic_matrix
  {
    public:
      typedef typename std::vector<T>::size_type size_type;

    public:
      dynamic_matrix(size_type m, size_type n): matr(m * n), n(n) {}

      dynamic_matrix(): n(0) {}

      void resize(size_type m, size_type n) { matr.resize(m * n); this->n = n; }

      T * begin() { return &matr.front(); }
      T * end() { return &matr.back() + 1; }
      T * operator[](size_type i) { return &matr[i * n]; }

    private:
      std::vector<T> matr;
      size_type n;
  };


int main(int argc, char **argv)
{
  int errorkod = MPI_Init(&argc, &argv); 
	  
   if(errorkod)
    {
      std::cerr << "\nError starting MPI programm!\n";	   
      MPI_Abort(MPI_COMM_WORLD, errorkod);
    }

  int NumberProcesses;
  int id; 

   MPI_Comm_size(MPI_COMM_WORLD, &NumberProcesses);
   MPI_Comm_rank(MPI_COMM_WORLD, &id);

  int n;
  double t1, t2;

   if(!id)
    {
     while(1)
      {
        std::cout << "Please, enter the order of the matrix: " << std::flush;
        std::cin >> n; 

        if(n > 0 && std::cin) 
          break;

        std::cout << "\nError: Order of the matrix must > 0!\n";

        std::cin.clear();
        std::cin.sync();
      }

      t1 = MPI_Wtime();
    }  

   MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);


  int NumberActiveProcesses = std::min(NumberProcesses, n);

  int m = n / NumberActiveProcesses + (id >= NumberActiveProcesses - n % NumberActiveProcesses);

  long long sum = 0;

  dynamic_matrix<int> matr;

  std::vector<MPI_Request> array_of_requests;


   if(!id)
    {
       matr.resize(n, n);
     
       std::generate(matr.begin(), matr.end(), rand);
       
       array_of_requests.resize(NumberActiveProcesses - 1); 

      int i = 1;
      int row = 0;
      int m_slv;

       for(; i < NumberActiveProcesses; ++i) 
        {
          m_slv = n / NumberActiveProcesses + (i >= NumberActiveProcesses - n % NumberActiveProcesses);
          MPI_Isend(matr[row], m_slv * n, MPI_INT, i, i, MPI_COMM_WORLD, &array_of_requests[i - 1]);
          row += m_slv;
        }

       sum = std::accumulate(matr[row], matr.end(), sum);

    }
   else
    if(id < NumberActiveProcesses)
     {
       matr.resize(m, n);

       MPI_Recv(matr[0], m * n, MPI_INT, 0, id, MPI_COMM_WORLD, &MPI_Status());

       sum = std::accumulate(matr.begin(), matr.end(), sum);

     }

  long long total_sum;

   MPI_Reduce(&sum, &total_sum, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);


   if(!id)
    {
      t2 = MPI_Wtime();

     int i = 0;
     int j;

      std::cout << "\nMatrix:\n\n";

      for(; i < n; ++i)
       {
         for(j = 0; j < n; ++j)
           std::cout << std::setw(11) << matr[i][j]; 
         
         std::cout << '\n';
       }

      std::cout << "\nTotal sum of all elemets is " << total_sum;
      std::cout << "\nCalculation time is " << (t2 - t1) << " seconds\n";
    }


   MPI_Finalize();

  return 0;
}
