Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

#include <stdio.h> #include <stdlib.h> #include <assert.h> #include <sys/time.h>

ID: 3689671 • Letter: #

Question

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <sys/time.h>
#include <time.h>

#define USE_MPI 0

#if USE_MPI
#include <mpi.h>
#endif

static double timer() {
struct timeval tp;
gettimeofday(&tp, NULL);
return ((double) (tp.tv_sec) + 1e-6 * tp.tv_usec);
}

int main(int argc, char **argv) {

int rank, num_tasks;

/* Initialize MPI */
#if USE_MPI
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &num_tasks);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
// printf("Hello world from rank %3d of %3d ", rank, num_tasks);
#else
rank = 0;
num_tasks = 1;
#endif

if (argc != 3) {
if (rank == 0) {
fprintf(stderr, "%s <m> <k> ", argv[0]);
fprintf(stderr, "Program for parallel Game of Life ");
fprintf(stderr, "with 1D grid partitioning ");
fprintf(stderr, "<m>: grid dimension (an mxm grid is created) ");
fprintf(stderr, "<k>: number of time steps ");
fprintf(stderr, "(initial pattern specified inside code) ");
#if USE_MPI
MPI_Abort(MPI_COMM_WORLD, 1);
#else
exit(1);
#endif
}
}

int m, k;

m = atoi(argv[1]);
assert(m > 2);
assert(m <= 10000);

k = atoi(argv[2]);
assert(k > 0);
assert(k <= 1000);

/* ensure that m is a multiple of num_tasks */
m = (m/num_tasks) * num_tasks;
  
int m_p = (m/num_tasks);

/* print new m to let user know n has been modified */
if (rank == 0) {
fprintf(stderr, "Using m: %d, m_p: %d, k: %d ", m, m_p, k);
fprintf(stderr, "Requires %3.6lf MB of memory per task ",
((2*4.0*m_p)*m/1e6));
}

/* Linearizing 2D grids to 1D using row-major ordering */
/* grid[i][j] would be grid[i*n+j] */
int *grid_current;
int *grid_next;
  
grid_current = (int *) malloc(m_p * m * sizeof(int));
assert(grid_current != 0);

grid_next = (int *) malloc(m_p * m * sizeof(int));
assert(grid_next != 0);

int i, j, t;

/* static initalization, so that we can verify output */
/* using very simple initialization right now */
/* this isn't a good check for parallel debugging */
#ifdef _OPENMP
#pragma omp parallel for private(i,j)
#endif
for (i=0; i<m_p; i++) {
for (j=0; j<m; j++) {
grid_current[i*m+j] = 0;
grid_next[i*m+j] = 0;
}
}

/* initializing some cells in the middle */
assert((m*m_p/2 + m/2 + 3) < m_p*m);
grid_current[m*m_p/2 + m/2 + 0] = 1;
grid_current[m*m_p/2 + m/2 + 1] = 1;
grid_current[m*m_p/2 + m/2 + 2] = 1;
grid_current[m*m_p/2 + m/2 + 3] = 1;

#if USE_MPI
MPI_Barrier(MPI_COMM_WORLD);
#endif

double elt = 0.0;
if (rank == 0)
elt = timer();

#if USE_MPI
/* Parallel code goes here */
/* grid_current and grid_next must be updated correctly */
if (rank == 0)
fprintf(stderr, "Fill in code here ");

#else
/* serial code */
/* considering only internal cells */
for (t=0; t<k; t++) {
for (i=1; i<m-1; i++) {
for (j=1; j<m-1; j++) {
/* avoiding conditionals inside inner loop */
int prev_state = grid_current[i*m+j];
int num_alive =
grid_current[(i )*m+j-1] +
grid_current[(i )*m+j+1] +
grid_current[(i-1)*m+j-1] +
grid_current[(i-1)*m+j ] +
grid_current[(i-1)*m+j+1] +
grid_current[(i+1)*m+j-1] +
grid_current[(i+1)*m+j ] +
grid_current[(i+1)*m+j+1];

grid_next[i*m+j] = prev_state * ((num_alive == 2) + (num_alive == 3)) + (1 - prev_state) * (num_alive == 3);
}
}
/* swap current and next */
int *grid_tmp = grid_next;
grid_next = grid_current;
grid_current = grid_tmp;
}
#endif

if (rank == 0)
elt = timer() - elt;

/* Verify */
int verify_failed = 0;
for (i=0; i<m_p; i++) {
for (j=0; j<m; j++) {
/* Add verification code here */
}
}

if (verify_failed) {
fprintf(stderr, "ERROR: rank %d, verification failed, exiting! ", rank);
#if USE_MPI
MPI_Abort(MPI_COMM_WORLD, 2);
#else
exit(2);
#endif
}

if (rank == 0) {
fprintf(stderr, "Time taken: %3.3lf s. ", elt);
fprintf(stderr, "Performance: %3.3lf billion cell updates/s ",
(1.0*m*m)*k/(elt*1e9));
}

/* free memory */
free(grid_current); free(grid_next);

/* Shut down MPI */
#if USE_MPI
MPI_Finalize();
#endif

return 0;
}

Explanation / Answer

#include #include #include #include #include /* No OpenMP for this HW */ #ifdef _OPENMP #include #endif /* set this define to 0 to build serial code */ #define USE_MPI 1 #if USE_MPI #include #endif #define SEND_B 0 // tag for mpi communication of matrix B static double timer() { struct timeval tp; gettimeofday(&tp, NULL); return ((double) (tp.tv_sec) + 1e-6 * tp.tv_usec); /* The code below is for another high resolution timer */ /* I'm using gettimeofday because it's more portable */ /* struct timespec tp; clock_gettime(CLOCK_MONOTONIC, &tp); return ((double) (tp.tv_sec) + 1e-9 * tp.tv_nsec); */ } int main(int argc, char **argv) { int rank, num_tasks; /* Initialize MPI */ #if USE_MPI MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &num_tasks); MPI_Comm_rank(MPI_COMM_WORLD, &rank); // printf("Hello world from rank %3d of %3d ", rank, num_tasks); // set id of next processor after the current (proc 0 and last one loop around) int nextProc = (rank+1) % num_tasks; int prevProc = (rank+num_tasks-1) % num_tasks; //can't remember how % is defined on negatives, so add num_tasks for safety #else rank = 0; num_tasks = 1; #endif if (argc != 2) { if (rank == 0) { fprintf(stderr, "%s ", argv[0]); fprintf(stderr, "Program for parallel dense matrix-matrix multiplication "); fprintf(stderr, "with 1D row partitioning "); fprintf(stderr, ": matrix dimension (an nxn dense matrix is created) "); #if USE_MPI MPI_Abort(MPI_COMM_WORLD, 1); #else exit(1); #endif } } int n; n = atoi(argv[1]); assert(n > 0); assert(n < 10000); /* ensure that n is a multiple of num_tasks */ n = (n/num_tasks) * num_tasks; int n_p = (n/num_tasks); /* print new n to let user know n has been modified */ if (rank == 0) { fprintf(stderr, "n: %d, n_p: %d ", n, n_p); fprintf(stderr, "Requires %3.6lf MB of memory per task ", ((3*4.0*n_p)*n/1e6)); } float *A, *B, *C; A = (float *) malloc(n_p * n * sizeof(float)); assert(A != 0); B = (float *) malloc(n_p * n * sizeof(float)); assert(B != 0); C = (float *) malloc(n_p * n * sizeof(float)); assert(C != 0); /* linearized matrices in row-major storage */ /* A[i][j] would be A[i*n+j] */ int i, j; /* static initalization, so that we can verify output */ /* using very simple initialization right now */ /* this isn't a good check for parallel debugging */ #ifdef _OPENMP #pragma omp parallel for private(i,j) #endif for (i=0; i