Task 2.4 - Parallelizing Matrix Multiplication — You are given a skeleton sequential program for matrix multiplication.
ID: 3911195 • Letter: T
Question
Task 2.4 - Parallelizing Matrix Multiplication
You are given a skeleton sequential program for matrix multiplication below:
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>

#define M 500
#define N 500

/*
 * Sequential matrix-multiplication skeleton: C = A * B for M x N matrices.
 * Timing brackets only the triple loop, using omp_get_wtime() so the same
 * harness remains correct once OpenMP pragmas are added (wall-clock time,
 * not per-thread CPU time).
 */
int main(int argc, char **argv) { /* argv is char**, not char* */
    /* set number of threads here */
    omp_set_num_threads(8);
    int i, j, k;
    double sum;
    double **A, **B, **C;

    /* Row-pointer allocation; check every allocation before use. */
    A = malloc(M * sizeof(double *));
    B = malloc(M * sizeof(double *));
    C = malloc(M * sizeof(double *));
    if (!A || !B || !C) {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }
    for (i = 0; i < M; i++) {
        A[i] = malloc(N * sizeof(double));
        B[i] = malloc(N * sizeof(double));
        C[i] = malloc(N * sizeof(double));
        if (!A[i] || !B[i] || !C[i]) {
            fprintf(stderr, "out of memory\n");
            return EXIT_FAILURE;
        }
    }

    double start, end;

    /* Deterministic initialization so timing runs are comparable. */
    for (i = 0; i < M; i++) {
        for (j = 0; j < N; j++) {
            A[i][j] = j * 1;
            B[i][j] = i * j + 2;
            C[i][j] = j - i * 2;
        }
    }

    start = omp_get_wtime();
    for (i = 0; i < M; i++) {
        for (j = 0; j < N; j++) {
            sum = 0;
            for (k = 0; k < M; k++) {
                sum += A[i][k] * B[k][j];
            }
            C[i][j] = sum;
        }
    }
    end = omp_get_wtime();
    printf("Time of computation: %f\n", end - start);

    /* Release every row, then the row-pointer arrays themselves. */
    for (i = 0; i < M; i++) {
        free(A[i]);
        free(B[i]);
        free(C[i]);
    }
    free(A);
    free(B);
    free(C);
    return 0;
}
You are to parallelize this algorithm in three different ways:
1. Add the necessary pragma to parallelize the outer for loop
2. Remove the pragma for the outer for loop and create a pragma for the middle for loop
3. Add the necessary pragmas to parallelize both the outer and middle for loops
and collect timing data given 2 threads, 4 threads, 8 threads, 16 threads, 32 threads and three matrix sizes. You
will find that when you run the same program several times, the timing values can vary significantly. Therefore
for each set of conditions, collect five data values and average them. Use a spreadsheet program either from MS Office or OpenOffice to collect and store your data and perform the necessary calculations and illustrate the results visually with Charts.
Explanation / Answer
#include <stdio.h>
#include <time.h>
#include <omp.h>
#include <stdlib.h>

/*
 * Matrix-vector product demo (c = B * A per column), run twice:
 * once with explicit parallel regions and dynamic scheduling, once
 * with a simple "parallel for" after setting 4 threads.
 */
int main() {
    /* static storage: 719*719 ints (~2 MB) would risk stack overflow
     * as automatic variables. */
    static int a[719][719], b[719], c[719];
    int i, j, n;
    /* omp_get_wtime() returns wall-clock time; clock() sums CPU time
     * across all threads and makes parallel code look slower, not faster. */
    double start = omp_get_wtime();
    n = 100; //Max 719
    printf("Matrix A ");
    for (i = 0; i < n; ++i) {
        for (j = 0; j < n; ++j) {
            a[i][j] = 10;
            printf("%d ", a[i][j]);
        }
        printf(" ");
    }
    printf(" Matrix B ");
    #pragma omp parallel private(i) shared(b)
    {
        #pragma omp for
        for (i = 0; i < n; ++i) {
            b[i] = 5;
            printf("%d ", b[i]);
        }
    }
    printf(" A * B ");
    /* Zero the accumulator before the multiply. */
    #pragma omp parallel private(i) shared(c)
    {
        #pragma omp for
        for (i = 0; i < n; ++i) {
            c[i] = 0;
        }
    }
    #pragma omp parallel private(i,j) shared(n,a,b,c)
    {
        #pragma omp for schedule(dynamic)
        for (i = 0; i < n; ++i) {
            for (j = 0; j < n; ++j) {
                c[i] += b[j] * a[j][i];
            }
        }
    }
    #pragma omp parallel private(i) shared(c)
    {
        #pragma omp for
        for (i = 0; i < n; ++i) {
            printf("%d ", c[i]);
        }
    }
    double stop = omp_get_wtime();
    double elapsed = stop - start;
    printf(" Time elapsed: %.5f ", elapsed);

    /* Second timed run: same product, simple parallel-for form. */
    start = omp_get_wtime();
    printf("Matrix A ");
    for (i = 0; i < n; ++i) {
        for (j = 0; j < n; ++j) {
            a[i][j] = 10;
            printf("%d ", a[i][j]);
        }
        printf(" ");
    }
    printf(" Matrix B ");
    #pragma omp parallel private(i) shared(b)
    {
        #pragma omp for
        for (i = 0; i < n; ++i) {
            b[i] = 5;
            printf("%d ", b[i]);
        }
    }
    printf(" A * B ");
    omp_set_num_threads(4);
    /* Reset c: without this the second product is added onto the first
     * run's results and every value comes out doubled. */
    for (i = 0; i < n; i++) {
        c[i] = 0;
    }
    #pragma omp parallel for
    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            c[i] += b[j] * a[j][i];
        }
    }
    stop = omp_get_wtime();
    elapsed = stop - start;
    printf(" Time elapsed: %.5f ", elapsed);
    return 0;
}
2ND Method
#include <algorithm>
#include <omp.h>
#include <bitset>
#include <cctype>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <deque>
#include <fstream>
#include <iostream>
#include <list>
#include <map>
#include <queue>
#include <set>
#include <sstream>
#include <stack>
#include <string>
#include <utility>
#include <vector>
#include <assert.h>
using namespace std;
#define READ(f) freopen(f, "r", stdin)
#define WRITE(f) freopen(f, "w", stdout)
#define pks printf("Case %d: ",++ks);
#define mx 1002
int a[mx][mx];
int b[mx][mx];
int c[mx][mx];
int d[mx][mx];
// Fill the n x n input matrices a and b with pseudo-random values in
// [0, 100). Call order of rand() is a-then-b per cell, row-major.
void generate_matrix(int n)
{
    for (int row = 0; row < n; ++row) {
        for (int col = 0; col < n; ++col) {
            a[row][col] = rand() % 100;
            b[row][col] = rand() % 100;
        }
    }
}
// Verify that the parallel product d matches the serial reference c for
// every entry of the n x n result; aborts via assert on any mismatch.
void check(int n)
{
    for (int row = 0; row < n; ++row) {
        for (int col = 0; col < n; ++col) {
            assert(c[row][col] == d[row][col]);
        }
    }
}
// Serial reference multiply: c = a * b (n x n), timed with omp_get_wtime().
void matrix_mult_serial(int n)
{
    // Zero the accumulator so repeated calls do not add onto stale
    // results — consistent with the memset done by the parallel variants.
    memset(c, 0, sizeof c);
    int i, j, k;
    double st = omp_get_wtime();
    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            for (k = 0; k < n; k++) {
                c[i][j] += a[i][k] * b[k][j];
            }
        }
    }
    double en = omp_get_wtime();
    printf("Serial: %lf ", en - st);
}
// Parallel multiply, static scheduler: d = a * b (n x n), then verified
// against the serial reference c via check().
void matrix_mult_parallel1(int n)
{
    //Static Scheduler
    memset(d, 0, sizeof d);
    int i, j, k;
    double st = omp_get_wtime();
    // The loop writes d, not c, so d is the array that belongs in the
    // shared list (the original listed c, which the loop never touches).
    #pragma omp parallel for schedule(static,50) collapse(2) private(i,j,k) shared(a,b,d)
    for (i = 0; i < n; i++)
        for (j = 0; j < n; j++)
            for (k = 0; k < n; k++)
                d[i][j] += a[i][k] * b[k][j];
    double en = omp_get_wtime();
    printf("Parallel-1(Static Scheduler) %lf ", en - st);
    check(n);
}
// Parallel multiply, dynamic scheduler: d = a * b (n x n), then verified
// against the serial reference c via check().
void matrix_mult_parallel2(int n)
{
    //Dynamic Scheduler
    memset(d, 0, sizeof d);
    int i, j, k;
    double st = omp_get_wtime();
    // The loop writes d, not c, so d is the array that belongs in the
    // shared list (the original listed c, which the loop never touches).
    #pragma omp parallel for schedule(dynamic,50) collapse(2) private(i,j,k) shared(a,b,d)
    for (i = 0; i < n; i++)
        for (j = 0; j < n; j++)
            for (k = 0; k < n; k++)
                d[i][j] += a[i][k] * b[k][j];
    double en = omp_get_wtime();
    printf("Parallel-2(Dynamic Scheduler) %lf ", en - st);
    check(n);
}
// Driver: build random 500 x 500 inputs, then time the serial multiply and
// both parallel variants (each parallel run is checked against the serial
// result inside its own function).
int main() {
    //READ("in");
    //WRITE("out2");
    const int n = 500;
    generate_matrix(n);
    matrix_mult_serial(n);
    matrix_mult_parallel1(n);
    matrix_mult_parallel2(n);
    return 0;
}
Related Questions
drjack9650@gmail.com
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.