Task 2.4 - Parallelizing Matrix Multiplication — You are given a skeleton sequential program for matrix multiplication.
ID: 3911195 • Letter: T
Question
Task 2.4 - Parallelizing Matrix Multiplication
You are given a skeleton sequential program for matrix multiplication below:
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>

#define M 500
#define N 500

/*
 * Sequential matrix-multiplication skeleton: C = A * B for M x N matrices.
 * Timing brackets only the triple loop, using omp_get_wtime() so the same
 * harness remains correct once OpenMP pragmas are added (wall-clock time,
 * not per-thread CPU time).
 */
int main(int argc, char **argv) { /* argv is char**, not char* */
    /* set number of threads here */
    omp_set_num_threads(8);
    int i, j, k;
    double sum;
    double **A, **B, **C;

    /* Row-pointer allocation; check every allocation before use. */
    A = malloc(M * sizeof(double *));
    B = malloc(M * sizeof(double *));
    C = malloc(M * sizeof(double *));
    if (!A || !B || !C) {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }
    for (i = 0; i < M; i++) {
        A[i] = malloc(N * sizeof(double));
        B[i] = malloc(N * sizeof(double));
        C[i] = malloc(N * sizeof(double));
        if (!A[i] || !B[i] || !C[i]) {
            fprintf(stderr, "out of memory\n");
            return EXIT_FAILURE;
        }
    }

    double start, end;

    /* Deterministic initialization so timing runs are comparable. */
    for (i = 0; i < M; i++) {
        for (j = 0; j < N; j++) {
            A[i][j] = j * 1;
            B[i][j] = i * j + 2;
            C[i][j] = j - i * 2;
        }
    }

    start = omp_get_wtime();
    for (i = 0; i < M; i++) {
        for (j = 0; j < N; j++) {
            sum = 0;
            for (k = 0; k < M; k++) {
                sum += A[i][k] * B[k][j];
            }
            C[i][j] = sum;
        }
    }
    end = omp_get_wtime();
    printf("Time of computation: %f\n", end - start);

    /* Release every row, then the row-pointer arrays themselves. */
    for (i = 0; i < M; i++) {
        free(A[i]);
        free(B[i]);
        free(C[i]);
    }
    free(A);
    free(B);
    free(C);
    return 0;
}
You are to parallelize this algorithm in three different ways:
1. Add the necessary pragma to parallelize the outer for loop
2. Remove the pragma for the outer for loop and create a pragma for the middle for loop
3. Add the necessary pragmas to parallelize both the outer and middle for loops
and collect timing data given 2 threads, 4 threads, 8 threads, 16 threads, 32 threads and three matrix sizes. You
will find that when you run the same program several times, the timing values can vary significantly. Therefore
for each set of conditions, collect five data values and average them. Use a spreadsheet program either from MS Office or OpenOffice to collect and store your data and perform the necessary calculations and illustrate the results visually with Charts.
Explanation / Answer
#include <stdio.h>
#include <time.h>
#include <omp.h>
#include <stdlib.h>

/*
 * Matrix-vector product demo (c = B * A per column), run twice:
 * once with explicit parallel regions and dynamic scheduling, once
 * with a simple "parallel for" after setting 4 threads.
 */
int main() {
    /* static storage: 719*719 ints (~2 MB) would risk stack overflow
     * as automatic variables. */
    static int a[719][719], b[719], c[719];
    int i, j, n;
    /* omp_get_wtime() returns wall-clock time; clock() sums CPU time
     * across all threads and makes parallel code look slower, not faster. */
    double start = omp_get_wtime();
    n = 100; //Max 719
    printf("Matrix A ");
    for (i = 0; i < n; ++i) {
        for (j = 0; j < n; ++j) {
            a[i][j] = 10;
            printf("%d ", a[i][j]);
        }
        printf(" ");
    }
    printf(" Matrix B ");
    #pragma omp parallel private(i) shared(b)
    {
        #pragma omp for
        for (i = 0; i < n; ++i) {
            b[i] = 5;
            printf("%d ", b[i]);
        }
    }
    printf(" A * B ");
    /* Zero the accumulator before the multiply. */
    #pragma omp parallel private(i) shared(c)
    {
        #pragma omp for
        for (i = 0; i < n; ++i) {
            c[i] = 0;
        }
    }
    #pragma omp parallel private(i,j) shared(n,a,b,c)
    {
        #pragma omp for schedule(dynamic)
        for (i = 0; i < n; ++i) {
            for (j = 0; j < n; ++j) {
                c[i] += b[j] * a[j][i];
            }
        }
    }
    #pragma omp parallel private(i) shared(c)
    {
        #pragma omp for
        for (i = 0; i < n; ++i) {
            printf("%d ", c[i]);
        }
    }
    double stop = omp_get_wtime();
    double elapsed = stop - start;
    printf(" Time elapsed: %.5f ", elapsed);

    /* Second timed run: same product, simple parallel-for form. */
    start = omp_get_wtime();
    printf("Matrix A ");
    for (i = 0; i < n; ++i) {
        for (j = 0; j < n; ++j) {
            a[i][j] = 10;
            printf("%d ", a[i][j]);
        }
        printf(" ");
    }
    printf(" Matrix B ");
    #pragma omp parallel private(i) shared(b)
    {
        #pragma omp for
        for (i = 0; i < n; ++i) {
            b[i] = 5;
            printf("%d ", b[i]);
        }
    }
    printf(" A * B ");
    omp_set_num_threads(4);
    /* Reset c: without this the second product is added onto the first
     * run's results and every value comes out doubled. */
    for (i = 0; i < n; i++) {
        c[i] = 0;
    }
    #pragma omp parallel for
    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            c[i] += b[j] * a[j][i];
        }
    }
    stop = omp_get_wtime();
    elapsed = stop - start;
    printf(" Time elapsed: %.5f ", elapsed);
    return 0;
}
2ND Method
#include <algorithm>
#include <omp.h>
#include <bitset>
#include <cctype>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <deque>
#include <fstream>
#include <iostream>
#include <list>
#include <map>
#include <queue>
#include <set>
#include <sstream>
#include <stack>
#include <string>
#include <utility>
#include <vector>
#include <assert.h>
using namespace std;
#define READ(f) freopen(f, "r", stdin)
#define WRITE(f) freopen(f, "w", stdout)
#define pks printf("Case %d: ",++ks);
#define mx 1002
int a[mx][mx];
int b[mx][mx];
int c[mx][mx];
int d[mx][mx];
// Fill the n x n input matrices a and b with pseudo-random values in
// [0, 100). Call order of rand() is a-then-b per cell, row-major.
void generate_matrix(int n)
{
    for (int row = 0; row < n; ++row) {
        for (int col = 0; col < n; ++col) {
            a[row][col] = rand() % 100;
            b[row][col] = rand() % 100;
        }
    }
}
// Verify that the parallel product d matches the serial reference c for
// every entry of the n x n result; aborts via assert on any mismatch.
void check(int n)
{
    for (int row = 0; row < n; ++row) {
        for (int col = 0; col < n; ++col) {
            assert(c[row][col] == d[row][col]);
        }
    }
}
// Serial reference multiply: c = a * b (n x n), timed with omp_get_wtime().
void matrix_mult_serial(int n)
{
    // Zero the accumulator so repeated calls do not add onto stale
    // results — consistent with the memset done by the parallel variants.
    memset(c, 0, sizeof c);
    int i, j, k;
    double st = omp_get_wtime();
    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            for (k = 0; k < n; k++) {
                c[i][j] += a[i][k] * b[k][j];
            }
        }
    }
    double en = omp_get_wtime();
    printf("Serial: %lf ", en - st);
}
// Parallel multiply, static scheduler: d = a * b (n x n), then verified
// against the serial reference c via check().
void matrix_mult_parallel1(int n)
{
    //Static Scheduler
    memset(d, 0, sizeof d);
    int i, j, k;
    double st = omp_get_wtime();
    // The loop writes d, not c, so d is the array that belongs in the
    // shared list (the original listed c, which the loop never touches).
    #pragma omp parallel for schedule(static,50) collapse(2) private(i,j,k) shared(a,b,d)
    for (i = 0; i < n; i++)
        for (j = 0; j < n; j++)
            for (k = 0; k < n; k++)
                d[i][j] += a[i][k] * b[k][j];
    double en = omp_get_wtime();
    printf("Parallel-1(Static Scheduler) %lf ", en - st);
    check(n);
}
// Parallel multiply, dynamic scheduler: d = a * b (n x n), then verified
// against the serial reference c via check().
void matrix_mult_parallel2(int n)
{
    //Dynamic Scheduler
    memset(d, 0, sizeof d);
    int i, j, k;
    double st = omp_get_wtime();
    // The loop writes d, not c, so d is the array that belongs in the
    // shared list (the original listed c, which the loop never touches).
    #pragma omp parallel for schedule(dynamic,50) collapse(2) private(i,j,k) shared(a,b,d)
    for (i = 0; i < n; i++)
        for (j = 0; j < n; j++)
            for (k = 0; k < n; k++)
                d[i][j] += a[i][k] * b[k][j];
    double en = omp_get_wtime();
    printf("Parallel-2(Dynamic Scheduler) %lf ", en - st);
    check(n);
}
// Driver: build random 500 x 500 inputs, then time the serial multiply and
// both parallel variants (each parallel run is checked against the serial
// result inside its own function).
int main() {
    //READ("in");
    //WRITE("out2");
    const int n = 500;
    generate_matrix(n);
    matrix_mult_serial(n);
    matrix_mult_parallel1(n);
    matrix_mult_parallel2(n);
    return 0;
}
Related Questions
drjack9650@gmail.com
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.