forked from shiningzhang0926/CSE522_Project
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parallel_dense_mm.c
99 lines (80 loc) · 2.79 KB
/
parallel_dense_mm.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
/******************************************************************************
*
* dense_mm.c
*
* This program implements a dense matrix multiply and can be used as a
* hypothetical workload.
*
* Usage: This program takes a single input describing the size of the matrices
* to multiply. For an input of size N, it computes A*B = C where each
* of A, B, and C are matrices of size N*N. Matrices A and B are filled
* with random values.
*
* Written Sept 6, 2015 by David Ferry
******************************************************************************/
#include <stdio.h> //For printf()
#include <stdlib.h> //For exit() and atoi()
#include <assert.h> //For assert()
// Value main() requires argc to equal: the program name plus one matrix size.
int const num_expected_args = 2;
// Limit main() compares the requested matrix dimension against.
// Equals 2^16, i.e. the square root of 2^32 (one more than UINT32_MAX).
unsigned int const sqrt_of_UINT32_MAX = 1u << 16;
// The following line can be used to verify that the parallel computation
// gives identical results to the serial computation. If the verification is
// successful then the program executes normally. If the verification fails
// the program will terminate with an assertion error.
//#define VERIFY_PARALLEL
/*
 * Program entry point: computes C = A * B for N x N matrices of doubles,
 * where A and B are filled with values from rand().
 *
 * argv[1] is the matrix dimension N, a decimal integer with
 * 0 <= N < sqrt_of_UINT32_MAX. The upper bound is exclusive so that N * N
 * (and every flattened index row*N + col) fits in a 32-bit unsigned value.
 *
 * Returns 0 on success. Calls exit(-1) after printing a message when the
 * argument count is wrong, the size is malformed or out of range, or the
 * matrices cannot be allocated.
 */
int main( int argc, char* argv[] ){
    unsigned index, row, col; // loop indices
    unsigned matrix_size, squared_size;
    double *A, *B, *C;
#ifdef VERIFY_PARALLEL
    double *D;
#endif
    char *parse_end;
    long requested_size;
    int alloc_failed;

    if( argc != num_expected_args ){
        printf("Usage: ./dense_mm <size of matrices>\n");
        exit(-1);
    }

    // strtol (unlike atoi) reports where parsing stopped, so empty input and
    // trailing garbage can be rejected instead of silently becoming a size.
    requested_size = strtol( argv[1], &parse_end, 10 );
    if( parse_end == argv[1] || *parse_end != '\0' ){
        printf("ERROR: Matrix size must be a non-negative integer!\n");
        exit(-1);
    }

    // Out-of-range input makes strtol return LONG_MIN or LONG_MAX, both of
    // which this check rejects. The limit itself is rejected too, because
    // squaring it would wrap a 32-bit unsigned value around to zero.
    if( requested_size < 0
        || (unsigned long) requested_size >= sqrt_of_UINT32_MAX ){
        printf("ERROR: Matrix size must be between zero and %u!\n",
               sqrt_of_UINT32_MAX - 1);
        exit(-1);
    }

    matrix_size = (unsigned) requested_size;
    squared_size = matrix_size * matrix_size;

    printf("Generating matrices...\n");

    // calloc checks the count * size multiplication for overflow, which
    // matters where size_t is only 32 bits wide.
    A = calloc( squared_size, sizeof *A );
    B = calloc( squared_size, sizeof *B );
    C = calloc( squared_size, sizeof *C );
    alloc_failed = ( A == NULL || B == NULL || C == NULL );
#ifdef VERIFY_PARALLEL
    D = calloc( squared_size, sizeof *D );
    alloc_failed = alloc_failed || ( D == NULL );
#endif

    // A zero-element request may legitimately return NULL, so a NULL result
    // only counts as a failure when elements were actually requested.
    if( squared_size != 0 && alloc_failed ){
        printf("ERROR: Could not allocate memory for the matrices!\n");
        free( A );
        free( B );
        free( C );
#ifdef VERIFY_PARALLEL
        free( D );
#endif
        exit(-1);
    }

    for( index = 0; index < squared_size; index++ ){
        A[index] = (double) rand();
        B[index] = (double) rand();
        // Assigned explicitly: the C standard does not guarantee that the
        // all-bits-zero memory from calloc represents 0.0.
        C[index] = 0.0;
#ifdef VERIFY_PARALLEL
        D[index] = 0.0;
#endif
    }

    printf("Multiplying matrices...\n");

    // The outer (column) loop is the one handed to OpenMP. Each outer
    // iteration writes only elements in column `col` of C, so different
    // iterations never write the same element.
#pragma omp parallel for private(col, row, index)
    for( col = 0; col < matrix_size; col++ ){
        for( row = 0; row < matrix_size; row++ ){
            for( index = 0; index < matrix_size; index++){
                C[row*matrix_size + col] += A[row*matrix_size + index] *B[index*matrix_size + col];
            }
        }
    }

#ifdef VERIFY_PARALLEL
    printf("Verifying parallel matrix multiplication...\n");

    // Recompute the product into D without the pragma, using the same loop
    // nest so each element is accumulated in the same order as in C.
    for( col = 0; col < matrix_size; col++ ){
        for( row = 0; row < matrix_size; row++ ){
            for( index = 0; index < matrix_size; index++){
                D[row*matrix_size + col] += A[row*matrix_size + index] *B[index*matrix_size + col];
            }
        }
    }

    for( index = 0; index < squared_size; index++ ){
        assert( C[index] == D[index] );
    }
#endif //ifdef VERIFY_PARALLEL

    printf("Multiplication done!\n");

    free( A );
    free( B );
    free( C );
#ifdef VERIFY_PARALLEL
    free( D );
#endif

    return 0;
}