// classic.cpp : "Textbook" implementation of matrix multiply
// Author: Paul J. Drongowski
// Address: Boston Design Center
// Advanced Micro Devices, Inc.
// Boxborough, MA 01719
// Date: 20 October 2005
//
// Copyright (c) 2005 Advanced Micro Devices, Inc.
// Celem tego programu jest prezentacja pomiaru i analizy
//efektywnosci programu za pomocą CodeAnalyst(tm).
// Implementacja mnożenia macierzy jest realizowana za pomoca typowego
// algorytmu podręcznikowego.
// Efektywność tego podejscia jest niska poprzez
// nieefektywną kolejnosć odwołań do elementów macierzy.
#include <stdio.h>
#include <time.h>
#include <windows.h>
#include "omp.h"
#include <string>
#define USE_MULTIPLE_THREADS true
#define MAXTHREADS 128
int NumThreads;
double start;
static const int ROWS = 2048; // liczba wierszy macierzy
static const int COLUMNS = 2048; // lizba kolumn macierzy
int r = 256;
float matrix_a[ROWS][COLUMNS]; // lewy operand
float matrix_b[ROWS][COLUMNS]; // prawy operand
float matrix_r[ROWS][COLUMNS]; // wynik
float result = 0;
float resultParallel = 0;
float result6 = 0;
float resultParallel6 = 0;
float minResult = 100;
float minResultParallel = 100;
float minResult6 = 100;
float minResultParallel6 = 100;
float maxResult = 0;
float maxResultParallel = 0;
float maxResult6 = 0;
float maxResultParallel6 = 0;
float tmp;
FILE *result_file;
void initialize_matrices()
{
// zdefiniowanie zawarosci poczatkowej macierzy
#pragma omp parallel for
for (int i = 0; i < ROWS; i++) {
for (int j = 0; j < COLUMNS; j++) {
matrix_a[i][j] = (float)rand() / RAND_MAX;
matrix_b[i][j] = (float)rand() / RAND_MAX;
matrix_r[i][j] = 0.0;
}
}
}
void initialize_matricesZ()
{
// zdefiniowanie zawarosci poczatkowej macierzy
#pragma omp parallel for
for (int i = 0; i < ROWS; i++) {
for (int j = 0; j < COLUMNS; j++) {
matrix_r[i][j] = 0.0;
}
}
}
void print_result()
{
// wydruk wyniku
for (int i = 0; i < ROWS; i++) {
for (int j = 0; j < COLUMNS; j++) {
fprintf(result_file, "%6.4f ", matrix_r[i][j]);
}
fprintf(result_file, "\n");
}
}
void multiply_matrices_IKJ()
{
for (int i = 0; i < ROWS; i++)
for (int k = 0; k < COLUMNS; k++)
for (int j = 0; j < COLUMNS; j++)
matrix_r[i][j] += matrix_a[i][k] * matrix_b[k][j];
}
void multiply_matrices_IKJ_Parallel()
{
#pragma omp parallel for
for (int i = 0; i < ROWS; i++)
for (int k = 0; k < COLUMNS; k++)
for (int j = 0; j < COLUMNS; j++)
{
matrix_r[i][j] += matrix_a[i][k] * matrix_b[k][j];
}
}
void multiply_matrices_JKI()
{
// mnozenie macierzy
for (int j = 0; j < COLUMNS; j++)
for (int k = 0; k < COLUMNS; k++)
for (int i = 0; i < ROWS; i++)
matrix_r[i][j] += matrix_a[i][k] * matrix_b[k][j];
}
void multiply_matrices_JKI_Parallel()
{
// mnozenie macierzy
#pragma omp parallel for
for (int j = 0; j < COLUMNS; j++)
for (int k = 0; k < COLUMNS; k++)
for (int i = 0; i < ROWS; i++)
matrix_r[i][j] += matrix_a[i][k] * matrix_b[k][j];
}
void multiply_matrices_IJK_IKJ()
{
for (int i = 0; i < ROWS; i += r)
for (int j = 0; j < COLUMNS; j += r)
for (int k = 0; k < COLUMNS; k += r)
for (int ii = i; ii < i + r; ii++) // WYNIK CZĘŚCIOWY
for (int kk = k; kk < k + r; kk++)
for (int jj = j; jj < j + r; jj++)
matrix_r[ii][jj] += matrix_a[ii][kk] * matrix_b[kk][jj];
}
void multiply_matrices_IJK_IKJ_Parallel()
{
#pragma omp parallel for
for (int i = 0; i < ROWS; i += r)
for (int j = 0; j < COLUMNS; j += r)
for (int k = 0; k < COLUMNS; k += r)
for (int ii = i; ii < i + r; ii++) // WYNIK CZĘŚCIOWY
for (int kk = k; kk < k + r; kk++)
for (int jj = j; jj < j + r; jj++)
matrix_r[ii][jj] += matrix_a[ii][kk] * matrix_b[kk][jj];
}
float print_elapsed_time(std::string info)
{
double elapsed;
double resolution;
// wyznaczenie i zapisanie czasu przetwarzania
elapsed = (double)clock() / CLK_TCK;
resolution = 1.0 / CLK_TCK;
fprintf(result_file,
"Czas wykonania programu %s: %8.4f sec (%6.4f sec rozdzielczosc pomiaru)\n",
info.c_str(), elapsed - start, resolution);
return elapsed - start;
}
void enterLine()
{
fprintf(result_file,
"\n"
);
fprintf(result_file,
"\n"
);
}
void load_IKJ()
{
for (int i = 0;i < 100;i++)
{
initialize_matricesZ();
start = (double)clock() / CLK_TCK;
multiply_matrices_IKJ();
tmp = print_elapsed_time("Sekwencyjnie IKJ ");
result += tmp;
if (tmp > maxResult)
{
maxResult = tmp;
}
if (tmp < minResult)
{
minResult = tmp;
}
}
enterLine();
}
void loadParallel_IKJ()
{
for (int i = 0; i < 100; i++)
{
initialize_matricesZ();
start = (double)clock() / CLK_TCK;
multiply_matrices_IKJ_Parallel();
tmp = print_elapsed_time("Rownolegle IKJ ");
resultParallel += tmp;
if (tmp > maxResultParallel)
{
maxResultParallel = tmp;
}
if (tmp < minResultParallel)
{
minResultParallel = tmp;
}
}
enterLine();
}
void load_IJK_IKJ()
{
for (int i = 0; i < 100; i++)
{
initialize_matricesZ();
start = (double)clock() / CLK_TCK;
multiply_matrices_IJK_IKJ();
tmp = print_elapsed_time("Sekwencyjnie IJK_IKJ ");
result6 += tmp;
if (tmp > maxResult6)
{
maxResult6 = tmp;
}
if (tmp < minResult6)
{
minResult6 = tmp;
}
}
enterLine();
}
void loadParallel_IJK_IKJ()
{
for (int i = 0; i < 100; i++)
{
initialize_matricesZ();
start = (double)clock() / CLK_TCK;
multiply_matrices_IJK_IKJ_Parallel();
tmp = print_elapsed_time("Rownolegle IJK_IKJ");
resultParallel6 += tmp;
if (tmp > maxResultParallel6)
{
maxResultParallel6 = tmp;
}
if (tmp < minResultParallel6)
{
minResultParallel6 = tmp;
}
}
enterLine();
}
void save_IKJ()
{
float avgResult = result / 100;
fprintf(result_file,
"Średnia sekwencyjnego %8.4f \n", avgResult
);
fprintf(result_file,
"Max sekwencyjnego %8.4f \n", maxResult
);
fprintf(result_file,
"Min sekwencyjnego %8.4f \n", minResult
);
enterLine();
}
void saveParallel_IKJ()
{
float avgResultParallel = resultParallel / 100;
fprintf(result_file,
"Średnia równoległego %8.4f \n", avgResultParallel
);
fprintf(result_file,
"Min równoległego %8.4f \n", minResultParallel
);
fprintf(result_file,
"Max równoległego %8.4f \n", maxResultParallel
);
fclose(result_file);
}
void saveParallel_IJK_IKJ()
{
float avgResultParallel = resultParallel6 / 100;
fprintf(result_file,
"Średnia równoległego IJK_IKJ %8.4f \n", avgResultParallel
);
fprintf(result_file,
"Min równoległego IJK_IKJ %8.4f \n", minResultParallel6
);
fprintf(result_file,
"Max równoległego IJK_IKJ %8.4f \n", maxResultParallel6
);
fclose(result_file);
}
void save_IJK_IKJ()
{
float avgResult = result6 / 100;
fprintf(result_file,
"Średnia sekwencyjnego IJK_IKJ %8.4f \n", avgResult
);
fprintf(result_file,
"Max sekwencyjnego IJK_IKJ %8.4f \n", maxResult6
);
fprintf(result_file,
"Min sekwencyjnego IJK_IKJ %8.4f \n", minResult6
);
enterLine();
}
int main(int argc, char* argv[])
{
omp_set_num_threads(4);
// start = (double) clock() / CLK_TCK ;
if ((result_file = fopen("classic.txt", "w")) == NULL) {
fprintf(stderr, "nie mozna otworzyc pliku wyniku \n");
perror("classic");
return(EXIT_FAILURE);
}
//Determine the number of threads to use
if (USE_MULTIPLE_THREADS) {
SYSTEM_INFO SysInfo;
GetSystemInfo(&SysInfo);
NumThreads = SysInfo.dwNumberOfProcessors;
if (NumThreads > MAXTHREADS)
NumThreads = MAXTHREADS;
}
else
NumThreads = 1;
fprintf(result_file, "Klasyczny algorytm mnozenia macierzy, liczba watkow %d \n", NumThreads);
printf("liczba watkow = %d\n\n", NumThreads);
//load_IKJ();
//loadParallel_IKJ();
//save_IKJ();
//saveParallel_IKJ();
//load_IJK_IKJ();
//loadParallel_IJK_IKJ();
//
//
//save_IJK_IKJ();
//saveParallel_IJK_IKJ();
initialize_matricesZ();
start = (double)clock() / CLK_TCK;
multiply_matrices_IJK_IKJ_Parallel();
tmp = print_elapsed_time("Sekwencyjnie IJK_IKJ ");
return(0);
}