- // classic.cpp : "Textbook" implementation of matrix multiply
- // Author: Paul J. Drongowski
- // Address: Boston Design Center
- // Advanced Micro Devices, Inc.
- // Boxborough, MA 01719
- // Date: 20 October 2005
- //
- // Copyright (c) 2005 Advanced Micro Devices, Inc.
- // Celem tego programu jest prezentacja pomiaru i analizy
- //efektywnosci programu za pomocą CodeAnalyst(tm).
- // Implementacja mnożenia macierzy jest realizowana za pomoca typowego
- // algorytmu podręcznikowego.
- // Efektywność tego podejscia jest niska poprzez
- // nieefektywną kolejnosć odwołań do elementów macierzy.
- #include <stdio.h>
- #include <time.h>
- #include <windows.h>
- #include "omp.h"
- #include <string>
- #define USE_MULTIPLE_THREADS true
- #define MAXTHREADS 128
- int NumThreads;
- double start;
- static const int ROWS = 512; // liczba wierszy macierzy
- static const int COLUMNS = 512; // lizba kolumn macierzy
- static const int R = 128;
- float matrix_a[ROWS][COLUMNS]; // lewy operand
- float matrix_b[ROWS][COLUMNS]; // prawy operand
- float matrix_r[ROWS][COLUMNS]; // wynik
- float result = 0;
- float resultParallel = 0;
- float result6 = 0;
- float resultParallel6 = 0;
- float minResult = 100;
- float minResultParallel = 100;
- float minResult6 = 100;
- float minResultParallel6 = 100;
- float maxResult = 0;
- float maxResultParallel = 0;
- float maxResult6 = 0;
- float maxResultParallel6 = 0;
- float tmp;
- int r = 256;
- FILE *result_file;
- void initialize_matrices()
- {
- // zdefiniowanie zawarosci poczatkowej macierzy
- #pragma omp parallel for
- for (int i = 0; i < ROWS; i++) {
- for (int j = 0; j < COLUMNS; j++) {
- matrix_a[i][j] = (float)rand() / RAND_MAX;
- matrix_b[i][j] = (float)rand() / RAND_MAX;
- matrix_r[i][j] = 0.0;
- }
- }
- }
- void initialize_matricesZ()
- {
- // zdefiniowanie zawarosci poczatkowej macierzy
- #pragma omp parallel for
- for (int i = 0; i < ROWS; i++) {
- for (int j = 0; j < COLUMNS; j++) {
- matrix_r[i][j] = 0.0;
- }
- }
- }
- void print_result()
- {
- // wydruk wyniku
- for (int i = 0; i < ROWS; i++) {
- for (int j = 0; j < COLUMNS; j++) {
- fprintf(result_file, "%6.4f ", matrix_r[i][j]);
- }
- fprintf(result_file, "\n");
- }
- }
- void multiply_matrices_IKJ()
- {
- for (int i = 0; i < ROWS; i++)
- for (int k = 0; k < COLUMNS; k++)
- for (int j = 0; j < COLUMNS; j++)
- matrix_r[i][j] += matrix_a[i][k] * matrix_b[k][j];
- }
- void multiply_matrices_IKJ_Parallel()
- {
- #pragma omp parallel for
- for (int i = 0; i < ROWS; i++)
- for (int k = 0; k < COLUMNS; k++)
- for (int j = 0; j < COLUMNS; j++)
- {
- matrix_r[i][j] += matrix_a[i][k] * matrix_b[k][j];
- }
- }
- void multiply_matrices_JKI()
- {
- // mnozenie macierzy
- for (int j = 0; j < COLUMNS; j++)
- for (int k = 0; k < COLUMNS; k++)
- for (int i = 0; i < ROWS; i++)
- matrix_r[i][j] += matrix_a[i][k] * matrix_b[k][j];
- }
- void multiply_matrices_JKI_Parallel()
- {
- // mnozenie macierzy
- #pragma omp parallel for
- for (int j = 0; j < COLUMNS; j++)
- for (int k = 0; k < COLUMNS; k++)
- for (int i = 0; i < ROWS; i++)
- matrix_r[i][j] += matrix_a[i][k] * matrix_b[k][j];
- }
- void multiply_matrices_IJK_IKJ()
- {
- for (int i = 0; i < ROWS; i += r)
- for (int j = 0; j < COLUMNS; j += r)
- for (int k = 0; k < COLUMNS; k += r)
- for (int ii = i; ii < i + r; ii++) // WYNIK CZĘŚCIOWY
- for (int kk = k; kk < k + r; kk++)
- for (int jj = j; jj < j + r; jj++)
- matrix_r[ii][jj] += matrix_a[ii][kk] * matrix_b[kk][jj];
- }
- void multiply_matrices_IJK_IKJ_Parallel()
- {
- #pragma omp parallel for
- for (int i = 0; i < ROWS; i += r)
- for (int j = 0; j < COLUMNS; j += r)
- for (int k = 0; k < COLUMNS; k += r)
- for (int ii = i; ii < i + r; ii++) // WYNIK CZĘŚCIOWY
- for (int kk = k; kk < k + r; kk++)
- for (int jj = j; jj < j + r; jj++)
- matrix_r[ii][jj] += matrix_a[ii][kk] * matrix_b[kk][jj];
- }
- float print_elapsed_time(std::string info)
- {
- double elapsed;
- double resolution;
- // wyznaczenie i zapisanie czasu przetwarzania
- elapsed = (double)clock() / CLK_TCK;
- resolution = 1.0 / CLK_TCK;
- fprintf(result_file,
- "Czas wykonania programu %s: %8.4f sec (%6.4f sec rozdzielczosc pomiaru)\n",
- info.c_str(), elapsed - start, resolution);
- return elapsed - start;
- }
- void enterLine()
- {
- fprintf(result_file,
- "\n"
- );
- fprintf(result_file,
- "\n"
- );
- }
- void load_IKJ()
- {
- for (int i = 0;i < 100;i++)
- {
- initialize_matricesZ();
- start = (double)clock() / CLK_TCK;
- multiply_matrices_IKJ();
- tmp = print_elapsed_time("Sekwencyjnie IKJ ");
- result += tmp;
- if (tmp > maxResult)
- {
- maxResult = tmp;
- }
- if (tmp < minResult)
- {
- minResult = tmp;
- }
- }
- enterLine();
- }
- void loadParallel_IKJ()
- {
- for (int i = 0; i < 100; i++)
- {
- initialize_matricesZ();
- start = (double)clock() / CLK_TCK;
- multiply_matrices_IKJ_Parallel();
- tmp = print_elapsed_time("Rownolegle IKJ ");
- resultParallel += tmp;
- if (tmp > maxResultParallel)
- {
- maxResultParallel = tmp;
- }
- if (tmp < minResultParallel)
- {
- minResultParallel = tmp;
- }
- }
- enterLine();
- }
- void load_IJK_IKJ()
- {
- for (int i = 0; i < 100; i++)
- {
- initialize_matricesZ();
- start = (double)clock() / CLK_TCK;
- multiply_matrices_IJK_IKJ();
- tmp = print_elapsed_time("Sekwencyjnie IJK_IKJ ");
- result6 += tmp;
- if (tmp > maxResult6)
- {
- maxResult6 = tmp;
- }
- if (tmp < minResult6)
- {
- minResult6 = tmp;
- }
- }
- enterLine();
- }
- void loadParallel_IJK_IKJ()
- {
- for (int i = 0; i < 100; i++)
- {
- initialize_matricesZ();
- start = (double)clock() / CLK_TCK;
- multiply_matrices_IJK_IKJ_Parallel();
- tmp = print_elapsed_time("Rownolegle IJK_IKJ");
- resultParallel6 += tmp;
- if (tmp > maxResultParallel6)
- {
- maxResultParallel6 = tmp;
- }
- if (tmp < minResultParallel6)
- {
- minResultParallel6 = tmp;
- }
- }
- enterLine();
- }
- void save_IKJ()
- {
- float avgResult = result / 100;
- fprintf(result_file,
- "Średnia sekwencyjnego %8.4f \n", avgResult
- );
- fprintf(result_file,
- "Max sekwencyjnego %8.4f \n", maxResult
- );
- fprintf(result_file,
- "Min sekwencyjnego %8.4f \n", minResult
- );
- enterLine();
- }
- void saveParallel_IKJ()
- {
- float avgResultParallel = resultParallel / 100;
- fprintf(result_file,
- "Średnia równoległego %8.4f \n", avgResultParallel
- );
- fprintf(result_file,
- "Min równoległego %8.4f \n", minResultParallel
- );
- fprintf(result_file,
- "Max równoległego %8.4f \n", maxResultParallel
- );
- fclose(result_file);
- }
- void saveParallel_IJK_IKJ()
- {
- float avgResultParallel = resultParallel6 / 100;
- fprintf(result_file,
- "Średnia równoległego IJK_IKJ %8.4f \n", avgResultParallel
- );
- fprintf(result_file,
- "Min równoległego IJK_IKJ %8.4f \n", minResultParallel6
- );
- fprintf(result_file,
- "Max równoległego IJK_IKJ %8.4f \n", maxResultParallel6
- );
- fclose(result_file);
- }
- void save_IJK_IKJ()
- {
- float avgResult = result6 / 100;
- fprintf(result_file,
- "Średnia sekwencyjnego IJK_IKJ %8.4f \n", avgResult
- );
- fprintf(result_file,
- "Max sekwencyjnego IJK_IKJ %8.4f \n", maxResult6
- );
- fprintf(result_file,
- "Min sekwencyjnego IJK_IKJ %8.4f \n", minResult6
- );
- enterLine();
- }
- int main(int argc, char* argv[])
- {
- omp_set_num_threads(4);
- // start = (double) clock() / CLK_TCK ;
- if ((result_file = fopen("classic.txt", "w")) == NULL) {
- fprintf(stderr, "nie mozna otworzyc pliku wyniku \n");
- perror("classic");
- return(EXIT_FAILURE);
- }
- //Determine the number of threads to use
- if (USE_MULTIPLE_THREADS) {
- SYSTEM_INFO SysInfo;
- GetSystemInfo(&SysInfo);
- NumThreads = SysInfo.dwNumberOfProcessors;
- if (NumThreads > MAXTHREADS)
- NumThreads = MAXTHREADS;
- }
- else
- NumThreads = 1;
- fprintf(result_file, "Klasyczny algorytm mnozenia macierzy, liczba watkow %d \n", NumThreads);
- printf("liczba watkow = %d\n\n", NumThreads);
- //load_IKJ();
- //loadParallel_IKJ();
- //save_IKJ();
- //saveParallel_IKJ();
- load_IJK_IKJ();
- loadParallel_IJK_IKJ();
- save_IJK_IKJ();
- saveParallel_IJK_IKJ();
- return(0);
- }