    size_t cols = N + 1;
    for (int i = 0; i < N; ++i) {
        matrix[i * cols + i] += 200.0;
    }
    return matrix;
}
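// Added note: generateSystem() boosts every diagonal element by 200.0, presumably
// to keep each pivot matrix[k][k] well away from zero (and, if the off-diagonal
// entries are small enough, to make the matrix diagonally dominant), so the
// Gauss-Jordan routines below can divide by the pivot without row pivoting.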
// Sequential Gauss-Jordan method
void sequentialGaussJordan(vector<double>& matrix, int N) {
    int cols = N + 1;
    for (int k = 0; k < N; ++k) {
        // Normalize the pivot row so that the pivot element becomes 1
        double* pivotRow = &matrix[k * cols];
        double pivot = pivotRow[k];
        for (int j = k; j < cols; ++j) pivotRow[j] /= pivot;
        // Eliminate column k from every other row
        for (int i = 0; i < N; ++i) {
            if (i != k) {
                double* targetRow = &matrix[i * cols];
                double factor = targetRow[k];
                for (int j = k; j < cols; ++j) targetRow[j] -= factor * pivotRow[j];
            }
        }
    }
}
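// For reference, the loops above apply the standard Gauss-Jordan row operations
// to the augmented matrix [A|b] stored row-major with cols = N + 1 entries per row:
//   normalization:  a[k][j] <- a[k][j] / a[k][k]            for j = k..N
//   elimination:    a[i][j] <- a[i][j] - a[i][k] * a[k][j]  for all i != k, j = k..N
// After the final iteration the solution is left in the last column,
// i.e. x_i = matrix[i * cols + N].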
// Parallel (MPI) Gauss-Jordan method
void parallelGaussJordan(vector<double>& local_matrix, int N, int rows_per_proc, int rank, int num_threads, const vector<int>& displs) {
    int cols = N + 1;
    vector<double> pivot_row(cols);
    for (int k = 0; k < N; ++k) {
        int owner = -1;
        // Find the owner of row k (the process that holds this row)
        for (int p = 0; p < num_threads; ++p) {
            int r_start = displs[p] / cols;
            int r_count = (displs[(p + 1) % num_threads] > displs[p] ? (displs[(p + 1) % num_threads] - displs[p]) : (N * cols - displs[p])) / cols;
            if (p == num_threads - 1) r_count = N - r_start;
            if (k >= r_start && k < r_start + r_count) {
                owner = p;
                break;
            }
        }
        // The owner normalizes its local copy of the pivot row
        if (rank == owner) {
            int local_k = k - (displs[rank] / cols);
            double* srcRow = &local_matrix[local_k * cols];
            for (int j = 0; j < cols; ++j) pivot_row[j] = srcRow[j];
            double pivot = pivot_row[k];
            for (int j = k; j < cols; ++j) pivot_row[j] /= pivot;
            for (int j = 0; j < cols; ++j) srcRow[j] = pivot_row[j];
        }
        // Broadcast the normalized pivot row and eliminate column k from the local rows
        MPI_Bcast(pivot_row.data(), cols, MPI_DOUBLE, owner, MPI_COMM_WORLD);
        for (int i = 0; i < rows_per_proc; ++i) {
            int global_row = (displs[rank] / cols) + i;
            if (global_row != k) {
                double* targetRow = &local_matrix[i * cols];
                double factor = targetRow[k];
                for (int j = k; j < cols; ++j) targetRow[j] -= factor * pivot_row[j];
            }
        }
    }
}
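// Added sketch, not part of the original program: since displs[] is sorted in
// increasing order, the linear owner search above could equivalently be written
// as a binary search (would require <algorithm>; assumes every process owns at
// least one row, i.e. sendcounts[p] > 0 for all p):
//
//   static int findOwnerSketch(int k, int cols, const vector<int>& displs) {
//       // Largest p such that displs[p] <= k * cols, i.e. whose first global row is <= k
//       auto it = upper_bound(displs.begin(), displs.end(), k * cols);
//       return static_cast<int>(it - displs.begin()) - 1;
//   }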
void runExample() {
    cout << "\n=== EXAMPLE TEST (2x2 SYSTEM) ===" << endl;
    cout << "System:\n1*x1 + 2*x2 = 3\n2*x1 - 1*x2 = 1\nExpected: x1=1, x2=1\n" << endl;
    int N = 2;
    vector<double> matrix = {1.0, 2.0, 3.0, 2.0, -1.0, 1.0};
    sequentialGaussJordan(matrix, N);
    cout << "Result: x1=" << matrix[2] << ", x2=" << matrix[5] << endl;
    cout << string(30, '=') << endl << endl;
}
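// For reference, running the program with --simple-example should print roughly:
//   === EXAMPLE TEST (2x2 SYSTEM) ===
//   System:
//   1*x1 + 2*x2 = 3
//   2*x1 - 1*x2 = 1
//   Expected: x1=1, x2=1
//
//   Result: x1=1, x2=1
//   ==============================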
int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);
    int rank, num_threads;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &num_threads);
    bool skipSerial = false;
    for (int i = 1; i < argc; ++i) {
        string arg = argv[i];
        if (arg == "--simple-example") {
            if (rank == 0) runExample();
            MPI_Finalize();
            return 0;
        }
        if (arg == "--skip-serial") {
            skipSerial = true;
        }
    }
    for (int N : TEST_SIZES) {
        int cols = N + 1;
        vector<double> global_matrix;
        double durationSeqMs = 0.0;
        // Sequential computation (reference timing) on the root process
        if (rank == 0) {
            if (!skipSerial) {
                global_matrix = generateSystem(N);
                vector<double> seq_matrix = global_matrix;
                double t1 = MPI_Wtime();
                sequentialGaussJordan(seq_matrix, N);
                double t2 = MPI_Wtime();
                durationSeqMs = (t2 - t1) * 1000.0;
            } else {
                global_matrix = generateSystem(N);
                durationSeqMs = 0.0;
            }
        }
        // Prepare for the parallel computation: block-row distribution of the matrix
        int rows_per_proc = N / num_threads;
        int rem = N % num_threads;
        vector<int> sendcounts(num_threads), displs(num_threads);
        int current_disp = 0;
        for (int i = 0; i < num_threads; ++i) {
            int r = rows_per_proc + (i < rem ? 1 : 0);
            sendcounts[i] = r * cols;
            displs[i] = current_disp;
            current_disp += sendcounts[i];
        }
        vector<double> local_matrix(sendcounts[rank]);
        MPI_Scatterv(global_matrix.data(), sendcounts.data(), displs.data(), MPI_DOUBLE,
                     local_matrix.data(), sendcounts[rank], MPI_DOUBLE, 0, MPI_COMM_WORLD);
        // Free the full matrix on the root process
        if (rank == 0) vector<double>().swap(global_matrix);
        // Parallel computation
        MPI_Barrier(MPI_COMM_WORLD);
        double startPar = MPI_Wtime();
        parallelGaussJordan(local_matrix, N, sendcounts[rank] / cols, rank, num_threads, displs);
        MPI_Barrier(MPI_COMM_WORLD);
        double endPar = MPI_Wtime();
        double durationParMs = (endPar - startPar) * 1000.0;
        if (rank == 0) {
            cout << N << " " << fixed << setprecision(6) << durationSeqMs << " " << durationParMs << " " << num_threads << endl;
        }
    }
    MPI_Finalize();
    return 0;
}
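As a rough illustration of the block-row distribution set up before MPI_Scatterv (the value of N here is hypothetical, chosen only for the arithmetic): with N = 10 and num_threads = 4 we get cols = 11, rows_per_proc = 2 and rem = 2, so the first two processes receive 3 rows each and the last two receive 2 rows each, giving sendcounts = {33, 33, 22, 22} and displs = {0, 33, 66, 88}, for a total of N * cols = 110 elements.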
1302_3_4.sh
#!/bin/bash
# Configuration
EXECUTABLE_NAME="1302_3_4_run"
THREAD_COUNTS=(2 4 6 8 10 12 14 16)
TMP_RESULTS_FILE="results_slae.tmp"
# Compilation
mpic++ -std=c++17 -O3 -march=native -ffast-math -funroll-loops -o $EXECUTABLE_NAME 1302_3_4.cpp
if [ $? -ne 0 ]; then
    echo "Compilation failed."
    exit 1
fi
# Example run
mpirun --oversubscribe -np 2 ./$EXECUTABLE_NAME --simple-example
rm -f $TMP_RESULTS_FILE
# Data collection: the serial timing is measured only in the 2-process run
for t_count in "${THREAD_COUNTS[@]}"; do
    if [ "$t_count" -eq 2 ]; then
        echo "Running with $t_count threads"
        mpirun --oversubscribe -np $t_count ./$EXECUTABLE_NAME >> $TMP_RESULTS_FILE
    else
        echo "Running with $t_count threads"
        mpirun --oversubscribe -np $t_count ./$EXECUTABLE_NAME --skip-serial >> $TMP_RESULTS_FILE
    fi
done
# Sort and print the results
sort -n -k1,1 -k4,4 "$TMP_RESULTS_FILE" | awk '
{
size = $1; seq_time = $2; par_time = $3; t_count = $4;
if (t_count == 2) {
results[size, "serial"] = seq_time;
}
results[size, t_count] = par_time;
if (!seen_size[size]++) sorted_sizes[++num_sizes] = size;
if (!seen_tcount[t_count]++) sorted_tcounts[++num_tcounts] = t_count;
}
END {
    first_col_width = 20; data_col_width = 14;
    printf "\n=== PERFORMANCE RESULTS (GAUSS-JORDAN METHOD) ===\n";
    border_line = "";
    for (i = 0; i < first_col_width + 1; i++) { border_line = border_line "="; }
    border_line = border_line "|";
    for (n = 1; n <= num_sizes; n++) {
        border_line = border_line "=";
        for (i = 0; i < data_col_width; i++) { border_line = border_line "="; }
        border_line = border_line "=|";
    }
    print border_line
    printf "%-*s |", first_col_width, "Parameter";
    for (i = 1; i <= num_sizes; i++) printf " %-*s |", data_col_width, sorted_sizes[i];
    printf "\n";
    sep1 = ""; for (i = 0; i < first_col_width; i++) sep1 = sep1 "-";
    sep2 = ""; for (i = 0; i < data_col_width; i++) sep2 = sep2 "-";
    printf "%s-|", sep1;
    for (i = 1; i <= num_sizes; i++) printf "-%s-|", sep2;
    printf "\n";
    # Serial timing row
    printf "%-*s |", first_col_width, "Serial (ms)";
    for (i = 1; i <= num_sizes; i++) {
        val = results[sorted_sizes[i], "serial"]; val = sprintf("%.4f", val);
        printf " %-*s |", data_col_width, val;
    }
    printf "\n";
    # Parallel timing rows for each process count
    for (j = 1; j <= num_tcounts; j++) {
        t = sorted_tcounts[j];
        printf "%-*s |", first_col_width, t " threads (ms)";
        for (i = 1; i <= num_sizes; i++) {
            val = results[sorted_sizes[i], t]; val = sprintf("%.4f", val);
            printf " %-*s |", data_col_width, val;
        }
        printf "\n";
    }
    print border_line
}'
rm -f $TMP_RESULTS_FILE
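Each line the C++ program appends to results_slae.tmp has the form "N serial_ms parallel_ms num_procs", which are exactly the four fields ($1..$4) read by the awk script; in --skip-serial runs the second field is simply 0.000000. A collected line might look like "1000 523.412000 87.105000 8", where the timing values here are invented purely for illustration.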
APPENDIX B. Flowcharts of the program's algorithms
Flowchart of the sequential algorithm for solving the SLAE by the Gauss-Jordan method
Flowchart of the parallel (MPI) algorithm for solving the SLAE by the Gauss-Jordan method
