//-----------------------------------------------------------------------------
// axb-etal.cc
//
// Micro-benchmark comparing equivalent implementations of the same reduction:
// for every row j, add to c[j] the sum over i of
//     a[j][i] + b[j][i]   when j is even
//     a[j][i] - b[j][i]   when j is odd
//-----------------------------------------------------------------------------

#include <algorithm>   // std::nth_element, std::ranges::fill
#include <chrono>      // std::chrono::high_resolution_clock, duration
#include <functional>  // std::bind
#include <iomanip>     // std::setw, std::setprecision
#include <iostream>    // std::cout
#include <numeric>     // std::accumulate
#include <random>      // std::random_device, engines, distributions

//-----------------------------------------------------------------------------

// Both dimensions are multiples of 2 so that the parity `if` can be removed
// and the loops unrolled by two without any remainder handling.
const int M = 1234, N = 678;
static_assert(M % 2 == 0 && N % 2 == 0,
              "M and N must be even: f2..f5 step through the data in pairs");

//-----------------------------------------------------------------------------

// a, b: N rows of M input values each; c: one accumulator per row.
// Every fN() below adds into c, it never resets it.
int a[N][M], b[N][M], c[N];

//-----------------------------------------------------------------------------

// Original version: the outer loop walks the column index i and the inner
// loop walks the row index j, so consecutive accesses to a and b jump from
// row to row, and the parity of j is tested for every element.
void f0() // original
{
    for (int i = 0; i < M; ++i)
        for (int j = 0; j < N; ++j)
            if ((j % 2) == 0)
                c[j] += a[j][i] + b[j][i];
            else
                c[j] += a[j][i] - b[j][i];
}

//-----------------------------------------------------------------------------

// Loop interchange: rows in the outer loop, so each row of a and b is read
// sequentially and the parity test runs once per row instead of per element.
void f1() // reorder
{
    for (int j = 0; j < N; ++j)
        if ((j % 2) == 0)
            for (int i = 0; i < M; ++i)
                c[j] += a[j][i] + b[j][i];
        else
            for (int i = 0; i < M; ++i)
                c[j] += a[j][i] - b[j][i];
}

//-----------------------------------------------------------------------------

// Rows are processed in (even, odd) pairs, which removes the parity test
// altogether. Relies on N being even.
void f2() // reorder, no if
{
    for (int j = 0; j < N; j += 2)
        for (int i = 0; i < M; ++i)
        {
            c[j    ] += a[j    ][i] + b[j    ][i];
            c[j + 1] += a[j + 1][i] - b[j + 1][i];
        }
}

//-----------------------------------------------------------------------------

// Same as f2 with the inner loop unrolled by two. Relies on M being even.
void f3() // unroll
{
    for (int j = 0; j < N; j += 2)
        for (int i = 0; i < M; i += 2)
        {
            c[j    ] += a[j    ][i    ] + b[j    ][i    ]
                      + a[j    ][i + 1] + b[j    ][i + 1];
            c[j + 1] += a[j + 1][i    ] - b[j + 1][i    ]
                      + a[j + 1][i + 1] - b[j + 1][i + 1];
        }
}

//-----------------------------------------------------------------------------

// Precompute the per-row sums of a and b into temporaries, then combine them
// pairwise with the sign that corresponds to the row parity.
void f4() // precompute
{
    int aa[N], bb[N];

    for (int j = 0; j < N; ++j)
    {
        aa[j] = std::accumulate(a[j], a[j] + M, 0);
        bb[j] = std::accumulate(b[j], b[j] + M, 0);
    }

    for (int j = 0; j < N; j += 2)
    {
        c[j    ] += aa[j    ] + bb[j    ];
        c[j + 1] += aa[j + 1] - bb[j + 1];
    }
}

//----------------------------------------------------------------------------- void f5() // precalcular2 { for (int j = 0; j < N; ++j) c[j] += std::accumulate(&a[j][0], &a[j][M], 0); for (int j = 0; j < N; j += 2) { c[j ] += std::accumulate(&b[j ][0], &b[j ][M], 0); c[j + 1] -= std::accumulate(&b[j + 1][0], &b[j + 1][M], 0); } } //----------------------------------------------------------------------------- template void test(const F& f, const char* name) { const unsigned REP = 33; std::chrono::duration rep[REP]; std::ranges::fill(c, 0); for (auto& i: rep) { auto start = std::chrono::high_resolution_clock::now(); f(); auto stop = std::chrono::high_resolution_clock::now(); i = stop - start; } std::nth_element(rep, rep + REP / 2, rep + REP); std::cout << std::setw(16) << name << ':' << " time: " << std::fixed << std::setprecision(2) << std::setw(10) << rep[REP / 2].count() << "us" << " result = " << std::accumulate(std::begin(c), std::end(c), 0) << std::endl; } //----------------------------------------------------------------------------- int main() { std::random_device device; std::default_random_engine engine(device()); std::uniform_int_distribution distribution(-5, 5); auto rng = std::bind(distribution, engine); for (int i = 0; i < N; ++i) for (int j = 0; j < M; ++j) { a[i][j] = rng(); b[i][j] = rng(); } test(f0, "original"); test(f1, "reordenar"); test(f2, "reordenar-if"); test(f3, "desenrollar"); test(f4, "precalcular"); test(f5, "precalcular2"); } //-----------------------------------------------------------------------------