From 4d6227cebf9f07ce1d0f2b89aad52b7a457d6769 Mon Sep 17 00:00:00 2001 From: caixilong Date: Wed, 3 Dec 2025 11:58:38 +0800 Subject: [PATCH 1/3] add sync test cases --- tests/fuzz/device/sync/barrier_kernel.h | 14 ++ tests/fuzz/device/sync/p2p_kernel.h | 5 + .../device/sync/barrier/barrier_kernel.cpp | 201 +++++++++++++++ tests/unittest/device/sync/p2p/p2p_kernel.cpp | 132 ++++++++++ .../host/sync/barrier/barrier_host_test.cpp | 233 +++++++++++++++++- .../host/sync/order/order_host_test.cpp | 45 ++-- .../unittest/host/sync/p2p/p2p_host_test.cpp | 125 ++++++++++ 7 files changed, 734 insertions(+), 21 deletions(-) diff --git a/tests/fuzz/device/sync/barrier_kernel.h b/tests/fuzz/device/sync/barrier_kernel.h index 33eaacf2..f8732177 100644 --- a/tests/fuzz/device/sync/barrier_kernel.h +++ b/tests/fuzz/device/sync/barrier_kernel.h @@ -12,9 +12,23 @@ void increase_do(void* stream, uint64_t config, uint8_t *addr, int rank_id, int rank_size); void increase_vec_do(void* stream, uint64_t config, uint8_t *addr, int rank_id, int rank_size); +void increase_odd_do(void* stream, uint64_t config, uint8_t *addr, int rank_id, int rank_size); +void increase_vec_odd_do(void* stream, uint64_t config, uint8_t *addr, int rank_id, int rank_size); +void increase_do_part_pes(void* stream, uint64_t config, uint8_t *addr, int rank_id, + int rank_size, shmem_team_t team_id); +void increase_vec_do_part_pes(void* stream, uint64_t config, uint8_t *addr, int rank_id, + int rank_size, shmem_team_t team_id); void increase_do_odd_team(void* stream, uint64_t config, uint8_t *addr, int rank_id, int rank_size, shmem_team_t team_id); void increase_vec_do_odd_team(void* stream, uint64_t config, uint8_t *addr, int rank_id, int rank_size, shmem_team_t team_id); +void increase_do_half_team(void* stream, uint64_t config, uint8_t *addr, int rank_id, + int rank_size, shmem_team_t team_id); +void increase_vec_do_half_team(void* stream, uint64_t config, uint8_t *addr, int rank_id, + int rank_size, shmem_team_t team_id); +void increase_do_one_pe_team(void* stream, uint64_t config, uint8_t *addr, int rank_id, + int rank_size, shmem_team_t team_id); +void increase_vec_do_one_pe_team(void* stream, uint64_t config, uint8_t *addr, int rank_id, + int rank_size, shmem_team_t team_id); #endif // BARRIER_KERNEL_H \ No newline at end of file diff --git a/tests/fuzz/device/sync/p2p_kernel.h b/tests/fuzz/device/sync/p2p_kernel.h index 6ea55d59..59cebb92 100644 --- a/tests/fuzz/device/sync/p2p_kernel.h +++ b/tests/fuzz/device/sync/p2p_kernel.h @@ -11,5 +11,10 @@ #define P2P_KERNEL_H void p2p_chain_do(void *stream, uint64_t config, uint8_t *addr, int rank_id, int rank_size); +void p2p_chain_cmp_ne_do(void *stream, uint64_t config, uint8_t *addr, int rank_id, int rank_size); +void p2p_chain_cmp_gt_do(void *stream, uint64_t config, uint8_t *addr, int rank_id, int rank_size); +void p2p_chain_cmp_ge_do(void *stream, uint64_t config, uint8_t *addr, int rank_id, int rank_size); +void p2p_chain_cmp_lt_do(void *stream, uint64_t config, uint8_t *addr, int rank_id, int rank_size); +void p2p_chain_cmp_le_do(void *stream, uint64_t config, uint8_t *addr, int rank_id, int rank_size); #endif \ No newline at end of file diff --git a/tests/unittest/device/sync/barrier/barrier_kernel.cpp b/tests/unittest/device/sync/barrier/barrier_kernel.cpp index f115a1c7..b80ac020 100644 --- a/tests/unittest/device/sync/barrier/barrier_kernel.cpp +++ b/tests/unittest/device/sync/barrier/barrier_kernel.cpp @@ -42,6 +42,81 @@ extern "C" SHMEM_GLOBAL void increase_vec(uint64_t config, GM_ADDR addr, int ran #endif } +extern "C" SHMEM_GLOBAL void increase_odd(uint64_t config, GM_ADDR addr, int rank_id, int rank_size) { + shmemx_set_ffts_config(config); + +#ifdef __DAV_C220_CUBE__ + // scalar unit of cube core is not affected by barrier + shmem_barrier_all(); + shmem_barrier_all(); +#endif + +#ifdef __DAV_C220_VEC__ + uint64_t val = shmemi_load((__gm__ uint64_t *)addr); + + shmem_barrier_all(); + if (rank_id & 1) { + GM_ADDR remote = shmemi_ptr(addr, (rank_id + 2) % rank_size); + shmemi_store((__gm__ uint64_t *)remote, val + 1); + } + + shmem_barrier_all(); +#endif +} + +extern "C" SHMEM_GLOBAL void increase_vec_odd(uint64_t config, GM_ADDR addr, int rank_id, int rank_size) { + shmemx_set_ffts_config(config); + +#ifdef __DAV_C220_VEC__ + uint64_t val = shmemi_load((__gm__ uint64_t *)addr); + + shmemx_barrier_all_vec(); + if (rank_id & 1) { + GM_ADDR remote = shmemi_ptr(addr, (rank_id + 2) % rank_size); + shmemi_store((__gm__ uint64_t *)remote, val + 1); + } + shmemx_barrier_all_vec(); +#endif +} + +extern "C" SHMEM_GLOBAL void increase_part_pes(uint64_t config, GM_ADDR addr, int rank_id, + int rank_size, shmem_team_t team_id) { + shmemx_set_ffts_config(config); + +#ifdef __DAV_C220_CUBE__ + // scalar unit of cube core is not affected by barrier + shmem_barrier_all(); + shmem_barrier_all(); +#endif + +#ifdef __DAV_C220_VEC__ + uint64_t val = shmemi_load((__gm__ uint64_t *)addr); + + shmem_barrier(team_id); + if (rank_id & 1) { + GM_ADDR remote = shmemi_ptr(addr, (rank_id + 2) % rank_size); + shmemi_store((__gm__ uint64_t *)remote, val + 1); + } + shmem_barrier(team_id); +#endif +} + +extern "C" SHMEM_GLOBAL void increase_vec_part_pes(uint64_t config, GM_ADDR addr, int rank_id, + int rank_size, shmem_team_t team_id) { + shmemx_set_ffts_config(config); + +#ifdef __DAV_C220_VEC__ + uint64_t val = shmemi_load((__gm__ uint64_t *)addr); + + shmemx_barrier_vec(team_id); + if (rank_id & 1) { + GM_ADDR remote = shmemi_ptr(addr, (rank_id + 2) % rank_size); + shmemi_store((__gm__ uint64_t *)remote, val + 1); + } + shmemx_barrier_vec(team_id); +#endif +} + extern "C" SHMEM_GLOBAL void increase_odd_team(uint64_t config, GM_ADDR addr, int rank_id, int rank_size, shmem_team_t team_id) { shmemx_set_ffts_config(config); @@ -80,6 +155,86 @@ extern "C" SHMEM_GLOBAL void increase_vec_odd_team(uint64_t config, GM_ADDR addr #endif } +extern "C" SHMEM_GLOBAL void increase_half_team(uint64_t config, GM_ADDR addr, int rank_id, + int rank_size, shmem_team_t team_id) { + shmemx_set_ffts_config(config); + +#ifdef __DAV_C220_CUBE__ + // scalar unit of cube core is not affected by barrier + shmem_barrier_all(); + shmem_barrier_all(); +#endif + +#ifdef __DAV_C220_VEC__ + uint64_t val = shmemi_load((__gm__ uint64_t *)addr); + int team_size = rank_size / 2; + + shmem_barrier(team_id); + if (rank_id >= 1 && rank_id - 1 < team_size) { + GM_ADDR remote = shmemi_ptr(addr, 1 + (rank_id + 1) % team_size); + shmemi_store((__gm__ uint64_t *)remote, val + 1); + } + shmem_barrier(team_id); +#endif +} + +extern "C" SHMEM_GLOBAL void increase_vec_half_team(uint64_t config, GM_ADDR addr, int rank_id, + int rank_size, shmem_team_t team_id) { + shmemx_set_ffts_config(config); + +#ifdef __DAV_C220_VEC__ + uint64_t val = shmemi_load((__gm__ uint64_t *)addr); + int team_size = rank_size / 2; + + shmemx_barrier_vec(team_id); + if (rank_id >= 1 && rank_id - 1 < team_size) { + GM_ADDR remote = shmemi_ptr(addr, 1 + (rank_id + 1) % team_size); + shmemi_store((__gm__ uint64_t *)remote, val + 1); + } + shmemx_barrier_vec(team_id); +#endif +} + +extern "C" SHMEM_GLOBAL void increase_one_pe_team(uint64_t config, GM_ADDR addr, int rank_id, + int rank_size, shmem_team_t team_id) { + shmemx_set_ffts_config(config); + +#ifdef __DAV_C220_CUBE__ + // scalar unit of cube core is not affected by barrier + shmem_barrier_all(); + shmem_barrier_all(); +#endif + +#ifdef __DAV_C220_VEC__ + uint64_t val = shmemi_load((__gm__ uint64_t *)addr); + int team_size = shmem_team_n_pes(team_id); + + shmem_barrier(team_id); + if (rank_id == 1) { + GM_ADDR remote = shmemi_ptr(addr, rank_id); + shmemi_store((__gm__ uint64_t *)remote, val + 1); + } + shmem_barrier(team_id); +#endif +} + +extern "C" SHMEM_GLOBAL void increase_vec_one_pe_team(uint64_t config, GM_ADDR addr, int rank_id, + int rank_size, shmem_team_t team_id) { + shmemx_set_ffts_config(config); + +#ifdef __DAV_C220_VEC__ + uint64_t val = shmemi_load((__gm__ uint64_t *)addr); + int team_size = shmem_team_n_pes(team_id); + + shmemx_barrier_vec(team_id); + if (rank_id == 1) { + GM_ADDR remote = shmemi_ptr(addr, rank_id); + shmemi_store((__gm__ uint64_t *)remote, val + 1); + } + shmemx_barrier_vec(team_id); +#endif +} + void increase_do(void* stream, uint64_t config, uint8_t *addr, int rank_id, int rank_size) { increase<<<16, nullptr, stream>>>(config, addr, rank_id, rank_size); @@ -90,6 +245,28 @@ void increase_vec_do(void* stream, uint64_t config, uint8_t *addr, int rank_id, increase_vec<<<16, nullptr, stream>>>(config, addr, rank_id, rank_size); } +void increase_odd_do(void* stream, uint64_t config, uint8_t *addr, int rank_id, int rank_size) +{ + increase_odd<<<16, nullptr, stream>>>(config, addr, rank_id, rank_size); +} + +void increase_vec_odd_do(void* stream, uint64_t config, uint8_t *addr, int rank_id, int rank_size) +{ + increase_vec_odd<<<16, nullptr, stream>>>(config, addr, rank_id, rank_size); +} + +void increase_do_part_pes(void* stream, uint64_t config, uint8_t *addr, int rank_id, + int rank_size, shmem_team_t team_id) +{ + increase_part_pes<<<16, nullptr, stream>>>(config, addr, rank_id, rank_size, team_id); +} + +void increase_vec_do_part_pes(void* stream, uint64_t config, uint8_t *addr, int rank_id, + int rank_size, shmem_team_t team_id) +{ + increase_vec_part_pes<<<16, nullptr, stream>>>(config, addr, rank_id, rank_size, team_id); +} + void increase_do_odd_team(void* stream, uint64_t config, uint8_t *addr, int rank_id, int rank_size, shmem_team_t team_id) { @@ -100,4 +277,28 @@ void increase_vec_do_odd_team(void* stream, uint64_t config, uint8_t *addr, int int rank_size, shmem_team_t team_id) { increase_vec_odd_team<<<16, nullptr, stream>>>(config, addr, rank_id, rank_size, team_id); +} + +void increase_do_half_team(void* stream, uint64_t config, uint8_t *addr, int rank_id, + int rank_size, shmem_team_t team_id) +{ + increase_half_team<<<16, nullptr, stream>>>(config, addr, rank_id, rank_size, team_id); +} + +void increase_vec_do_half_team(void* stream, uint64_t config, uint8_t *addr, int rank_id, + int rank_size, shmem_team_t team_id) +{ + increase_vec_half_team<<<16, nullptr, stream>>>(config, addr, rank_id, rank_size, team_id); +} + +void increase_do_one_pe_team(void* stream, uint64_t config, uint8_t *addr, int rank_id, + int rank_size, shmem_team_t team_id) +{ + increase_one_pe_team<<<16, nullptr, stream>>>(config, addr, rank_id, rank_size, team_id); +} + +void increase_vec_do_one_pe_team(void* stream, uint64_t config, uint8_t *addr, int rank_id, + int rank_size, shmem_team_t team_id) +{ + increase_vec_one_pe_team<<<16, nullptr, stream>>>(config, addr, rank_id, rank_size, team_id); } \ No newline at end of file diff --git a/tests/unittest/device/sync/p2p/p2p_kernel.cpp b/tests/unittest/device/sync/p2p/p2p_kernel.cpp index 21acbd4c..4be5c001 100644 --- a/tests/unittest/device/sync/p2p/p2p_kernel.cpp +++ b/tests/unittest/device/sync/p2p/p2p_kernel.cpp @@ -43,7 +43,139 @@ extern "C" SHMEM_GLOBAL void p2p_chain(uint64_t config, GM_ADDR addr, int rank_i shmem_barrier_all(); } +extern "C" SHMEM_GLOBAL void p2p_chain_cmp_ne(uint64_t config, GM_ADDR addr, int rank_id, int rank_size) +{ + shmemx_set_ffts_config(config); + auto sig_addr = (__gm__ int32_t *)addr; + int next = (rank_id + 1) % rank_size; + + shmem_barrier_all(); + +#ifdef __DAV_C220_VEC__ + if (rank_id == 0) { + shmemx_signal_op(sig_addr, 1, SHMEM_SIGNAL_SET, next); + shmem_signal_wait_until(sig_addr, SHMEM_CMP_NE, 0); + } else { + shmem_signal_wait_until(sig_addr, SHMEM_CMP_NE, 0); + shmemx_signal_op(sig_addr, 1, SHMEM_SIGNAL_SET, next); + } +#endif + + shmem_barrier_all(); +} + +extern "C" SHMEM_GLOBAL void p2p_chain_cmp_gt(uint64_t config, GM_ADDR addr, int rank_id, int rank_size) +{ + shmemx_set_ffts_config(config); + auto sig_addr = (__gm__ int32_t *)addr; + int next = (rank_id + 1) % rank_size; + + shmem_barrier_all(); + +#ifdef __DAV_C220_VEC__ + if (rank_id == 0) { + shmemx_signal_op(sig_addr, 1, SHMEM_SIGNAL_SET, next); + shmem_signal_wait_until(sig_addr, SHMEM_CMP_GT, 0); + } else { + shmem_signal_wait_until(sig_addr, SHMEM_CMP_GT, 0); + shmemx_signal_op(sig_addr, 1, SHMEM_SIGNAL_SET, next); + } +#endif + + shmem_barrier_all(); +} + +extern "C" SHMEM_GLOBAL void p2p_chain_cmp_ge(uint64_t config, GM_ADDR addr, int rank_id, int rank_size) +{ + shmemx_set_ffts_config(config); + auto sig_addr = (__gm__ int32_t *)addr; + int next = (rank_id + 1) % rank_size; + + shmem_barrier_all(); + +#ifdef __DAV_C220_VEC__ + if (rank_id == 0) { + shmemx_signal_op(sig_addr, 1, SHMEM_SIGNAL_SET, next); + shmem_signal_wait_until(sig_addr, SHMEM_CMP_GE, 1); + } else { + shmem_signal_wait_until(sig_addr, SHMEM_CMP_GE, 1); + shmemx_signal_op(sig_addr, 1, SHMEM_SIGNAL_SET, next); + } +#endif + + shmem_barrier_all(); +} + +extern "C" SHMEM_GLOBAL void p2p_chain_cmp_lt(uint64_t config, GM_ADDR addr, int rank_id, int rank_size) +{ + shmemx_set_ffts_config(config); + auto sig_addr = (__gm__ int32_t *)addr; + int next = (rank_id + 1) % rank_size; + + *sig_addr = 10; + shmem_barrier_all(); + +#ifdef __DAV_C220_VEC__ + if (rank_id == 0) { + shmemx_signal_op(sig_addr, 1, SHMEM_SIGNAL_SET, next); + shmem_signal_wait_until(sig_addr, SHMEM_CMP_LT, 5); + } else { + shmem_signal_wait_until(sig_addr, SHMEM_CMP_LT, 5); + shmemx_signal_op(sig_addr, 1, SHMEM_SIGNAL_SET, next); + } +#endif + + shmem_barrier_all(); +} + +extern "C" SHMEM_GLOBAL void p2p_chain_cmp_le(uint64_t config, GM_ADDR addr, int rank_id, int rank_size) +{ + shmemx_set_ffts_config(config); + auto sig_addr = (__gm__ int32_t *)addr; + int next = (rank_id + 1) % rank_size; + + *sig_addr = 10; + shmem_barrier_all(); + +#ifdef __DAV_C220_VEC__ + if (rank_id == 0) { + shmemx_signal_op(sig_addr, 1, SHMEM_SIGNAL_SET, next); + shmem_signal_wait_until(sig_addr, SHMEM_CMP_LE, 5); + } else { + shmem_signal_wait_until(sig_addr, SHMEM_CMP_LE, 5); + shmemx_signal_op(sig_addr, 1, SHMEM_SIGNAL_SET, next); + } +#endif + + shmem_barrier_all(); +} + void p2p_chain_do(void *stream, uint64_t config, uint8_t *addr, int rank_id, int rank_size) { p2p_chain<<<1, nullptr, stream>>>(config, addr, rank_id, rank_size); +} + +void p2p_chain_cmp_ne_do(void *stream, uint64_t config, uint8_t *addr, int rank_id, int rank_size) +{ + p2p_chain_cmp_ne<<<1, nullptr, stream>>>(config, addr, rank_id, rank_size); +} + +void p2p_chain_cmp_gt_do(void *stream, uint64_t config, uint8_t *addr, int rank_id, int rank_size) +{ + p2p_chain_cmp_gt<<<1, nullptr, stream>>>(config, addr, rank_id, rank_size); +} + +void p2p_chain_cmp_ge_do(void *stream, uint64_t config, uint8_t *addr, int rank_id, int rank_size) +{ + p2p_chain_cmp_ge<<<1, nullptr, stream>>>(config, addr, rank_id, rank_size); +} + +void p2p_chain_cmp_lt_do(void *stream, uint64_t config, uint8_t *addr, int rank_id, int rank_size) +{ + p2p_chain_cmp_lt<<<1, nullptr, stream>>>(config, addr, rank_id, rank_size); +} + +void p2p_chain_cmp_le_do(void *stream, uint64_t config, uint8_t *addr, int rank_id, int rank_size) +{ + p2p_chain_cmp_le<<<1, nullptr, stream>>>(config, addr, rank_id, rank_size); } \ No newline at end of file diff --git a/tests/unittest/host/sync/barrier/barrier_host_test.cpp b/tests/unittest/host/sync/barrier/barrier_host_test.cpp index fbf71afc..81cabd04 100644 --- a/tests/unittest/host/sync/barrier/barrier_host_test.cpp +++ b/tests/unittest/host/sync/barrier/barrier_host_test.cpp @@ -17,7 +17,7 @@ #include "unittest_main_test.h" #include "barrier_kernel.h" -constexpr int32_t SHMEM_BARRIER_TEST_NUM = 3; +constexpr int32_t SHMEM_BARRIER_TEST_NUM = 100; static void test_barrier_black_box(int32_t rank_id, int32_t n_ranks, uint64_t local_mem_size) { @@ -31,7 +31,7 @@ static void test_barrier_black_box(int32_t rank_id, int32_t n_ranks, uint64_t lo uint64_t *addr_host; ASSERT_EQ(aclrtMallocHost(reinterpret_cast(&addr_host), sizeof(uint64_t)), 0); - for (int32_t i = 1; i <= SHMEM_BARRIER_TEST_NUM; i++) { + for (int32_t i = 1; i <= SHMEM_BARRIER_TEST_NUM / 2; i++) { std::cout << "[TEST] barriers test blackbox rank_id: " << rank_id << " time: " << i << std::endl; increase_do(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev, rank_id, n_ranks); ASSERT_EQ(aclrtSynchronizeStream(stream), 0); @@ -40,12 +40,23 @@ static void test_barrier_black_box(int32_t rank_id, int32_t n_ranks, uint64_t lo shm::shmemi_control_barrier_all(); } + for (int32_t i = SHMEM_BARRIER_TEST_NUM / 2 + 1; i <= SHMEM_BARRIER_TEST_NUM; i++) { + std::cout << "[TEST] barriers test blackbox rank_id: " << rank_id << " time: " << i << std::endl; + increase_odd_do(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev, rank_id, n_ranks); + ASSERT_EQ(aclrtSynchronizeStream(stream), 0); + if (rank_id & 1){ + ASSERT_EQ(aclrtMemcpy(addr_host, sizeof(uint64_t), addr_dev, sizeof(uint64_t), ACL_MEMCPY_DEVICE_TO_HOST), 0); + ASSERT_EQ((*addr_host), i); + } + shm::shmemi_control_barrier_all(); + } + uint64_t *addr_dev_vec = static_cast(shmem_malloc(sizeof(uint64_t))); ASSERT_EQ(aclrtMemset(addr_dev_vec, sizeof(uint64_t), 0, sizeof(uint64_t)), 0); uint64_t *addr_host_vec; ASSERT_EQ(aclrtMallocHost(reinterpret_cast(&addr_host_vec), sizeof(uint64_t)), 0); - for (int32_t i = 1; i <= SHMEM_BARRIER_TEST_NUM; i++) { + for (int32_t i = 1; i <= SHMEM_BARRIER_TEST_NUM / 2; i++) { std::cout << "[TEST] vec barriers test blackbox rank_id: " << rank_id << " time: " << i << std::endl; increase_vec_do(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev_vec, rank_id, n_ranks); ASSERT_EQ(aclrtSynchronizeStream(stream), 0); @@ -55,6 +66,17 @@ static void test_barrier_black_box(int32_t rank_id, int32_t n_ranks, uint64_t lo shm::shmemi_control_barrier_all(); } + for (int32_t i = SHMEM_BARRIER_TEST_NUM / 2 + 1; i <= SHMEM_BARRIER_TEST_NUM; i++) { + std::cout << "[TEST] barriers test blackbox rank_id: " << rank_id << " time: " << i << std::endl; + increase_vec_odd_do(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev_vec, rank_id, n_ranks); + ASSERT_EQ(aclrtSynchronizeStream(stream), 0); + if (rank_id & 1){ + ASSERT_EQ(aclrtMemcpy(addr_host_vec, sizeof(uint64_t), addr_dev_vec, sizeof(uint64_t), ACL_MEMCPY_DEVICE_TO_HOST), 0); + ASSERT_EQ((*addr_host_vec), i); + } + shm::shmemi_control_barrier_all(); + } + ASSERT_EQ(aclrtFreeHost(addr_host), 0); shmem_free(addr_dev); ASSERT_EQ(aclrtFreeHost(addr_host_vec), 0); @@ -126,6 +148,190 @@ static void test_barrier_black_box_odd_team(int32_t rank_id, int32_t n_ranks, ui } } +static void test_barrier_black_box_team_part_pes(int32_t rank_id, int32_t n_ranks, uint64_t local_mem_size) +{ + int32_t device_id = rank_id % test_gnpu_num + test_first_npu; + aclrtStream stream; + test_init(rank_id, n_ranks, local_mem_size, &stream); + ASSERT_NE(stream, nullptr); + + shmem_team_t team; + int start = 1; + int stride = 1; + int team_size = n_ranks - 1; + shmem_team_split_strided(SHMEM_TEAM_WORLD, start, stride, team_size, &team); + + uint64_t *addr_dev = static_cast(shmem_malloc(sizeof(uint64_t))); + ASSERT_EQ(aclrtMemset(addr_dev, sizeof(uint64_t), 0, sizeof(uint64_t)), 0); + uint64_t *addr_host; + ASSERT_EQ(aclrtMallocHost(reinterpret_cast(&addr_host), sizeof(uint64_t)), 0); + + uint64_t *addr_dev_vec = static_cast(shmem_malloc(sizeof(uint64_t))); + ASSERT_EQ(aclrtMemset(addr_dev_vec, sizeof(uint64_t), 0, sizeof(uint64_t)), 0); + uint64_t *addr_host_vec; + ASSERT_EQ(aclrtMallocHost(reinterpret_cast(&addr_host_vec), sizeof(uint64_t)), 0); + + if (rank_id != 0) { + for (int32_t i = 1; i <= SHMEM_BARRIER_TEST_NUM; i++) { + std::cout << "[TEST] barriers test blackbox rank_id: " << rank_id << " time: " << i << std::endl; + increase_do_part_pes(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev, rank_id, n_ranks, team); + ASSERT_EQ(aclrtSynchronizeStream(stream), 0); + if (rank_id & 1) { + ASSERT_EQ(aclrtMemcpy(addr_host, sizeof(uint64_t), addr_dev, sizeof(uint64_t), ACL_MEMCPY_DEVICE_TO_HOST), + 0); + ASSERT_EQ((*addr_host), i); + } + shm::shmemi_control_barrier_all(); + } + + for (int32_t i = 1; i <= SHMEM_BARRIER_TEST_NUM; i++) { + std::cout << "[TEST] vec barriers test blackbox rank_id: " << rank_id << " time: " << i << std::endl; + increase_vec_do_part_pes(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev_vec, rank_id, n_ranks, + team); + ASSERT_EQ(aclrtSynchronizeStream(stream), 0); + if (rank_id & 1) { + ASSERT_EQ( + aclrtMemcpy(addr_host_vec, sizeof(uint64_t), addr_dev_vec, sizeof(uint64_t), ACL_MEMCPY_DEVICE_TO_HOST), + 0); + ASSERT_EQ((*addr_host_vec), i); + } + shm::shmemi_control_barrier_all(); + } + } + + ASSERT_EQ(aclrtFreeHost(addr_host), 0); + shmem_free(addr_dev); + ASSERT_EQ(aclrtFreeHost(addr_host_vec), 0); + shmem_free(addr_dev_vec); + + shmem_team_destroy(team); + + test_finalize(stream, device_id); + if (::testing::Test::HasFailure()) { + exit(1); + } +} + +static void test_barrier_black_box_half_team(int32_t rank_id, int32_t n_ranks, uint64_t local_mem_size) +{ + int32_t device_id = rank_id % test_gnpu_num + test_first_npu; + aclrtStream stream; + test_init(rank_id, n_ranks, local_mem_size, &stream); + ASSERT_NE(stream, nullptr); + + shmem_team_t team_half; + int start = 1; + int stride = 1; + int team_size = n_ranks / 2; + shmem_team_split_strided(SHMEM_TEAM_WORLD, start, stride, team_size, &team_half); + + uint64_t *addr_dev = static_cast(shmem_malloc(sizeof(uint64_t))); + ASSERT_EQ(aclrtMemset(addr_dev, sizeof(uint64_t), 0, sizeof(uint64_t)), 0); + uint64_t *addr_host; + ASSERT_EQ(aclrtMallocHost(reinterpret_cast(&addr_host), sizeof(uint64_t)), 0); + + uint64_t *addr_dev_vec = static_cast(shmem_malloc(sizeof(uint64_t))); + ASSERT_EQ(aclrtMemset(addr_dev_vec, sizeof(uint64_t), 0, sizeof(uint64_t)), 0); + uint64_t *addr_host_vec; + ASSERT_EQ(aclrtMallocHost(reinterpret_cast(&addr_host_vec), sizeof(uint64_t)), 0); + + if (rank_id >= start && rank_id - start < team_size) { + for (int32_t i = 1; i <= SHMEM_BARRIER_TEST_NUM; i++) { + std::cout << "[TEST] barriers test blackbox rank_id: " << rank_id << " time: " << i << std::endl; + increase_do_half_team(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev, rank_id, n_ranks, team_half); + ASSERT_EQ(aclrtSynchronizeStream(stream), 0); + ASSERT_EQ(aclrtMemcpy(addr_host, sizeof(uint64_t), addr_dev, sizeof(uint64_t), ACL_MEMCPY_DEVICE_TO_HOST), + 0); + ASSERT_EQ((*addr_host), i); + shm::shmemi_control_barrier_all(); + } + + for (int32_t i = 1; i <= SHMEM_BARRIER_TEST_NUM; i++) { + std::cout << "[TEST] vec barriers test blackbox rank_id: " << rank_id << " time: " << i << std::endl; + increase_vec_do_half_team(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev_vec, rank_id, n_ranks, + team_half); + ASSERT_EQ(aclrtSynchronizeStream(stream), 0); + ASSERT_EQ( + aclrtMemcpy(addr_host_vec, sizeof(uint64_t), addr_dev_vec, sizeof(uint64_t), ACL_MEMCPY_DEVICE_TO_HOST), + 0); + ASSERT_EQ((*addr_host_vec), i); + shm::shmemi_control_barrier_all(); + } + } + + ASSERT_EQ(aclrtFreeHost(addr_host), 0); + shmem_free(addr_dev); + ASSERT_EQ(aclrtFreeHost(addr_host_vec), 0); + shmem_free(addr_dev_vec); + + shmem_team_destroy(team_half); + + test_finalize(stream, device_id); + if (::testing::Test::HasFailure()) { + exit(1); + } +} + +static void test_barrier_black_box_one_pe_team(int32_t rank_id, int32_t n_ranks, uint64_t local_mem_size) +{ + int32_t device_id = rank_id % test_gnpu_num + test_first_npu; + aclrtStream stream; + test_init(rank_id, n_ranks, local_mem_size, &stream); + ASSERT_NE(stream, nullptr); + + shmem_team_t team_one_pe; + int start = 1; + int stride = 1; + int team_size = 1; + shmem_team_split_strided(SHMEM_TEAM_WORLD, start, stride, team_size, &team_one_pe); + + uint64_t *addr_dev = static_cast(shmem_malloc(sizeof(uint64_t))); + ASSERT_EQ(aclrtMemset(addr_dev, sizeof(uint64_t), 0, sizeof(uint64_t)), 0); + uint64_t *addr_host; + ASSERT_EQ(aclrtMallocHost(reinterpret_cast(&addr_host), sizeof(uint64_t)), 0); + + uint64_t *addr_dev_vec = static_cast(shmem_malloc(sizeof(uint64_t))); + ASSERT_EQ(aclrtMemset(addr_dev_vec, sizeof(uint64_t), 0, sizeof(uint64_t)), 0); + uint64_t *addr_host_vec; + ASSERT_EQ(aclrtMallocHost(reinterpret_cast(&addr_host_vec), sizeof(uint64_t)), 0); + + if (rank_id == 1) { + for (int32_t i = 1; i <= SHMEM_BARRIER_TEST_NUM; i++) { + std::cout << "[TEST] barriers test blackbox rank_id: " << rank_id << " time: " << i << std::endl; + increase_do_one_pe_team(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev, rank_id, n_ranks, team_one_pe); + ASSERT_EQ(aclrtSynchronizeStream(stream), 0); + ASSERT_EQ(aclrtMemcpy(addr_host, sizeof(uint64_t), addr_dev, sizeof(uint64_t), ACL_MEMCPY_DEVICE_TO_HOST), + 0); + ASSERT_EQ((*addr_host), i); + shm::shmemi_control_barrier_all(); + } + + for (int32_t i = 1; i <= SHMEM_BARRIER_TEST_NUM; i++) { + std::cout << "[TEST] vec barriers test blackbox rank_id: " << rank_id << " time: " << i << std::endl; + increase_vec_do_one_pe_team(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev_vec, rank_id, n_ranks, + team_one_pe); + ASSERT_EQ(aclrtSynchronizeStream(stream), 0); + ASSERT_EQ( + aclrtMemcpy(addr_host_vec, sizeof(uint64_t), addr_dev_vec, sizeof(uint64_t), ACL_MEMCPY_DEVICE_TO_HOST), + 0); + ASSERT_EQ((*addr_host_vec), i); + shm::shmemi_control_barrier_all(); + } + } + + ASSERT_EQ(aclrtFreeHost(addr_host), 0); + shmem_free(addr_dev); + ASSERT_EQ(aclrtFreeHost(addr_host_vec), 0); + shmem_free(addr_dev_vec); + + shmem_team_destroy(team_one_pe); + + test_finalize(stream, device_id); + if (::testing::Test::HasFailure()) { + exit(1); + } +} + TEST(TEST_SYNC_API, test_barrier_black_box) { const int32_t process_count = test_gnpu_num; @@ -139,3 +345,24 @@ TEST(TEST_SYNC_API, test_barrier_black_box_odd_team) uint64_t local_mem_size = 1024UL * 1024UL * 16; test_mutil_task(test_barrier_black_box_odd_team, local_mem_size, process_count); } + +TEST(TEST_SYNC_API, test_barrier_black_box_team_part_pes) +{ + const int32_t process_count = test_gnpu_num; + uint64_t local_mem_size = 1024UL * 1024UL * 16; + test_mutil_task(test_barrier_black_box_team_part_pes, local_mem_size, process_count); +} + +TEST(TEST_SYNC_API, test_barrier_black_box_half_team) +{ + const int32_t process_count = test_gnpu_num; + uint64_t local_mem_size = 1024UL * 1024UL * 16; + test_mutil_task(test_barrier_black_box_half_team, local_mem_size, process_count); +} + +TEST(TEST_SYNC_API, test_barrier_black_box_one_pe_team) +{ + const int32_t process_count = test_gnpu_num; + uint64_t local_mem_size = 1024UL * 1024UL * 16; + test_mutil_task(test_barrier_black_box_one_pe_team, local_mem_size, process_count); +} diff --git a/tests/unittest/host/sync/order/order_host_test.cpp b/tests/unittest/host/sync/order/order_host_test.cpp index bd2c10f0..79560eb7 100644 --- a/tests/unittest/host/sync/order/order_host_test.cpp +++ b/tests/unittest/host/sync/order/order_host_test.cpp @@ -18,6 +18,8 @@ #include "unittest_main_test.h" #include "order_kernel.h" +constexpr int32_t SHMEM_ORDER_TEST_NUM = 1024; + static void test_quiet_order(int32_t rank_id, int32_t n_ranks, uint64_t local_mem_size) { aclrtStream stream; @@ -31,17 +33,20 @@ static void test_quiet_order(int32_t rank_id, int32_t n_ranks, uint64_t local_me std::vector host_buf(total_size, 0); - std::cout << "[TEST] fence order test rank " << rank_id << std::endl; - quiet_order_do(stream, shmemx_get_ffts_config(), (uint8_t *)dev_ptr, rank_id, n_ranks); + for (int32_t i = 1; i <= SHMEM_ORDER_TEST_NUM; i++) { + std::cout << "[TEST] quiet order test rank " << rank_id << " time: " << i << std::endl; + quiet_order_do(stream, shmemx_get_ffts_config(), (uint8_t *)dev_ptr, rank_id, n_ranks); - ASSERT_EQ(aclrtSynchronizeStream(stream), 0); - ASSERT_EQ(aclrtMemcpy(host_buf.data(), total_size * sizeof(uint64_t), dev_ptr, total_size * sizeof(uint64_t), - ACL_MEMCPY_DEVICE_TO_HOST), - 0); + ASSERT_EQ(aclrtSynchronizeStream(stream), 0); + ASSERT_EQ(aclrtMemcpy(host_buf.data(), total_size * sizeof(uint64_t), dev_ptr, total_size * sizeof(uint64_t), + ACL_MEMCPY_DEVICE_TO_HOST), + 0); - if (rank_id == 1) { - ASSERT_EQ(host_buf[33U], 0xBBu); - ASSERT_EQ(host_buf[34U], 0xAAu); + if (rank_id == 1) { + ASSERT_EQ(host_buf[33U], 0xBBu); + ASSERT_EQ(host_buf[34U], 0xAAu); + } + shm::shmemi_control_barrier_all(); } shmem_free(dev_ptr); @@ -65,18 +70,22 @@ static void test_fence_order(int32_t rank_id, int32_t n_ranks, uint64_t local_me std::vector addr_host(total_size, 0); - std::cout << "[TEST] fence order test rank " << rank_id << std::endl; - fence_order_do(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev, rank_id, n_ranks); + for (int32_t i = 1; i <= SHMEM_ORDER_TEST_NUM; i++) { + std::cout << "[TEST] fence order test rank " << rank_id << " time: " << i << std::endl; + fence_order_do(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev, rank_id, n_ranks); - ASSERT_EQ(aclrtSynchronizeStream(stream), 0); - ASSERT_EQ(aclrtMemcpy(addr_host.data(), total_size * sizeof(uint64_t), addr_dev, total_size * sizeof(uint64_t), - ACL_MEMCPY_DEVICE_TO_HOST), - 0); + ASSERT_EQ(aclrtSynchronizeStream(stream), 0); + ASSERT_EQ(aclrtMemcpy(addr_host.data(), total_size * sizeof(uint64_t), addr_dev, total_size * sizeof(uint64_t), + ACL_MEMCPY_DEVICE_TO_HOST), + 0); - if (rank_id == 1) { - ASSERT_EQ(addr_host[17U], 84u); - ASSERT_EQ(addr_host[18U], 42u); + if (rank_id == 1) { + ASSERT_EQ(addr_host[17U], 84u); + ASSERT_EQ(addr_host[18U], 42u); + } + shm::shmemi_control_barrier_all(); } + shmem_free(addr_dev); test_finalize(stream, device_id); diff --git a/tests/unittest/host/sync/p2p/p2p_host_test.cpp b/tests/unittest/host/sync/p2p/p2p_host_test.cpp index 63450f4f..dc80f10a 100644 --- a/tests/unittest/host/sync/p2p/p2p_host_test.cpp +++ b/tests/unittest/host/sync/p2p/p2p_host_test.cpp @@ -40,4 +40,129 @@ TEST(TEST_SYNC_API, test_p2p) const int32_t process_count = test_gnpu_num; uint64_t local_mem_size = 1024UL * 1024UL * 16; test_mutil_task(test_p2p, local_mem_size, process_count); +} + +static void test_p2p_cmp_ne(int rank_id, int rank_size, uint64_t local_mem_size) +{ + aclrtStream stream; + test_init(rank_id, rank_size, local_mem_size, &stream); + + int32_t *addr_dev = static_cast(shmem_malloc(sizeof(int32_t))); + ASSERT_EQ(aclrtMemset(addr_dev, sizeof(int32_t), 0, sizeof(int32_t)), 0); + + p2p_chain_cmp_ne_do(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev, rank_id, rank_size); + ASSERT_EQ(aclrtSynchronizeStream(stream), 0); + + shmem_free(addr_dev); + + int32_t dev_id = rank_id % test_gnpu_num + test_first_npu; + test_finalize(stream, dev_id); + if (::testing::Test::HasFailure()) exit(1); +} + +TEST(TEST_SYNC_API, test_p2p_cmp_ne) +{ + const int32_t process_count = test_gnpu_num; + uint64_t local_mem_size = 1024UL * 1024UL * 16; + test_mutil_task(test_p2p_cmp_ne, local_mem_size, process_count); +} + +static void test_p2p_cmp_gt(int rank_id, int rank_size, uint64_t local_mem_size) +{ + aclrtStream stream; + test_init(rank_id, rank_size, local_mem_size, &stream); + + int32_t *addr_dev = static_cast(shmem_malloc(sizeof(int32_t))); + ASSERT_EQ(aclrtMemset(addr_dev, sizeof(int32_t), 0, sizeof(int32_t)), 0); + + p2p_chain_cmp_gt_do(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev, rank_id, rank_size); + ASSERT_EQ(aclrtSynchronizeStream(stream), 0); + + shmem_free(addr_dev); + + int32_t dev_id = rank_id % test_gnpu_num + test_first_npu; + test_finalize(stream, dev_id); + if (::testing::Test::HasFailure()) exit(1); +} + +TEST(TEST_SYNC_API, test_p2p_cmp_gt) +{ + const int32_t process_count = test_gnpu_num; + uint64_t local_mem_size = 1024UL * 1024UL * 16; + test_mutil_task(test_p2p_cmp_gt, local_mem_size, process_count); +} + +static void test_p2p_cmp_ge(int rank_id, int rank_size, uint64_t local_mem_size) +{ + aclrtStream stream; + test_init(rank_id, rank_size, local_mem_size, &stream); + + int32_t *addr_dev = static_cast(shmem_malloc(sizeof(int32_t))); + ASSERT_EQ(aclrtMemset(addr_dev, sizeof(int32_t), 0, sizeof(int32_t)), 0); + + p2p_chain_cmp_ge_do(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev, rank_id, rank_size); + ASSERT_EQ(aclrtSynchronizeStream(stream), 0); + + shmem_free(addr_dev); + + int32_t dev_id = rank_id % test_gnpu_num + test_first_npu; + test_finalize(stream, dev_id); + if (::testing::Test::HasFailure()) exit(1); +} + +TEST(TEST_SYNC_API, test_p2p_cmp_ge) +{ + const int32_t process_count = test_gnpu_num; + uint64_t local_mem_size = 1024UL * 1024UL * 16; + test_mutil_task(test_p2p_cmp_ge, local_mem_size, process_count); +} + +static void test_p2p_cmp_lt(int rank_id, int rank_size, uint64_t local_mem_size) +{ + aclrtStream stream; + test_init(rank_id, rank_size, local_mem_size, &stream); + + int32_t *addr_dev = static_cast(shmem_malloc(sizeof(int32_t))); + ASSERT_EQ(aclrtMemset(addr_dev, sizeof(int32_t), 0, sizeof(int32_t)), 0); + + p2p_chain_cmp_lt_do(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev, rank_id, rank_size); + ASSERT_EQ(aclrtSynchronizeStream(stream), 0); + + shmem_free(addr_dev); + + int32_t dev_id = rank_id % test_gnpu_num + test_first_npu; + test_finalize(stream, dev_id); + if (::testing::Test::HasFailure()) exit(1); +} + +TEST(TEST_SYNC_API, test_p2p_cmp_lt) +{ + const int32_t process_count = test_gnpu_num; + uint64_t local_mem_size = 1024UL * 1024UL * 16; + test_mutil_task(test_p2p_cmp_lt, local_mem_size, process_count); +} + +static void test_p2p_cmp_le(int rank_id, int rank_size, uint64_t local_mem_size) +{ + aclrtStream stream; + test_init(rank_id, rank_size, local_mem_size, &stream); + + int32_t *addr_dev = static_cast(shmem_malloc(sizeof(int32_t))); + ASSERT_EQ(aclrtMemset(addr_dev, sizeof(int32_t), 0, sizeof(int32_t)), 0); + + p2p_chain_cmp_le_do(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev, rank_id, rank_size); + ASSERT_EQ(aclrtSynchronizeStream(stream), 0); + + shmem_free(addr_dev); + + int32_t dev_id = rank_id % test_gnpu_num + test_first_npu; + test_finalize(stream, dev_id); + if (::testing::Test::HasFailure()) exit(1); +} + +TEST(TEST_SYNC_API, test_p2p_cmp_le) +{ + const int32_t process_count = test_gnpu_num; + uint64_t local_mem_size = 1024UL * 1024UL * 16; + test_mutil_task(test_p2p_cmp_le, local_mem_size, process_count); } \ No newline at end of file -- Gitee From 0767699e14224a4c50037d91899f940931ab4697 Mon Sep 17 00:00:00 2001 From: caixilong Date: Wed, 3 Dec 2025 14:13:58 +0800 Subject: [PATCH 2/3] cleancode --- tests/unittest/host/main_test.cpp | 8 +++++- .../host/sync/barrier/barrier_host_test.cpp | 27 +++++-------------- .../host/sync/order/order_host_test.cpp | 8 +++--- .../unittest/host/sync/p2p/p2p_host_test.cpp | 8 ------ 4 files changed, 17 insertions(+), 34 deletions(-) diff --git a/tests/unittest/host/main_test.cpp b/tests/unittest/host/main_test.cpp index a91dbaba..8b14b152 100644 --- a/tests/unittest/host/main_test.cpp +++ b/tests/unittest/host/main_test.cpp @@ -88,10 +88,16 @@ void test_mutil_task(std::function func, uint64_t loca std::cout << "fork failed ! " << pids[i] << std::endl; } else if (pids[i] == 0) { func(i + test_first_rank, test_global_ranks, local_mem_size); - exit(0); + if (::testing::Test::HasFailure()) { + _exit(1); + } + _exit(0); } } for (int i = 0; i < process_count; ++i) { + if (pids[i] <= 0) { + continue; + } waitpid(pids[i], &status[i], 0); if (WIFEXITED(status[i]) && WEXITSTATUS(status[i]) != 0) { FAIL(); diff --git a/tests/unittest/host/sync/barrier/barrier_host_test.cpp b/tests/unittest/host/sync/barrier/barrier_host_test.cpp index 81cabd04..b9edda89 100644 --- a/tests/unittest/host/sync/barrier/barrier_host_test.cpp +++ b/tests/unittest/host/sync/barrier/barrier_host_test.cpp @@ -44,7 +44,7 @@ static void test_barrier_black_box(int32_t rank_id, int32_t n_ranks, uint64_t lo std::cout << "[TEST] barriers test blackbox rank_id: " << rank_id << " time: " << i << std::endl; increase_odd_do(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev, rank_id, n_ranks); ASSERT_EQ(aclrtSynchronizeStream(stream), 0); - if (rank_id & 1){ + if (rank_id & 1) { ASSERT_EQ(aclrtMemcpy(addr_host, sizeof(uint64_t), addr_dev, sizeof(uint64_t), ACL_MEMCPY_DEVICE_TO_HOST), 0); ASSERT_EQ((*addr_host), i); } @@ -70,7 +70,7 @@ static void test_barrier_black_box(int32_t rank_id, int32_t n_ranks, uint64_t lo std::cout << "[TEST] barriers test blackbox rank_id: " << rank_id << " time: " << i << std::endl; increase_vec_odd_do(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev_vec, rank_id, n_ranks); ASSERT_EQ(aclrtSynchronizeStream(stream), 0); - if (rank_id & 1){ + if (rank_id & 1) { ASSERT_EQ(aclrtMemcpy(addr_host_vec, sizeof(uint64_t), addr_dev_vec, sizeof(uint64_t), ACL_MEMCPY_DEVICE_TO_HOST), 0); ASSERT_EQ((*addr_host_vec), i); } @@ -83,9 +83,6 @@ static void test_barrier_black_box(int32_t rank_id, int32_t n_ranks, uint64_t lo shmem_free(addr_dev_vec); test_finalize(stream, device_id); - if (::testing::Test::HasFailure()) { - exit(1); - } } static void test_barrier_black_box_odd_team(int32_t rank_id, int32_t n_ranks, uint64_t local_mem_size) @@ -143,9 +140,6 @@ static void test_barrier_black_box_odd_team(int32_t rank_id, int32_t n_ranks, ui shmem_team_destroy(team_odd); test_finalize(stream, device_id); - if (::testing::Test::HasFailure()) { - exit(1); - } } static void test_barrier_black_box_team_part_pes(int32_t rank_id, int32_t n_ranks, uint64_t local_mem_size) @@ -178,7 +172,7 @@ static void test_barrier_black_box_team_part_pes(int32_t rank_id, int32_t n_rank ASSERT_EQ(aclrtSynchronizeStream(stream), 0); if (rank_id & 1) { ASSERT_EQ(aclrtMemcpy(addr_host, sizeof(uint64_t), addr_dev, sizeof(uint64_t), ACL_MEMCPY_DEVICE_TO_HOST), - 0); + 0); ASSERT_EQ((*addr_host), i); } shm::shmemi_control_barrier_all(); @@ -187,7 +181,7 @@ static void test_barrier_black_box_team_part_pes(int32_t rank_id, int32_t n_rank for (int32_t i = 1; i <= SHMEM_BARRIER_TEST_NUM; i++) { std::cout << "[TEST] vec barriers test blackbox rank_id: " << rank_id << " time: " << i << std::endl; increase_vec_do_part_pes(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev_vec, rank_id, n_ranks, - team); + team); ASSERT_EQ(aclrtSynchronizeStream(stream), 0); if (rank_id & 1) { ASSERT_EQ( @@ -207,9 +201,6 @@ static void test_barrier_black_box_team_part_pes(int32_t rank_id, int32_t n_rank shmem_team_destroy(team); test_finalize(stream, device_id); - if (::testing::Test::HasFailure()) { - exit(1); - } } static void test_barrier_black_box_half_team(int32_t rank_id, int32_t n_ranks, uint64_t local_mem_size) @@ -249,7 +240,7 @@ static void test_barrier_black_box_half_team(int32_t rank_id, int32_t n_ranks, u for (int32_t i = 1; i <= SHMEM_BARRIER_TEST_NUM; i++) { std::cout << "[TEST] vec barriers test blackbox rank_id: " << rank_id << " time: " << i << std::endl; increase_vec_do_half_team(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev_vec, rank_id, n_ranks, - team_half); + team_half); ASSERT_EQ(aclrtSynchronizeStream(stream), 0); ASSERT_EQ( aclrtMemcpy(addr_host_vec, sizeof(uint64_t), addr_dev_vec, sizeof(uint64_t), ACL_MEMCPY_DEVICE_TO_HOST), @@ -267,9 +258,6 @@ static void test_barrier_black_box_half_team(int32_t rank_id, int32_t n_ranks, u shmem_team_destroy(team_half); test_finalize(stream, device_id); - if (::testing::Test::HasFailure()) { - exit(1); - } } static void test_barrier_black_box_one_pe_team(int32_t rank_id, int32_t n_ranks, uint64_t local_mem_size) @@ -309,7 +297,7 @@ static void test_barrier_black_box_one_pe_team(int32_t rank_id, int32_t n_ranks, for (int32_t i = 1; i <= SHMEM_BARRIER_TEST_NUM; i++) { std::cout << "[TEST] vec barriers test blackbox rank_id: " << rank_id << " time: " << i << std::endl; increase_vec_do_one_pe_team(stream, shmemx_get_ffts_config(), (uint8_t *)addr_dev_vec, rank_id, n_ranks, - team_one_pe); + team_one_pe); ASSERT_EQ(aclrtSynchronizeStream(stream), 0); ASSERT_EQ( aclrtMemcpy(addr_host_vec, sizeof(uint64_t), addr_dev_vec, sizeof(uint64_t), ACL_MEMCPY_DEVICE_TO_HOST), @@ -327,9 +315,6 @@ static void test_barrier_black_box_one_pe_team(int32_t rank_id, int32_t n_ranks, shmem_team_destroy(team_one_pe); test_finalize(stream, device_id); - if (::testing::Test::HasFailure()) { - exit(1); - } } TEST(TEST_SYNC_API, test_barrier_black_box) diff --git a/tests/unittest/host/sync/order/order_host_test.cpp b/tests/unittest/host/sync/order/order_host_test.cpp index 79560eb7..5694389f 100644 --- a/tests/unittest/host/sync/order/order_host_test.cpp +++ b/tests/unittest/host/sync/order/order_host_test.cpp @@ -39,8 +39,8 @@ static void test_quiet_order(int32_t rank_id, int32_t n_ranks, uint64_t local_me ASSERT_EQ(aclrtSynchronizeStream(stream), 0); ASSERT_EQ(aclrtMemcpy(host_buf.data(), total_size * sizeof(uint64_t), dev_ptr, total_size * sizeof(uint64_t), - ACL_MEMCPY_DEVICE_TO_HOST), - 0); + ACL_MEMCPY_DEVICE_TO_HOST), + 0); if (rank_id == 1) { ASSERT_EQ(host_buf[33U], 0xBBu); @@ -76,8 +76,8 @@ static void test_fence_order(int32_t rank_id, int32_t n_ranks, uint64_t local_me ASSERT_EQ(aclrtSynchronizeStream(stream), 0); ASSERT_EQ(aclrtMemcpy(addr_host.data(), total_size * sizeof(uint64_t), addr_dev, total_size * sizeof(uint64_t), - ACL_MEMCPY_DEVICE_TO_HOST), - 0); + ACL_MEMCPY_DEVICE_TO_HOST), + 0); if (rank_id == 1) { ASSERT_EQ(addr_host[17U], 84u); diff --git a/tests/unittest/host/sync/p2p/p2p_host_test.cpp b/tests/unittest/host/sync/p2p/p2p_host_test.cpp index dc80f10a..0bdb6934 100644 --- a/tests/unittest/host/sync/p2p/p2p_host_test.cpp +++ b/tests/unittest/host/sync/p2p/p2p_host_test.cpp @@ -30,9 +30,6 @@ static void test_p2p(int rank_id, int rank_size, uint64_t local_mem_size) int32_t dev_id = rank_id % test_gnpu_num + test_first_npu; test_finalize(stream, dev_id); - if (::testing::Test::HasFailure()) { - exit(1); - } } TEST(TEST_SYNC_API, test_p2p) @@ -57,7 +54,6 @@ static void test_p2p_cmp_ne(int rank_id, int rank_size, uint64_t local_mem_size) int32_t dev_id = rank_id % test_gnpu_num + test_first_npu; test_finalize(stream, dev_id); - if (::testing::Test::HasFailure()) exit(1); } TEST(TEST_SYNC_API, test_p2p_cmp_ne) @@ -82,7 +78,6 @@ static void test_p2p_cmp_gt(int rank_id, int rank_size, uint64_t local_mem_size) int32_t dev_id = rank_id % test_gnpu_num + test_first_npu; test_finalize(stream, dev_id); - if (::testing::Test::HasFailure()) exit(1); } TEST(TEST_SYNC_API, test_p2p_cmp_gt) @@ -107,7 +102,6 @@ static void test_p2p_cmp_ge(int rank_id, int rank_size, uint64_t local_mem_size) int32_t dev_id = rank_id % test_gnpu_num + test_first_npu; test_finalize(stream, dev_id); - if (::testing::Test::HasFailure()) exit(1); } TEST(TEST_SYNC_API, test_p2p_cmp_ge) @@ -132,7 +126,6 @@ static void test_p2p_cmp_lt(int rank_id, int rank_size, uint64_t local_mem_size) int32_t dev_id = rank_id % test_gnpu_num + test_first_npu; test_finalize(stream, dev_id); - if (::testing::Test::HasFailure()) exit(1); } TEST(TEST_SYNC_API, test_p2p_cmp_lt) @@ -157,7 +150,6 @@ static void test_p2p_cmp_le(int rank_id, int rank_size, uint64_t local_mem_size) int32_t dev_id = rank_id % test_gnpu_num + test_first_npu; test_finalize(stream, dev_id); - if (::testing::Test::HasFailure()) exit(1); } TEST(TEST_SYNC_API, test_p2p_cmp_le) -- Gitee From 8308dfd2758c0db05d7d9413b000105d5d43b191 Mon Sep 17 00:00:00 2001 From: caixilong Date: Wed, 3 Dec 2025 14:36:30 +0800 Subject: [PATCH 3/3] add get_config case --- .../host/sync/config/config_host_test.cpp | 44 +++++++++++++++++++ .../host/sync/order/order_host_test.cpp | 4 +- 2 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 tests/unittest/host/sync/config/config_host_test.cpp diff --git a/tests/unittest/host/sync/config/config_host_test.cpp b/tests/unittest/host/sync/config/config_host_test.cpp new file mode 100644 index 00000000..7ddacf47 --- /dev/null +++ b/tests/unittest/host/sync/config/config_host_test.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2025 Huawei Technologies Co., Ltd. + * This program is free software, you can redistribute it and/or modify it under the terms and conditions of + * CANN Open Software License Agreement Version 2.0 (the "License"). + * Please refer to the License for details. You may not use this file except in compliance with the License. + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. + * See LICENSE in the root of the software repository for the full text of the License. + */ +#include +#include +#include + +#include "acl/acl.h" +#include "shmem_api.h" +#include "shmemi_host_common.h" +#include "unittest_main_test.h" + +static void test_get_ffts_config_lifecycle(int32_t rank_id, int32_t n_ranks, uint64_t local_mem_size) +{ + const uint64_t cfg_before = shmemx_get_ffts_config(); + EXPECT_EQ(cfg_before, 0UL); + + int32_t device_id = rank_id % test_gnpu_num + test_first_npu; + aclrtStream stream {nullptr}; + test_init(rank_id, n_ranks, local_mem_size, &stream); + ASSERT_NE(stream, nullptr); + + const uint64_t cfg_after_init = shmemx_get_ffts_config(); + ASSERT_NE(cfg_after_init, 0UL); + + for (int i = 0; i < 256; ++i) { + ASSERT_EQ(shmemx_get_ffts_config(), cfg_after_init); + } + + test_finalize(stream, device_id); +} + +TEST(TEST_SYNC_API, test_get_ffts_config_lifecycle) +{ + const int32_t process_count = test_gnpu_num; + const uint64_t local_mem_size = 1024UL * 1024UL * 16; + test_mutil_task(test_get_ffts_config_lifecycle, local_mem_size, process_count); +} diff --git a/tests/unittest/host/sync/order/order_host_test.cpp b/tests/unittest/host/sync/order/order_host_test.cpp index 5694389f..48768577 100644 --- a/tests/unittest/host/sync/order/order_host_test.cpp +++ b/tests/unittest/host/sync/order/order_host_test.cpp @@ -40,7 +40,7 @@ static void test_quiet_order(int32_t rank_id, int32_t n_ranks, uint64_t local_me ASSERT_EQ(aclrtSynchronizeStream(stream), 0); ASSERT_EQ(aclrtMemcpy(host_buf.data(), total_size * sizeof(uint64_t), dev_ptr, total_size * sizeof(uint64_t), ACL_MEMCPY_DEVICE_TO_HOST), - 0); + 0); if (rank_id == 1) { ASSERT_EQ(host_buf[33U], 0xBBu); @@ -77,7 +77,7 @@ static void test_fence_order(int32_t rank_id, int32_t n_ranks, uint64_t local_me ASSERT_EQ(aclrtSynchronizeStream(stream), 0); ASSERT_EQ(aclrtMemcpy(addr_host.data(), total_size * sizeof(uint64_t), addr_dev, total_size * sizeof(uint64_t), ACL_MEMCPY_DEVICE_TO_HOST), - 0); + 0); if (rank_id == 1) { ASSERT_EQ(addr_host[17U], 84u); -- Gitee