From b125d7ab0b65a868a4337b3c0dcac8589ae44101 Mon Sep 17 00:00:00 2001 From: guopeian Date: Thu, 27 Nov 2025 16:18:05 +0000 Subject: [PATCH 1/8] =?UTF-8?q?!3061=20=E6=94=AF=E6=8C=81string=E7=B1=BB?= =?UTF-8?q?=E5=9E=8B=E8=BE=93=E5=85=A5=20Merge=20pull=20request=20!3061=20?= =?UTF-8?q?from=20guopeian/string=5Fsupport?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tf_adapter/kernels/geop_npu.cc | 15 ++++++++++----- tf_adapter/kernels/geop_npu.h | 2 +- .../optimizers/om_partition_subgraphs_pass.cc | 11 +++++++++-- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc index bd66ca6a2..bcad34248 100644 --- a/tf_adapter/kernels/geop_npu.cc +++ b/tf_adapter/kernels/geop_npu.cc @@ -2266,12 +2266,12 @@ void GeOp::AnalyzeInputDesc(bool need_collect_shapes, void *tensor_ptr, ge::Tens << ", input data addr: " << reinterpret_cast(data); } -Status GeOp::AnalyzeStringInput(ge::Tensor &input, uint64_t count, const std::string *string_vector) const { +Status GeOp::AnalyzeStringInput(ge::Tensor &input, const std::vector &string_vector) const { + const size_t count = string_vector.size(); uint64_t total_size = 0U; for (uint64_t i = 0U; i < count; i++) { total_size += (string_vector[i].size() + sizeof(ge::StringHead) + 1U); } - std::unique_ptr addr(new (std::nothrow) char[total_size]()); REQUIRES_NOT_NULL(addr); ge::StringHead *string_head = ge::PtrToPtr(addr.get()); @@ -2296,7 +2296,9 @@ Status GeOp::AnalyzeStringInput(ge::Tensor &input, uint64_t count, const std::st NPU_REQUIRES(ret == EOK, errors::Internal("call memcpy_s failed, ret:", ret)); data_addr += (str_size + 1U); offset += (static_cast(str_size) + 1); + ADP_LOG(INFO) << "[GEOP] String input element: " << i << ", size: " << str_size; } + ADP_LOG(INFO) << "[GEOP] String input total size " << total_size; input.SetData(ge::PtrToPtr(addr.get()), total_size); return Status::OK(); } @@ -2454,10 +2456,13 @@ Status GeOp::BuildInputTensorInfo(OpKernelContext *const ctx, std::vector(tensor.NumElements()); - std::string *string_vector = static_cast(tensor_ptr); - if (AnalyzeStringInput(input, count, string_vector) != Status::OK()) { + std::vector string_vector; + for (uint64_t i = 0UL; i < count; i++) { + string_vector.emplace_back(tensor.flat()(i)); + } + ADP_LOG(INFO) << "[GEOP] Analyze string input: " << i << ", element num: " << count; + if (AnalyzeStringInput(input, string_vector) != Status::OK()) { return errors::Internal("The input string data analyze failed."); } } else { diff --git a/tf_adapter/kernels/geop_npu.h b/tf_adapter/kernels/geop_npu.h index c0a0b8028..5a4fcd3cd 100644 --- a/tf_adapter/kernels/geop_npu.h +++ b/tf_adapter/kernels/geop_npu.h @@ -99,7 +99,7 @@ public: std::vector &partition_graph, std::map &const_value_map); // Analyze sting input data - Status AnalyzeStringInput(ge::Tensor &input, uint64_t count, const std::string *string_vector) const; + Status AnalyzeStringInput(ge::Tensor &input, const std::vector &string_vector) const; // prepare input tensor Status BuildInputTensorInfo(OpKernelContext *const ctx, diff --git a/tf_adapter/optimizers/om_partition_subgraphs_pass.cc b/tf_adapter/optimizers/om_partition_subgraphs_pass.cc index 96da1c1c2..7ec2cd4c5 100644 --- a/tf_adapter/optimizers/om_partition_subgraphs_pass.cc +++ b/tf_adapter/optimizers/om_partition_subgraphs_pass.cc @@ -566,6 +566,7 @@ Status FindNpuSupportCandidates(const Graph &graph, OrderedNodeSet *candidates, int64 startTime = InferShapeUtil::GetCurrentTimestap(); bool enable_dp = pass_options["enable_dp"] == "1"; bool mix_compile_mode = pass_options["mix_compile_mode"] == "1"; + int32_t iterations_per_loop = std::atoi(pass_options["iterations_per_loop"].c_str()); compile_mode = mix_compile_mode; std::vector sortedNodes; bool hasIteratorOp = false; @@ -696,11 +697,14 @@ Status FindNpuSupportCandidates(const Graph &graph, OrderedNodeSet *candidates, << node->name() << " REF input."; continue; } - if ((dtype_dst == DT_STRING) || (dtype_dst == DT_RESOURCE) || (dtype_dst == DT_VARIANT)) { + if ((dtype_dst == DT_RESOURCE) || (dtype_dst == DT_VARIANT) || (dtype_dst == DT_STRING)) { const AttrValue *attr_value = edge->dst()->attrs().Find(ATTR_NAME_OP_MAX_SIZE); if (attr_value != nullptr) { continue; } if (edge->dst()->type_string() == "Assert") { continue; } if (node->type_string() == "Const") { continue; } + // 非循环下沉场景,string可以作为输出 + if ((iterations_per_loop == 1) && (dtype_dst == DT_STRING)) { continue; } + ADP_LOG(INFO) << "remove node: " << edge->dst()->name() << "from candidates because string or resource"; if (candidates->erase(edge->dst()) > 0) { (void) outSet.insert(edge->dst()); } } } @@ -718,7 +722,7 @@ Status FindNpuSupportCandidates(const Graph &graph, OrderedNodeSet *candidates, << node->name() << " REF Output."; continue; } - if ((dtype_dst == DT_STRING) || (dtype_dst == DT_RESOURCE) || (dtype_dst == DT_VARIANT)) { + if ((dtype_dst == DT_RESOURCE) || (dtype_dst == DT_VARIANT) || (dtype_dst == DT_STRING)) { const AttrValue *attr_value = node->attrs().Find(ATTR_NAME_OP_MAX_SIZE); if (attr_value != nullptr) { ADP_LOG(INFO) << "Node : " << node->name() << " add to candidates, because of had max size."; @@ -730,6 +734,9 @@ Status FindNpuSupportCandidates(const Graph &graph, OrderedNodeSet *candidates, ADP_LOG(EVENT) << "GetNext: " << src_node_type << " node should sink if enable_data_pre_proc is true"; continue; } + // 非循环下沉场景,string可以作为输入 + if ((iterations_per_loop == 1) && (dtype_dst == DT_STRING)) { continue; } + ADP_LOG(INFO) << "remove node: " << edge->src()->name() << "from candidates because string or resource"; if (candidates->erase(edge->src()) > 0) { (void) outSet.insert(edge->src()); } } } -- Gitee From bb7f1502e34d622d87adf1ee659be6dff5f06a23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E7=AC=91=E5=A4=A9?= Date: Fri, 5 Dec 2025 06:40:56 +0000 Subject: [PATCH 2/8] =?UTF-8?q?=E5=9B=9E=E9=80=80=20'Pull=20Request=20!306?= =?UTF-8?q?1=20:=20=E6=94=AF=E6=8C=81string=E7=B1=BB=E5=9E=8B=E8=BE=93?= =?UTF-8?q?=E5=85=A5'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tf_adapter/kernels/geop_npu.cc | 15 +++++---------- tf_adapter/kernels/geop_npu.h | 2 +- .../optimizers/om_partition_subgraphs_pass.cc | 11 ++--------- 3 files changed, 8 insertions(+), 20 deletions(-) diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc index bcad34248..bd66ca6a2 100644 --- a/tf_adapter/kernels/geop_npu.cc +++ b/tf_adapter/kernels/geop_npu.cc @@ -2266,12 +2266,12 @@ void GeOp::AnalyzeInputDesc(bool need_collect_shapes, void *tensor_ptr, ge::Tens << ", input data addr: " << reinterpret_cast(data); } -Status GeOp::AnalyzeStringInput(ge::Tensor &input, const std::vector &string_vector) const { - const size_t count = string_vector.size(); +Status GeOp::AnalyzeStringInput(ge::Tensor &input, uint64_t count, const std::string *string_vector) const { uint64_t total_size = 0U; for (uint64_t i = 0U; i < count; i++) { total_size += (string_vector[i].size() + sizeof(ge::StringHead) + 1U); } + std::unique_ptr addr(new (std::nothrow) char[total_size]()); REQUIRES_NOT_NULL(addr); ge::StringHead *string_head = ge::PtrToPtr(addr.get()); @@ -2296,9 +2296,7 @@ Status GeOp::AnalyzeStringInput(ge::Tensor &input, const std::vector(str_size) + 1); - ADP_LOG(INFO) << "[GEOP] String input element: " << i << ", size: " << str_size; } - ADP_LOG(INFO) << "[GEOP] String input total size " << total_size; input.SetData(ge::PtrToPtr(addr.get()), total_size); return Status::OK(); } @@ -2456,13 +2454,10 @@ Status GeOp::BuildInputTensorInfo(OpKernelContext *const ctx, std::vector(tensor.NumElements()); - std::vector string_vector; - for (uint64_t i = 0UL; i < count; i++) { - string_vector.emplace_back(tensor.flat()(i)); - } - ADP_LOG(INFO) << "[GEOP] Analyze string input: " << i << ", element num: " << count; - if (AnalyzeStringInput(input, string_vector) != Status::OK()) { + std::string *string_vector = static_cast(tensor_ptr); + if (AnalyzeStringInput(input, count, string_vector) != Status::OK()) { return errors::Internal("The input string data analyze failed."); } } else { diff --git a/tf_adapter/kernels/geop_npu.h b/tf_adapter/kernels/geop_npu.h index 5a4fcd3cd..c0a0b8028 100644 --- a/tf_adapter/kernels/geop_npu.h +++ b/tf_adapter/kernels/geop_npu.h @@ -99,7 +99,7 @@ public: std::vector &partition_graph, std::map &const_value_map); // Analyze sting input data - Status AnalyzeStringInput(ge::Tensor &input, const std::vector &string_vector) const; + Status AnalyzeStringInput(ge::Tensor &input, uint64_t count, const std::string *string_vector) const; // prepare input tensor Status BuildInputTensorInfo(OpKernelContext *const ctx, diff --git a/tf_adapter/optimizers/om_partition_subgraphs_pass.cc b/tf_adapter/optimizers/om_partition_subgraphs_pass.cc index 7ec2cd4c5..96da1c1c2 100644 --- a/tf_adapter/optimizers/om_partition_subgraphs_pass.cc +++ b/tf_adapter/optimizers/om_partition_subgraphs_pass.cc @@ -566,7 +566,6 @@ Status FindNpuSupportCandidates(const Graph &graph, OrderedNodeSet *candidates, int64 startTime = InferShapeUtil::GetCurrentTimestap(); bool enable_dp = pass_options["enable_dp"] == "1"; bool mix_compile_mode = pass_options["mix_compile_mode"] == "1"; - int32_t iterations_per_loop = std::atoi(pass_options["iterations_per_loop"].c_str()); compile_mode = mix_compile_mode; std::vector sortedNodes; bool hasIteratorOp = false; @@ -697,14 +696,11 @@ Status FindNpuSupportCandidates(const Graph &graph, OrderedNodeSet *candidates, << node->name() << " REF input."; continue; } - if ((dtype_dst == DT_RESOURCE) || (dtype_dst == DT_VARIANT) || (dtype_dst == DT_STRING)) { + if ((dtype_dst == DT_STRING) || (dtype_dst == DT_RESOURCE) || (dtype_dst == DT_VARIANT)) { const AttrValue *attr_value = edge->dst()->attrs().Find(ATTR_NAME_OP_MAX_SIZE); if (attr_value != nullptr) { continue; } if (edge->dst()->type_string() == "Assert") { continue; } if (node->type_string() == "Const") { continue; } - // 非循环下沉场景,string可以作为输出 - if ((iterations_per_loop == 1) && (dtype_dst == DT_STRING)) { continue; } - ADP_LOG(INFO) << "remove node: " << edge->dst()->name() << "from candidates because string or resource"; if (candidates->erase(edge->dst()) > 0) { (void) outSet.insert(edge->dst()); } } } @@ -722,7 +718,7 @@ Status FindNpuSupportCandidates(const Graph &graph, OrderedNodeSet *candidates, << node->name() << " REF Output."; continue; } - if ((dtype_dst == DT_RESOURCE) || (dtype_dst == DT_VARIANT) || (dtype_dst == DT_STRING)) { + if ((dtype_dst == DT_STRING) || (dtype_dst == DT_RESOURCE) || (dtype_dst == DT_VARIANT)) { const AttrValue *attr_value = node->attrs().Find(ATTR_NAME_OP_MAX_SIZE); if (attr_value != nullptr) { ADP_LOG(INFO) << "Node : " << node->name() << " add to candidates, because of had max size."; @@ -734,9 +730,6 @@ Status FindNpuSupportCandidates(const Graph &graph, OrderedNodeSet *candidates, ADP_LOG(EVENT) << "GetNext: " << src_node_type << " node should sink if enable_data_pre_proc is true"; continue; } - // 非循环下沉场景,string可以作为输入 - if ((iterations_per_loop == 1) && (dtype_dst == DT_STRING)) { continue; } - ADP_LOG(INFO) << "remove node: " << edge->src()->name() << "from candidates because string or resource"; if (candidates->erase(edge->src()) > 0) { (void) outSet.insert(edge->src()); } } } -- Gitee From f6601aeff6727350c9c1d11778dba01c759f36f8 Mon Sep 17 00:00:00 2001 From: ZhouChen Date: Sat, 6 Dec 2025 02:44:25 +0000 Subject: [PATCH 3/8] =?UTF-8?q?!3069=20=E4=BF=AE=E6=94=B9cmakelists?= =?UTF-8?q?=EF=BC=8C=E9=80=82=E9=85=8D=E5=BC=80=E6=BA=90=E5=BC=80=E6=94=BE?= =?UTF-8?q?=20Merge=20pull=20request=20!3069=20from=20ZhouChen/fix=5Fcompi?= =?UTF-8?q?le=5F1205?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 2 ++ tf_adapter_2.x/CMakeLists.txt | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index ae01de341..b7b408e21 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,6 +42,8 @@ if (ENABLE_OPEN_SRC) include_directories(${ASCEND_OPENSDK_DIR}/include/metadef/pkg_inc) include_directories(${ASCEND_OPENSDK_DIR}/include/metadef/external) include_directories(${CMAKE_CURRENT_LIST_DIR}/inc) + file(COPY ${ASCEND_OPENSDK_DIR}/include/metadef/register/register.h DESTINATION ${CMAKE_CURRENT_LIST_DIR}/inc/graph_metadef/register/) + file(COPY ${ASCEND_OPENSDK_DIR}/include/metadef/external/register/register.h DESTINATION ${CMAKE_CURRENT_LIST_DIR}/inc/include/register/) if (NOT EXISTS ${CMAKE_CURRENT_LIST_DIR}/tools/COMPILE_FLAGS OR NOT EXISTS ${CMAKE_CURRENT_LIST_DIR}/tools/LINK_FLAGS OR NOT EXISTS diff --git a/tf_adapter_2.x/CMakeLists.txt b/tf_adapter_2.x/CMakeLists.txt index 04c055b6d..1af119f42 100644 --- a/tf_adapter_2.x/CMakeLists.txt +++ b/tf_adapter_2.x/CMakeLists.txt @@ -36,6 +36,11 @@ else () set(CMAKE_C_FLAGS "${COMPILE_FLAG} ${CMAKE_C_FLAGS}") set(CMAKE_CXX_FLAGS "${COMPILE_FLAG} ${CMAKE_CXX_FLAGS}") endforeach (COMPILE_FLAG) + + file(COPY ${ASCEND_INSTALLED_PATH}/opensdk/opensdk/include/metadef/register/register.h DESTINATION ${CMAKE_CURRENT_LIST_DIR}/inc/graph_metadef/register/) + file(COPY ${ASCEND_INSTALLED_PATH}/opensdk/opensdk/include/metadef/external/register/register.h DESTINATION ${CMAKE_CURRENT_LIST_DIR}/inc/include/register/) + + include_directories(${CMAKE_CURRENT_LIST_DIR}/inc) endif () include(${CMAKE_CURRENT_LIST_DIR}/cmake/nlohmann_json.cmake) -- Gitee From c2d72773fa8ffe2b764799513a6896102e02d43d Mon Sep 17 00:00:00 2001 From: huanruizhi Date: Mon, 8 Dec 2025 15:33:11 +0000 Subject: [PATCH 4/8] !3063 Log consistency modification Merge pull request !3063 from huanruizhi/dev --- tf_adapter/kernels/geop_npu.cc | 2 +- tf_adapter/python/npu_bridge/estimator/npu/npu_strategy.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc index bd66ca6a2..73a67ea94 100644 --- a/tf_adapter/kernels/geop_npu.cc +++ b/tf_adapter/kernels/geop_npu.cc @@ -1013,7 +1013,7 @@ Status GeOp::CreateGeSession() { << warning_message; } if (init_status != ge::SUCCESS) { - ADP_LOG(ERROR) << "[GePlugin] Init ge failed, ret : " << ToString(init_status); + ADP_LOG(ERROR) << "[GePlugin] Initialize ge failed, ret : " << ToString(init_status); const auto &error_message = GePlugin::GetInstance()->GetInitErrorMessage(); std::stringstream ss; ss << "[GePlugin] Initialize ge failed, ret : " << ToString(init_status) << std::endl diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_strategy.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_strategy.py index 25936eba5..858c39735 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_strategy.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_strategy.py @@ -45,4 +45,6 @@ class NPUStrategy(distribute_lib.StrategyV1): """NPU distribute strategy""" def __init__(self, device="/cpu:0"): + if device != "/cpu:0": + raise ValueError('"device" only support "/cpu:0"') super(NPUStrategy, self).__init__(NPUExtended(self, device)) -- Gitee From eadd457fd0ccf23984ffb05d47a0a167526e0878 Mon Sep 17 00:00:00 2001 From: ZhouChen Date: Tue, 9 Dec 2025 13:16:37 +0000 Subject: [PATCH 5/8] =?UTF-8?q?!3049=20=E5=88=86=E7=BA=A7=E7=BC=96?= =?UTF-8?q?=E8=AF=91=20Merge=20pull=20request=20!3049=20from=20ZhouChen/op?= =?UTF-8?q?timization=5Foption?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tf_adapter/interface_spec/api_npu_config.pyh | 2 +- tf_adapter/interface_spec/api_npu_plugin.pyh | 2 +- .../npu_bridge/estimator/npu/npu_config.py | 8 ++- .../npu_bridge/estimator/npu/npu_estimator.py | 22 ++++++++ .../npu_bridge/estimator/npu/npu_plugin.py | 9 ++- .../tests/st/util/testcase/ge_plugin_test.cc | 16 ++++++ .../tests/st/util/testcase/npu_attrs_test.cc | 32 +++++++++++ .../tests/ut/util/testcase/ge_plugin_test.cc | 17 ++++++ .../tests/ut/util/testcase/npu_attrs_test.cc | 36 ++++++++++++ tf_adapter/util/ge_plugin.cc | 10 ++++ tf_adapter/util/npu_attrs.cc | 56 ++++++++++++++++++- .../npu_device/core/npu_wrapper.cpp | 2 + .../python/npu_device/configs/npu_config.py | 2 + 13 files changed, 207 insertions(+), 7 deletions(-) diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh index 185a2d970..de7b76e6f 100644 --- a/tf_adapter/interface_spec/api_npu_config.pyh +++ b/tf_adapter/interface_spec/api_npu_config.pyh @@ -24,7 +24,7 @@ class NPURunConfig(run_config_lib.RunConfig): ac_parallel_enable=None, quant_dumpable=None, input_fusion_size=131072, compile_dynamic_mode=None, graph_max_parallel_model_num=1, export_compile_stat=1, aicore_num=None, oo_constant_folding=True, input_batch_cpy=False, shape_generalization_mode="STRICT", all_tensor_not_empty=False, - auto_multistream_parallel_mode=None): + auto_multistream_parallel_mode=None, oo_level="O3", optimization_switch=None): class ProfilingConfig(): def __init__(self, enable_profiling=False, profiling_options=None): diff --git a/tf_adapter/interface_spec/api_npu_plugin.pyh b/tf_adapter/interface_spec/api_npu_plugin.pyh index 2cd0a1608..f71758154 100644 --- a/tf_adapter/interface_spec/api_npu_plugin.pyh +++ b/tf_adapter/interface_spec/api_npu_plugin.pyh @@ -7,6 +7,6 @@ def npu_resource_init(graph_run_mode=1, op_debug_level=0, enable_profiling=False op_compiler_cache_mode=None, op_compiler_cache_dir=None, debug_dir=None, hcom_multi_mode=False, distribute_config=None, aoe_config_file=None, precision_mode_v2=None, export_compile_stat=1, aicore_num=None, oo_constant_folding=True, - input_batch_cpy=False): + input_batch_cpy=False, oo_level="O3", optimization_switch=None): def npu_resource_shutdown(): diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py index 645b1849d..674a372e4 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py @@ -121,7 +121,9 @@ class NPURunConfig(run_config_lib.RunConfig): input_batch_cpy=False, shape_generalization_mode="STRICT", all_tensor_not_empty=False, - auto_multistream_parallel_mode=None + auto_multistream_parallel_mode=None, + oo_level="O3", + optimization_switch=None ): """ Constructs a NPUConfig. @@ -199,6 +201,8 @@ class NPURunConfig(run_config_lib.RunConfig): ADAPTIVE: generalizes the varying axes. all_tensor_not_empty: default is: False. auto_multistream_parallel_mode: default is None; cv: cube vector parallel. + oo_level: The switch of optimization level. + optimization_switch: The switch of optimization switch. """ # Check iterations_per_loop. @@ -302,6 +306,8 @@ class NPURunConfig(run_config_lib.RunConfig): self._shape_generalization_mode = shape_generalization_mode self._all_tensor_not_empty = all_tensor_not_empty self._auto_multistream_parallel_mode = auto_multistream_parallel_mode + self._oo_level = oo_level + self._optimization_switch = optimization_switch super(NPURunConfig, self).__init__( model_dir=model_dir, tf_random_seed=tf_random_seed, diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py index 86bd6a2c7..3dec41e41 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py @@ -752,6 +752,24 @@ class NPUEstimator(estimator_lib.Estimator): if config._input_batch_cpy is not None: custom_op.parameter_map["input_batch_cpy"].b = config._input_batch_cpy + def __load_oo_level(self, config, custom_op): + """Load oo_level config, and add to custom_optimizers + Args: + config: NPURunConfig. + custom_op: Customer optimizers. + """ + if config._oo_level is not None: + custom_op.parameter_map["oo_level"].s = tf.compat.as_bytes(config._oo_level) + + def __load_optimization_switch(self, config, custom_op): + """Load optimization_switch config, and add to custom_optimizers + Args: + config: NPURunConfig. + custom_op: Customer optimizers. + """ + if config._optimization_switch is not None: + custom_op.parameter_map["optimization_switch"].s = tf.compat.as_bytes(config._optimization_switch) + def __load_graph_optimizers(self, config): """ Change the session config and load the graph optimizers: @@ -903,6 +921,10 @@ class NPUEstimator(estimator_lib.Estimator): self.__load_input_batch_cpy(config, custom_op) + self.__load_oo_level(config, custom_op) + + self.__load_optimization_switch(config, custom_op) + return config def __load_job_info(self, job_start_file): diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py index 97571455e..e59a7f0b7 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py @@ -76,7 +76,9 @@ def npu_resource_init(graph_run_mode=1, export_compile_stat=1, aicore_num=None, oo_constant_folding=True, - input_batch_cpy=False): + input_batch_cpy=False, + oo_level="O3", + optimization_switch=None): """Initialize NPU resource""" util.check_nonnegative_integer(graph_run_mode, "graph_run_mode") check_graph_run_mode(graph_run_mode) @@ -129,10 +131,13 @@ def npu_resource_init(graph_run_mode=1, if oo_constant_folding is not None: util.check_bool_type(oo_constant_folding, "oo_constant_folding") init["ge.oo.constantFolding"] = "true" if oo_constant_folding is True else "false" - # input_batch_cpy if input_batch_cpy is not None: util.check_bool_type(input_batch_cpy, "input_batch_cpy") init["ge.inputBatchCpy"] = "true" if input_batch_cpy is True else "false" + if oo_level is not None: + init["ge.oo.level"] = str(oo_level) + if optimization_switch is not None: + init["ge.optimization_switch"] = str(optimization_switch) init_options = tf_adapter.map_string_string(init) tf_adapter.PluginInit(init_options) diff --git a/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc b/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc index fbb165193..b4d628c64 100644 --- a/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc +++ b/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc @@ -229,5 +229,21 @@ TEST_F(GePluginTest, PluginInitTest_input_batch_cpy) { ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); NpuClose(); } + +TEST_F(GePluginTest, PluginInitTest_oo_level) { + std::map init_options; + init_options["ge.oo.level"] = "O3"; + PluginInit(init_options); + ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); + NpuClose(); +} + +TEST_F(GePluginTest, PluginInitTest_optimization_switch) { + std::map init_options; + init_options["ge.optimizationSwitch"] = "pass1:on"; + PluginInit(init_options); + ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); + NpuClose(); +} } } // end tensorflow diff --git a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc index d3503e35e..8a46d0094 100644 --- a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc @@ -601,5 +601,37 @@ TEST_F(NpuAttrTest, SetNpuOptimizerAttr_compile_hybrid_mode) { EXPECT_EQ(s.ok(), false); } +TEST_F(NpuAttrTest, GetAllAttrOptions_oo_level) { + AttrValueMap attr_map; + + AttrValue npu_optimizer = AttrValue(); + npu_optimizer.set_s("NpuOptimizer"); + attr_map["_NpuOptimizer"] = npu_optimizer; + + AttrValue oo_level = AttrValue(); + oo_level.set_s("O3"); + attr_map["_oo_level"] = oo_level; + + AttrSlice attrs(&attr_map); + const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); + EXPECT_NE(all_options.find("ge.oo.level"), all_options.cend()); +} + +TEST_F(NpuAttrTest, SetNpuOptimizerAttr_optimization_switch) { + AttrValueMap attr_map; + + AttrValue npu_optimizer = AttrValue(); + npu_optimizer.set_s("NpuOptimizer"); + attr_map["_NpuOptimizer"] = npu_optimizer; + + AttrValue optimization_switch = AttrValue(); + optimization_switch.set_s("pass1:on"); + attr_map["_optimization_switch"] = optimization_switch; + + AttrSlice attrs(&attr_map); + const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); + EXPECT_NE(all_options.find("ge.optimizationSwitch"), all_options.cend()); +} + } } // end tensorflow diff --git a/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc b/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc index fac80fc57..5bd524bf9 100644 --- a/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc +++ b/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc @@ -219,5 +219,22 @@ TEST_F(GePluginTest, PluginInitTest_input_batch_cpy) { ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); NpuClose(); } + +TEST_F(GePluginTest, PluginInitTest_oo_level) { + std::map init_options; + init_options["ge.oo.level"] = "O3"; + PluginInit(init_options); + ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); + NpuClose(); +} + +TEST_F(GePluginTest, PluginInitTest_oo_level2) { + std::map init_options; + init_options["ge.optimizationSwitch"] = "pass1:on"; + PluginInit(init_options); + ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); + NpuClose(); +} + } } // end tensorflow diff --git a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc index b9cdce532..6a4e614a6 100644 --- a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc @@ -739,5 +739,41 @@ TEST_F(NpuAttrTest, SetNpuOptimizerAttr_compile_hybrid_mode) { (*custom_config->mutable_parameter_map())["compile_hybrid_mode"] = compile_hybrid_mode_value; EXPECT_EQ(s.ok(), false); } + +TEST_F(NpuAttrTest, SetNpuOptimizerAttr_oo_level) { + GraphOptimizationPassOptions options; + SessionOptions session_options; + session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true); + auto *custom_config = + session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers(); + custom_config->set_name("NpuOptimizer"); + options.session_options = &session_options; + + AttrValue oo_level = AttrValue(); + oo_level.set_s("O3"); + (*custom_config->mutable_parameter_map())["oo_level"] = oo_level; + + AttrValue jit_compile = AttrValue(); + jit_compile.set_s("2"); + (*custom_config->mutable_parameter_map())["jit_compile"] = jit_compile; + Status s = NpuAttrs::SetNpuOptimizerAttr(options, reinterpret_cast(1)); + EXPECT_EQ(s.ok(), false); +} + +TEST_F(NpuAttrTest, GetAllAttrOptions_optimization_switch) { + AttrValueMap attr_map; + + AttrValue npu_optimizer = AttrValue(); + npu_optimizer.set_s("NpuOptimizer"); + attr_map["_NpuOptimizer"] = npu_optimizer; + + AttrValue optimization_switch = AttrValue(); + optimization_switch.set_s("pass1:on"); + attr_map["_optimization_switch"] = optimization_switch; + + AttrSlice attrs(&attr_map); + const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); + EXPECT_NE(all_options.find("ge.optimizationSwitch"), all_options.cend()); +} } } // end tensorflow diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc index 2ae110fd7..c82d050ba 100644 --- a/tf_adapter/util/ge_plugin.cc +++ b/tf_adapter/util/ge_plugin.cc @@ -131,6 +131,8 @@ void SetOptionNameMap(json &option_name_map) { option_name_map.emplace("ge.inputBatchCpy", "input_batch_cpy"); option_name_map.emplace(ge::OPTION_ALL_TENSOR_NOT_EMPTY, "all_tensor_not_empty"); option_name_map.emplace("ge.autoMultistreamParallelMode", "auto_multistream_parallel_mode"); + option_name_map.emplace("ge.oo.level", "oo_level"); + option_name_map.emplace("ge.optimizationSwitch", "optimization_switch"); } } // namespace @@ -311,6 +313,14 @@ void GePlugin::Init(std::map &init_options, const bool ADP_LOG(INFO) << "[GePlugin] input_batch_cpy : " << init_options["ge.inputBatchCpy"]; } + if (init_options.find("ge.oo.level") != init_options.end()) { + ADP_LOG(INFO) << "[GePlugin] oo_level : " << init_options["ge.oo.level"]; + } + + if (init_options.find("ge.optimizationSwitch") != init_options.end()) { + ADP_LOG(INFO) << "[GePlugin] optimization_switch : " << init_options["ge.optimizationSwitch"]; + } + bool tdt_uninit_env = false; (void) ReadBoolFromEnvVar("ASCEND_TDT_UNINIT", false, &tdt_uninit_env); if (!kIsHeterogeneous && !tdt_uninit_env) { diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index 6ee28bafe..67406594f 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -516,6 +516,8 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr std::string aicore_num; std::string all_tensor_not_empty; std::string auto_multistream_parallel_mode; + std::string oo_level; + std::string optimization_switch; const bool is_npu_optimizer_valid = (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()); if (is_npu_optimizer_valid) { (void) ctx->GetAttr("_variable_format_optimize", &variable_format_optimize); @@ -595,6 +597,8 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr (void) ctx->GetAttr("_aicore_num", &aicore_num); (void) ctx->GetAttr("_all_tensor_not_empty", &all_tensor_not_empty); (void) ctx->GetAttr("_auto_multistream_parallel_mode", &auto_multistream_parallel_mode); + (void) ctx->GetAttr("_oo_level", &oo_level); + (void) ctx->GetAttr("_optimization_switch", &optimization_switch); } // session options @@ -669,9 +673,13 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr sess_options["all_tensor_not_empty"] = all_tensor_not_empty; sess_options["auto_multistream_parallel_mode"] = auto_multistream_parallel_mode; sess_options["ge.autoMultistreamParallelMode"] = auto_multistream_parallel_mode; - SetForbiddenClosePassOn(sess_options); sess_options["aicore_num"] = aicore_num; sess_options["ge.aicoreNum"] = aicore_num; + sess_options["ge.oo.level"] = oo_level; + sess_options["oo_level"] = oo_level; + sess_options["ge.optimizationSwitch"] = optimization_switch; + sess_options["optimization_switch"] = optimization_switch; + SetForbiddenClosePassOn(sess_options); return sess_options; } @@ -735,6 +743,8 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr std::string aicore_num; std::string oo_constant_folding; std::string input_batch_cpy; + std::string oo_level; + std::string optimization_switch; if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) { (void) ctx->GetAttr("_precision_mode", &precision_mode); @@ -779,6 +789,8 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr (void) ctx->GetAttr("_aicore_num", &aicore_num); (void) ctx->GetAttr("_oo_constant_folding", &oo_constant_folding); (void) ctx->GetAttr("_input_batch_cpy", &input_batch_cpy); + (void) ctx->GetAttr("_oo_level", &oo_level); + (void) ctx->GetAttr("_optimization_switch", &optimization_switch); } std::lock_guard lock(mutex_); @@ -847,6 +859,10 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr init_options_["input_batch_cpy"] = input_batch_cpy; init_options_["ge.inputBatchCpy"] = input_batch_cpy; init_options_["ge.inputPlacement"] = "DeviceHbm"; + init_options_["oo_level"] = oo_level; + init_options_["ge.oo.level"] = oo_level; + init_options_["optimization_switch"] = optimization_switch; + init_options_["ge.optimizationSwitch"] = optimization_switch; SetForbiddenClosePassOn(init_options_); return init_options_; } @@ -1286,6 +1302,8 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & std::string all_tensor_not_empty; std::string auto_multistream_parallel_mode; std::string compile_hybrid_mode; + std::string oo_level; + std::string optimization_switch; auto NpuOptimizer_value = attrs.Find("_NpuOptimizer"); auto enable_data_pre_proc_value = attrs.Find("_enable_data_pre_proc"); @@ -1389,6 +1407,8 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & auto all_tensor_not_empty_value = attrs.Find("_all_tensor_not_empty"); auto auto_multistream_parallel_mode_value = attrs.Find("_auto_multistream_parallel_mode"); auto compile_hybrid_mode_value = attrs.Find("_compile_hybrid_mode"); + auto oo_level_value = attrs.Find("_oo_level"); + auto optimization_switch_value = attrs.Find("_optimization_switch"); if (NpuOptimizer_value != nullptr) { do_npu_optimizer = "1"; if (enable_data_pre_proc_value != nullptr) { @@ -1718,6 +1738,12 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & if (auto_multistream_parallel_mode_value != nullptr) { auto_multistream_parallel_mode = auto_multistream_parallel_mode_value->s(); } + if (oo_level_value != nullptr) { + oo_level = oo_level_value->s(); + } + if (optimization_switch_value != nullptr) { + optimization_switch = optimization_switch_value->s(); + } } all_options["variable_format_optimize"] = variable_format_optimize; @@ -1847,6 +1873,10 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & all_options["ge.inputBatchCpy"] = input_batch_cpy; all_options["shape_generalization_mode"] = shape_generalization_mode; all_options["compile_hybrid_mode"] = compile_hybrid_mode; + all_options["oo_level"] = oo_level; + all_options["ge.oo.level"] = oo_level; + all_options["optimization_switch"] = optimization_switch; + all_options["ge.optimizationSwitch"] = optimization_switch; return all_options; } @@ -1978,6 +2008,8 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options bool all_tensor_not_empty = false; std::string auto_multistream_parallel_mode; std::string compile_hybrid_mode; + std::string oo_level = "O3"; + std::string optimization_switch; const RewriterConfig &rewrite_options = options.session_options->config.graph_options().rewrite_options(); for (const auto &custom_optimizer : rewrite_options.custom_optimizers()) { if (custom_optimizer.name() == "NpuOptimizer") { @@ -2573,6 +2605,17 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options init_options_["ge.inputBatchCpy"] = input_batch_cpy_str; init_options_["ge.inputPlacement"] = "DeviceHbm"; } + if (params.count("oo_level") > 0) { + oo_level = params.at("oo_level").s(); + init_options_["oo_level"] = oo_level; + init_options_["ge.oo.level"] = oo_level; + } + if (params.count("optimization_switch") > 0) { + optimization_switch = params.at("optimization_switch").s(); + init_options_["optimization_switch"] = optimization_switch; + init_options_["ge.optimizationSwitch"] = optimization_switch; + } + if (params.count("jit_compile") > 0) { const static std::vector kJitCompileList = {"true", "false", @@ -2671,6 +2714,11 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options sess_options["auto_multistream_parallel_mode"] = auto_multistream_parallel_mode; sess_options["ge.autoMultistreamParallelMode"] = auto_multistream_parallel_mode; graph_options["compile_hybrid_mode"] = compile_hybrid_mode; + sess_options["oo_level"] = oo_level; + sess_options["ge.oo.level"] = oo_level; + sess_options["optimization_switch"] = optimization_switch; + sess_options["ge.optimizationSwitch"] = optimization_switch; + init_options_["profiling_mode"] = std::to_string(static_cast(profiling_mode)); init_options_[ge::OPTION_EXEC_PROFILING_MODE] = std::to_string(static_cast(profiling_mode)); init_options_["profiling_options"] = profiling_options; @@ -2792,6 +2840,10 @@ void NpuAttrs::LogOptions(const std::map &options) { // tf场景存在某些不可关闭pass,因此需要默认设置forbidden_close_pass为on,即开启这些不可关闭的pass void NpuAttrs::SetForbiddenClosePassOn(std::map &option) { - option["ge.optimizationSwitch"].append("forbidden_close_pass:on"); + if (option["ge.optimizationSwitch"].empty()) { + option["ge.optimizationSwitch"] = "forbidden_close_pass:on"; + } else { + option["ge.optimizationSwitch"].append(";forbidden_close_pass:on"); + } } } // namespace tensorflow diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp index 2671a0579..d1ed19793 100644 --- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp @@ -98,6 +98,8 @@ const std::map kGlobalConfigOptions = { {"oo_constant_folding", "ge.oo.constantFolding"}, {"input_batch_cpy", "ge.inputBatchCpy"}, {"shape_generalization_mode", "shape_generalization_mode"}, + {"oo_level", "ge.oo.level"}, + {"optimization_switch", "ge.optimizationSwitch"}, // private options {"_distribute.rank_id", ge::OPTION_EXEC_RANK_ID}, {"_distribute.rank_table", ge::OPTION_EXEC_RANK_TABLE_FILE}, diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py index b18bf56d9..da66c4eac 100644 --- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py +++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py @@ -86,5 +86,7 @@ class NpuConfig(NpuBaseConfig): self.shape_generalization_mode = OptionValue("STRICT", ["STRICT", "FULL", "ADAPTIVE"]) self.all_tensor_not_empty = OptionValue(False, [True, False]) self.auto_multistream_parallel_mode = OptionValue(None, ['cv']) + self.oo_level = OptionValue("O3", ["O0", "O1", "O2", "O3"]) + self.optimization_switch = OptionValue(None, None) super(NpuConfig, self).__init__() -- Gitee From 43b5c0439d1c0839e27b31e3878a2615e59665a0 Mon Sep 17 00:00:00 2001 From: ZhouChen Date: Thu, 11 Dec 2025 01:53:52 +0000 Subject: [PATCH 6/8] =?UTF-8?q?!3073=20=E4=BF=AE=E5=A4=8DTFA=E7=BC=96?= =?UTF-8?q?=E8=AF=91=E9=97=AE=E9=A2=98=20Merge=20pull=20request=20!3073=20?= =?UTF-8?q?from=20ZhouChen/fix=5Fcompile=5F1205?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 7 +++- .../tests/depends/ge_runner/src/ge_log.h | 36 ------------------- .../depends/ge_runner/src/ge_tensor_stub.cc | 1 - .../tests/depends/ge_runner/src/ge_util.h | 1 - tf_adapter_2.x/CMakeLists.txt | 7 +++- 5 files changed, 12 insertions(+), 40 deletions(-) delete mode 100644 tf_adapter/tests/depends/ge_runner/src/ge_log.h diff --git a/CMakeLists.txt b/CMakeLists.txt index b7b408e21..1dee5af63 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,9 +42,14 @@ if (ENABLE_OPEN_SRC) include_directories(${ASCEND_OPENSDK_DIR}/include/metadef/pkg_inc) include_directories(${ASCEND_OPENSDK_DIR}/include/metadef/external) include_directories(${CMAKE_CURRENT_LIST_DIR}/inc) + include_directories(${CMAKE_CURRENT_LIST_DIR}/inc/graph_metadef) file(COPY ${ASCEND_OPENSDK_DIR}/include/metadef/register/register.h DESTINATION ${CMAKE_CURRENT_LIST_DIR}/inc/graph_metadef/register/) file(COPY ${ASCEND_OPENSDK_DIR}/include/metadef/external/register/register.h DESTINATION ${CMAKE_CURRENT_LIST_DIR}/inc/include/register/) - + configure_file( + ${ASCEND_OPENSDK_DIR}/include/metadef/common/ge_common/inner_error_codes.h + ${CMAKE_CURRENT_LIST_DIR}/inc/graph_metadef/common/ge_common/ge_inner_error_codes.h + COPYONLY + ) if (NOT EXISTS ${CMAKE_CURRENT_LIST_DIR}/tools/COMPILE_FLAGS OR NOT EXISTS ${CMAKE_CURRENT_LIST_DIR}/tools/LINK_FLAGS OR NOT EXISTS ${CMAKE_CURRENT_LIST_DIR}/tools/PYTHON_BIN_PATH OR NOT EXISTS diff --git a/tf_adapter/tests/depends/ge_runner/src/ge_log.h b/tf_adapter/tests/depends/ge_runner/src/ge_log.h deleted file mode 100644 index e7fbf54ba..000000000 --- a/tf_adapter/tests/depends/ge_runner/src/ge_log.h +++ /dev/null @@ -1,36 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef COMMON_GRAPH_DEBUG_GE_LOG_H_ -#define COMMON_GRAPH_DEBUG_GE_LOG_H_ - -#include "graph/ge_error_codes.h" -#include "framework/common/debug/ge_log.h" -#include "framework/common/debug/log.h" - -#define GE_LOGE(fmt, ...) // GE_LOG_ERROR(GE_MODULE_NAME, ge::FAILED, fmt, ##__VA_ARGS__) - -// Only check error log -#define GE_CHK_BOOL_ONLY_LOG(expr, ...) \ - do { \ - const bool b = (expr); \ - if (!b) { \ - // GELOGI(__VA_ARGS__); \ - } \ - } while (false) - -#endif // COMMON_GRAPH_DEBUG_GE_LOG_H_ - diff --git a/tf_adapter/tests/depends/ge_runner/src/ge_tensor_stub.cc b/tf_adapter/tests/depends/ge_runner/src/ge_tensor_stub.cc index bfca72f08..1395910f1 100644 --- a/tf_adapter/tests/depends/ge_runner/src/ge_tensor_stub.cc +++ b/tf_adapter/tests/depends/ge_runner/src/ge_tensor_stub.cc @@ -20,7 +20,6 @@ #include #include #include "ge_util.h" -#include "ge_log.h" namespace ge { // static constexpr int64_t UNKNOWN_DIM_NUM = -2; diff --git a/tf_adapter/tests/depends/ge_runner/src/ge_util.h b/tf_adapter/tests/depends/ge_runner/src/ge_util.h index 863b1e493..0e38d9b08 100644 --- a/tf_adapter/tests/depends/ge_runner/src/ge_util.h +++ b/tf_adapter/tests/depends/ge_runner/src/ge_util.h @@ -18,7 +18,6 @@ #define COMMON_GRAPH_DEBUG_GE_UTIL_H_ #include "framework/common/util.h" -#include "ge_log.h" #include "graph/ge_error_codes.h" namespace ge { template diff --git a/tf_adapter_2.x/CMakeLists.txt b/tf_adapter_2.x/CMakeLists.txt index 1af119f42..165a231bc 100644 --- a/tf_adapter_2.x/CMakeLists.txt +++ b/tf_adapter_2.x/CMakeLists.txt @@ -39,8 +39,13 @@ else () file(COPY ${ASCEND_INSTALLED_PATH}/opensdk/opensdk/include/metadef/register/register.h DESTINATION ${CMAKE_CURRENT_LIST_DIR}/inc/graph_metadef/register/) file(COPY ${ASCEND_INSTALLED_PATH}/opensdk/opensdk/include/metadef/external/register/register.h DESTINATION ${CMAKE_CURRENT_LIST_DIR}/inc/include/register/) - + configure_file( + ${ASCEND_INSTALLED_PATH}/opensdk/opensdk/include/metadef/common/ge_common/inner_error_codes.h + ${CMAKE_CURRENT_LIST_DIR}/inc/graph_metadef/common/ge_common/ge_inner_error_codes.h + COPYONLY + ) include_directories(${CMAKE_CURRENT_LIST_DIR}/inc) + include_directories(${CMAKE_CURRENT_LIST_DIR}/inc/graph_metadef) endif () include(${CMAKE_CURRENT_LIST_DIR}/cmake/nlohmann_json.cmake) -- Gitee From 36a28143d80fb00462b8edca19438c58756714fb Mon Sep 17 00:00:00 2001 From: yangyongqiang Date: Fri, 12 Dec 2025 10:14:29 +0000 Subject: [PATCH 7/8] =?UTF-8?q?!3074=20=E5=BD=93=E5=88=86=E6=A1=A3?= =?UTF-8?q?=E5=BC=80=E5=85=B3=E6=B2=A1=E8=AE=BE=E7=BD=AE=E4=BD=86compile?= =?UTF-8?q?=5Fhybrid=5Fmode=E8=AE=BE=E7=BD=AE=E7=9A=84=E6=97=B6=E5=80=99?= =?UTF-8?q?=E9=9C=80=E8=A6=81=E6=8A=A5=E9=94=99=20Merge=20pull=20request?= =?UTF-8?q?=20!3074=20from=20yangyongqiang/cq=5Fge=5Fdev?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/st/util/testcase/npu_attrs_test.cc | 15 +++++++++++++++ .../tests/ut/util/testcase/npu_attrs_test.cc | 15 +++++++++++++++ tf_adapter/util/npu_attrs.cc | 11 ++++++++++- 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc index 8a46d0094..1a00a753b 100644 --- a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc @@ -601,6 +601,21 @@ TEST_F(NpuAttrTest, SetNpuOptimizerAttr_compile_hybrid_mode) { EXPECT_EQ(s.ok(), false); } +TEST_F(NpuAttrTest, SetNpuOptimizerAttr_compile_hybrid_mode_no_set_dynamic_option) { + GraphOptimizationPassOptions options; + SessionOptions session_options; + session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true); + auto *custom_config = + session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers(); + custom_config->set_name("NpuOptimizer"); + options.session_options = &session_options; + AttrValue compile_hybrid_mode_value = AttrValue(); + compile_hybrid_mode_value.set_i(1); + (*custom_config->mutable_parameter_map())["compile_hybrid_mode"] = compile_hybrid_mode_value; + Status s = NpuAttrs::SetNpuOptimizerAttr(options, reinterpret_cast(1)); + EXPECT_EQ(s.ok(), false); +} + TEST_F(NpuAttrTest, GetAllAttrOptions_oo_level) { AttrValueMap attr_map; diff --git a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc index 6a4e614a6..980f3a1b4 100644 --- a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc @@ -740,6 +740,21 @@ TEST_F(NpuAttrTest, SetNpuOptimizerAttr_compile_hybrid_mode) { EXPECT_EQ(s.ok(), false); } +TEST_F(NpuAttrTest, SetNpuOptimizerAttr_compile_hybrid_mode_no_set_dynamic_option) { + GraphOptimizationPassOptions options; + SessionOptions session_options; + session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true); + auto *custom_config = + session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers(); + custom_config->set_name("NpuOptimizer"); + options.session_options = &session_options; + AttrValue compile_hybrid_mode_value = AttrValue(); + compile_hybrid_mode_value.set_i(1); + (*custom_config->mutable_parameter_map())["compile_hybrid_mode"] = compile_hybrid_mode_value; + Status s = NpuAttrs::SetNpuOptimizerAttr(options, reinterpret_cast(1)); + EXPECT_EQ(s.ok(), false); +} + TEST_F(NpuAttrTest, SetNpuOptimizerAttr_oo_level) { GraphOptimizationPassOptions options; SessionOptions session_options; diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index 67406594f..c07ae9009 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -2340,7 +2340,16 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options } } else if (params.count("input_shape") == 0 && params.count("dynamic_dims") == 0 && params.count("dynamic_node_type") == 0) { - // the three parameters are not set normally. + if (params.count("compile_hybrid_mode") > 0) { + compile_hybrid_mode = std::to_string(params.at("compile_hybrid_mode").i()); + if (compile_hybrid_mode == "1") { + ADP_LOG(ERROR) + << "input_shape, dynamic_dims and dynamic_node_type should be set when compile_hybrid_mode is 1"; + LOG(ERROR) << "input_shape, dynamic_dims and dynamic_node_type should be set when compile_hybrid_mode is 1"; + return errors::Internal( + "input_shape, dynamic_dims and dynamic_node_type should be set when compile_hybrid_mode is 1"); + } + } } else { ADP_LOG(FATAL) << "input_shape, dynamic_dims and dynamic_node_type should use together."; LOG(FATAL) << "input_shape, dynamic_dims and dynamic_node_type should use together."; -- Gitee From d3f7f41ac567b7929fd05377b643e96229fa7092 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=94=90=E8=B1=AA=E6=9D=B0?= Date: Fri, 19 Dec 2025 11:47:48 +0000 Subject: [PATCH 8/8] =?UTF-8?q?!3078=20optimize=20scale=20process=20in=20i?= =?UTF-8?q?nput=5Fshape=20scene=20Merge=20pull=20request=20!3078=20from=20?= =?UTF-8?q?=E5=94=90=E8=B1=AA=E6=9D=B0/ge=5Fdev?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tf_adapter/kernels/geop_npu.cc | 21 +++++++++++++++++++ .../ut/kernels/testcase/geop_npu_test.cc | 19 +++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc index 73a67ea94..50b29e74a 100644 --- a/tf_adapter/kernels/geop_npu.cc +++ b/tf_adapter/kernels/geop_npu.cc @@ -357,6 +357,21 @@ std::string CurrentTimeInStr() { return std::string(buffer); } +void ReplaceTargetStr(std::string &str, const std::string &from, const std::string &to) { + size_t pos = 0U; + while ((pos = str.find(from, pos)) != std::string::npos) { + str.replace(pos, from.length(), to); + pos += to.length(); + } +} + +void RewriteInputShapeOption(std::string &str) { + // in case of a:-1,2,3;b:0 + str += ";"; + ReplaceTargetStr(str, ":0;", ":;"); + str.pop_back(); +} + static const int64 kMicrosToMillis = 1000; const int kInvalidGraphId = 0; const int kMaxCacheNum = 10; @@ -1119,6 +1134,12 @@ Status GeOp::ParserGraph(OpKernelContext *ctx, const std::vector &input_ Status GeOp::AddGraph(OpKernelContext *ctx, const uint32_t &graph_id) { // call ge session addGraph api auto graph_options = graph_options_; + const auto it = graph_options.find("ge.inputShape"); + if (it != graph_options.end()) { + // when some input is scale, input_shape option is changed to input_name:0 according to tfa guide, + // here replace to input_name: to ensure it is scale shape [] instead of empty shape [0] + RewriteInputShapeOption(it->second); + } if (is_aoe_) { graph_options["ge.buildMode"] = "normal"; } diff --git a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc index b3706d633..803528d68 100644 --- a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc +++ b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc @@ -20,6 +20,7 @@ #undef private namespace tensorflow { +extern void RewriteInputShapeOption(std::string &str); namespace { using geDataUniquePtr = std::unique_ptr>; class NpuGetNextOutputInfo { @@ -573,6 +574,24 @@ TEST_F(GeOpTest, GeOpDynamicDimsTest) { EXPECT_TRUE(attrs.find("_input_shape") != attrs.end()); EXPECT_TRUE(!attrs["_input_shape"].s().empty()); } + +TEST_F(GeOpTest, RewriteInputShapeOptionTest) { + std::string input_shape("a:-1,2;b:0"); + std::string target_input_shape("a:-1,2;b:"); + RewriteInputShapeOption(input_shape); + EXPECT_TRUE(input_shape == target_input_shape); + + input_shape = "a:-1,2;b:0;"; + target_input_shape = "a:-1,2;b:;"; + RewriteInputShapeOption(input_shape); + EXPECT_TRUE(input_shape == target_input_shape); + + input_shape = "a:-1,2;b:0,1,2"; + target_input_shape = "a:-1,2;b:0,1,2"; + RewriteInputShapeOption(input_shape); + EXPECT_TRUE(input_shape == target_input_shape); +} + TEST_F(GeOpTest, GeOpDynamicDimsNodeType1Test) { NodeDef node_def; std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_dims_node_type1.pbtxt"; -- Gitee