diff --git a/CMakeLists.txt b/CMakeLists.txt index ae01de3415dd7925fe10e6dc25cc9e0c09cbcc4b..1dee5af63b8e93d3356078646fc7758a8f6e80c5 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,7 +42,14 @@ if (ENABLE_OPEN_SRC) include_directories(${ASCEND_OPENSDK_DIR}/include/metadef/pkg_inc) include_directories(${ASCEND_OPENSDK_DIR}/include/metadef/external) include_directories(${CMAKE_CURRENT_LIST_DIR}/inc) - + include_directories(${CMAKE_CURRENT_LIST_DIR}/inc/graph_metadef) + file(COPY ${ASCEND_OPENSDK_DIR}/include/metadef/register/register.h DESTINATION ${CMAKE_CURRENT_LIST_DIR}/inc/graph_metadef/register/) + file(COPY ${ASCEND_OPENSDK_DIR}/include/metadef/external/register/register.h DESTINATION ${CMAKE_CURRENT_LIST_DIR}/inc/include/register/) + configure_file( + ${ASCEND_OPENSDK_DIR}/include/metadef/common/ge_common/inner_error_codes.h + ${CMAKE_CURRENT_LIST_DIR}/inc/graph_metadef/common/ge_common/ge_inner_error_codes.h + COPYONLY + ) if (NOT EXISTS ${CMAKE_CURRENT_LIST_DIR}/tools/COMPILE_FLAGS OR NOT EXISTS ${CMAKE_CURRENT_LIST_DIR}/tools/LINK_FLAGS OR NOT EXISTS ${CMAKE_CURRENT_LIST_DIR}/tools/PYTHON_BIN_PATH OR NOT EXISTS diff --git a/tf_adapter/interface_spec/api_npu_config.pyh b/tf_adapter/interface_spec/api_npu_config.pyh index 185a2d9707f1bedcb30b1a98b81b392e785258ee..de7b76e6faa259a312b33ff04bced68d6eaa0a44 100644 --- a/tf_adapter/interface_spec/api_npu_config.pyh +++ b/tf_adapter/interface_spec/api_npu_config.pyh @@ -24,7 +24,7 @@ class NPURunConfig(run_config_lib.RunConfig): ac_parallel_enable=None, quant_dumpable=None, input_fusion_size=131072, compile_dynamic_mode=None, graph_max_parallel_model_num=1, export_compile_stat=1, aicore_num=None, oo_constant_folding=True, input_batch_cpy=False, shape_generalization_mode="STRICT", all_tensor_not_empty=False, - auto_multistream_parallel_mode=None): + auto_multistream_parallel_mode=None, oo_level="O3", optimization_switch=None): class ProfilingConfig(): def __init__(self, enable_profiling=False, 
profiling_options=None): diff --git a/tf_adapter/interface_spec/api_npu_plugin.pyh b/tf_adapter/interface_spec/api_npu_plugin.pyh index 2cd0a160876d7579de984b0205aa57257e246731..f71758154268a53af7023e9955a5448c0e3fe039 100644 --- a/tf_adapter/interface_spec/api_npu_plugin.pyh +++ b/tf_adapter/interface_spec/api_npu_plugin.pyh @@ -7,6 +7,6 @@ def npu_resource_init(graph_run_mode=1, op_debug_level=0, enable_profiling=False op_compiler_cache_mode=None, op_compiler_cache_dir=None, debug_dir=None, hcom_multi_mode=False, distribute_config=None, aoe_config_file=None, precision_mode_v2=None, export_compile_stat=1, aicore_num=None, oo_constant_folding=True, - input_batch_cpy=False): + input_batch_cpy=False, oo_level="O3", optimization_switch=None): def npu_resource_shutdown(): diff --git a/tf_adapter/kernels/geop_npu.cc b/tf_adapter/kernels/geop_npu.cc index bd66ca6a263804b61c6f92b127eeaba5a6f45d60..50b29e74acb4908815f26afdd3aa7ceba45b70cf 100644 --- a/tf_adapter/kernels/geop_npu.cc +++ b/tf_adapter/kernels/geop_npu.cc @@ -357,6 +357,21 @@ std::string CurrentTimeInStr() { return std::string(buffer); } +void ReplaceTargetStr(std::string &str, const std::string &from, const std::string &to) { + size_t pos = 0U; + while ((pos = str.find(from, pos)) != std::string::npos) { + str.replace(pos, from.length(), to); + pos += to.length(); + } +} + +void RewriteInputShapeOption(std::string &str) { + // in case of a:-1,2,3;b:0 + str += ";"; + ReplaceTargetStr(str, ":0;", ":;"); + str.pop_back(); +} + static const int64 kMicrosToMillis = 1000; const int kInvalidGraphId = 0; const int kMaxCacheNum = 10; @@ -1013,7 +1028,7 @@ Status GeOp::CreateGeSession() { << warning_message; } if (init_status != ge::SUCCESS) { - ADP_LOG(ERROR) << "[GePlugin] Init ge failed, ret : " << ToString(init_status); + ADP_LOG(ERROR) << "[GePlugin] Initialize ge failed, ret : " << ToString(init_status); const auto &error_message = GePlugin::GetInstance()->GetInitErrorMessage(); std::stringstream ss; ss << 
"[GePlugin] Initialize ge failed, ret : " << ToString(init_status) << std::endl @@ -1119,6 +1134,12 @@ Status GeOp::ParserGraph(OpKernelContext *ctx, const std::vector &input_ Status GeOp::AddGraph(OpKernelContext *ctx, const uint32_t &graph_id) { // call ge session addGraph api auto graph_options = graph_options_; + const auto it = graph_options.find("ge.inputShape"); + if (it != graph_options.end()) { + // when some input is scale, input_shape option is changed to input_name:0 according to tfa guide, + // here replace to input_name: to ensure it is scale shape [] instead of empty shape [0] + RewriteInputShapeOption(it->second); + } if (is_aoe_) { graph_options["ge.buildMode"] = "normal"; } diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py index 645b1849dd0b2892b270ae943c894d5444ef4e56..674a372e4bab7edf184a0331d5844cab0c5665f1 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_config.py @@ -121,7 +121,9 @@ class NPURunConfig(run_config_lib.RunConfig): input_batch_cpy=False, shape_generalization_mode="STRICT", all_tensor_not_empty=False, - auto_multistream_parallel_mode=None + auto_multistream_parallel_mode=None, + oo_level="O3", + optimization_switch=None ): """ Constructs a NPUConfig. @@ -199,6 +201,8 @@ class NPURunConfig(run_config_lib.RunConfig): ADAPTIVE: generalizes the varying axes. all_tensor_not_empty: default is: False. auto_multistream_parallel_mode: default is None; cv: cube vector parallel. + oo_level: The switch of optimization level. + optimization_switch: The switch of optimization switch. """ # Check iterations_per_loop. 
@@ -302,6 +306,8 @@ class NPURunConfig(run_config_lib.RunConfig): self._shape_generalization_mode = shape_generalization_mode self._all_tensor_not_empty = all_tensor_not_empty self._auto_multistream_parallel_mode = auto_multistream_parallel_mode + self._oo_level = oo_level + self._optimization_switch = optimization_switch super(NPURunConfig, self).__init__( model_dir=model_dir, tf_random_seed=tf_random_seed, diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py index 86bd6a2c7b4b5c6796ffc57cd98ad190b109567a..3dec41e41ccf276c6540a1eec33ef28c3a2dcfb0 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_estimator.py @@ -752,6 +752,24 @@ class NPUEstimator(estimator_lib.Estimator): if config._input_batch_cpy is not None: custom_op.parameter_map["input_batch_cpy"].b = config._input_batch_cpy + def __load_oo_level(self, config, custom_op): + """Load oo_level config, and add to custom_optimizers + Args: + config: NPURunConfig. + custom_op: Customer optimizers. + """ + if config._oo_level is not None: + custom_op.parameter_map["oo_level"].s = tf.compat.as_bytes(config._oo_level) + + def __load_optimization_switch(self, config, custom_op): + """Load optimization_switch config, and add to custom_optimizers + Args: + config: NPURunConfig. + custom_op: Customer optimizers. 
+ """ + if config._optimization_switch is not None: + custom_op.parameter_map["optimization_switch"].s = tf.compat.as_bytes(config._optimization_switch) + def __load_graph_optimizers(self, config): """ Change the session config and load the graph optimizers: @@ -903,6 +921,10 @@ class NPUEstimator(estimator_lib.Estimator): self.__load_input_batch_cpy(config, custom_op) + self.__load_oo_level(config, custom_op) + + self.__load_optimization_switch(config, custom_op) + return config def __load_job_info(self, job_start_file): diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py b/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py index 97571455e647332275e55d7d699f2f480a742682..e59a7f0b772fa891f32fddde228663cb3bca03f4 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_plugin.py @@ -76,7 +76,9 @@ def npu_resource_init(graph_run_mode=1, export_compile_stat=1, aicore_num=None, oo_constant_folding=True, - input_batch_cpy=False): + input_batch_cpy=False, + oo_level="O3", + optimization_switch=None): """Initialize NPU resource""" util.check_nonnegative_integer(graph_run_mode, "graph_run_mode") check_graph_run_mode(graph_run_mode) @@ -129,10 +131,13 @@ def npu_resource_init(graph_run_mode=1, if oo_constant_folding is not None: util.check_bool_type(oo_constant_folding, "oo_constant_folding") init["ge.oo.constantFolding"] = "true" if oo_constant_folding is True else "false" - # input_batch_cpy if input_batch_cpy is not None: util.check_bool_type(input_batch_cpy, "input_batch_cpy") init["ge.inputBatchCpy"] = "true" if input_batch_cpy is True else "false" + if oo_level is not None: + init["ge.oo.level"] = str(oo_level) + if optimization_switch is not None: + init["ge.optimization_switch"] = str(optimization_switch) init_options = tf_adapter.map_string_string(init) tf_adapter.PluginInit(init_options) diff --git a/tf_adapter/python/npu_bridge/estimator/npu/npu_strategy.py 
b/tf_adapter/python/npu_bridge/estimator/npu/npu_strategy.py index 25936eba5561c7664949eb95482e4c6129fa2c22..858c397351a294560de8205365e01589f57ff45a 100644 --- a/tf_adapter/python/npu_bridge/estimator/npu/npu_strategy.py +++ b/tf_adapter/python/npu_bridge/estimator/npu/npu_strategy.py @@ -45,4 +45,6 @@ class NPUStrategy(distribute_lib.StrategyV1): """NPU distribute strategy""" def __init__(self, device="/cpu:0"): + if device != "/cpu:0": + raise ValueError('"device" only support "/cpu:0"') super(NPUStrategy, self).__init__(NPUExtended(self, device)) diff --git a/tf_adapter/tests/depends/ge_runner/src/ge_log.h b/tf_adapter/tests/depends/ge_runner/src/ge_log.h deleted file mode 100644 index e7fbf54bae65b9c42df56407a5b48e2cd1a656f3..0000000000000000000000000000000000000000 --- a/tf_adapter/tests/depends/ge_runner/src/ge_log.h +++ /dev/null @@ -1,36 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef COMMON_GRAPH_DEBUG_GE_LOG_H_ -#define COMMON_GRAPH_DEBUG_GE_LOG_H_ - -#include "graph/ge_error_codes.h" -#include "framework/common/debug/ge_log.h" -#include "framework/common/debug/log.h" - -#define GE_LOGE(fmt, ...) // GE_LOG_ERROR(GE_MODULE_NAME, ge::FAILED, fmt, ##__VA_ARGS__) - -// Only check error log -#define GE_CHK_BOOL_ONLY_LOG(expr, ...) 
\ - do { \ - const bool b = (expr); \ - if (!b) { \ - // GELOGI(__VA_ARGS__); \ - } \ - } while (false) - -#endif // COMMON_GRAPH_DEBUG_GE_LOG_H_ - diff --git a/tf_adapter/tests/depends/ge_runner/src/ge_tensor_stub.cc b/tf_adapter/tests/depends/ge_runner/src/ge_tensor_stub.cc index bfca72f08d2af81998213f6f152d157130d04520..1395910f1de5980ab3e6ec521a56988e5d822f53 100644 --- a/tf_adapter/tests/depends/ge_runner/src/ge_tensor_stub.cc +++ b/tf_adapter/tests/depends/ge_runner/src/ge_tensor_stub.cc @@ -20,7 +20,6 @@ #include #include #include "ge_util.h" -#include "ge_log.h" namespace ge { // static constexpr int64_t UNKNOWN_DIM_NUM = -2; diff --git a/tf_adapter/tests/depends/ge_runner/src/ge_util.h b/tf_adapter/tests/depends/ge_runner/src/ge_util.h index 863b1e493734fe7fffea1ad2a1ce53a24219b2c9..0e38d9b084100d60433f4f79d482f9034f407cd8 100644 --- a/tf_adapter/tests/depends/ge_runner/src/ge_util.h +++ b/tf_adapter/tests/depends/ge_runner/src/ge_util.h @@ -18,7 +18,6 @@ #define COMMON_GRAPH_DEBUG_GE_UTIL_H_ #include "framework/common/util.h" -#include "ge_log.h" #include "graph/ge_error_codes.h" namespace ge { template diff --git a/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc b/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc index fbb165193d89ba5ca615673fe2435b574400b3e1..b4d628c64a56feca75b153ad8f8b18386484a206 100644 --- a/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc +++ b/tf_adapter/tests/st/util/testcase/ge_plugin_test.cc @@ -229,5 +229,21 @@ TEST_F(GePluginTest, PluginInitTest_input_batch_cpy) { ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); NpuClose(); } + +TEST_F(GePluginTest, PluginInitTest_oo_level) { + std::map init_options; + init_options["ge.oo.level"] = "O3"; + PluginInit(init_options); + ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); + NpuClose(); +} + +TEST_F(GePluginTest, PluginInitTest_optimization_switch) { + std::map init_options; + init_options["ge.optimizationSwitch"] = "pass1:on"; + 
PluginInit(init_options); + ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); + NpuClose(); +} } } // end tensorflow diff --git a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc index d3503e35ec8d7fe8f5440c77b731b98b77d3c690..1a00a753bc3c0b207d754e61fde99a0b5d833d21 100644 --- a/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/st/util/testcase/npu_attrs_test.cc @@ -601,5 +601,52 @@ TEST_F(NpuAttrTest, SetNpuOptimizerAttr_compile_hybrid_mode) { EXPECT_EQ(s.ok(), false); } +TEST_F(NpuAttrTest, SetNpuOptimizerAttr_compile_hybrid_mode_no_set_dynamic_option) { + GraphOptimizationPassOptions options; + SessionOptions session_options; + session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true); + auto *custom_config = + session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers(); + custom_config->set_name("NpuOptimizer"); + options.session_options = &session_options; + AttrValue compile_hybrid_mode_value = AttrValue(); + compile_hybrid_mode_value.set_i(1); + (*custom_config->mutable_parameter_map())["compile_hybrid_mode"] = compile_hybrid_mode_value; + Status s = NpuAttrs::SetNpuOptimizerAttr(options, reinterpret_cast(1)); + EXPECT_EQ(s.ok(), false); +} + +TEST_F(NpuAttrTest, GetAllAttrOptions_oo_level) { + AttrValueMap attr_map; + + AttrValue npu_optimizer = AttrValue(); + npu_optimizer.set_s("NpuOptimizer"); + attr_map["_NpuOptimizer"] = npu_optimizer; + + AttrValue oo_level = AttrValue(); + oo_level.set_s("O3"); + attr_map["_oo_level"] = oo_level; + + AttrSlice attrs(&attr_map); + const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); + EXPECT_NE(all_options.find("ge.oo.level"), all_options.cend()); +} + +TEST_F(NpuAttrTest, SetNpuOptimizerAttr_optimization_switch) { + AttrValueMap attr_map; + + AttrValue npu_optimizer = AttrValue(); + npu_optimizer.set_s("NpuOptimizer"); + 
attr_map["_NpuOptimizer"] = npu_optimizer; + + AttrValue optimization_switch = AttrValue(); + optimization_switch.set_s("pass1:on"); + attr_map["_optimization_switch"] = optimization_switch; + + AttrSlice attrs(&attr_map); + const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); + EXPECT_NE(all_options.find("ge.optimizationSwitch"), all_options.cend()); +} + } } // end tensorflow diff --git a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc index b3706d6330c10f99675a1dbd5b501e637ae3ec0e..803528d681dd78dfe6dc9f147c4c6736ad280f2c 100644 --- a/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc +++ b/tf_adapter/tests/ut/kernels/testcase/geop_npu_test.cc @@ -20,6 +20,7 @@ #undef private namespace tensorflow { +extern void RewriteInputShapeOption(std::string &str); namespace { using geDataUniquePtr = std::unique_ptr>; class NpuGetNextOutputInfo { @@ -573,6 +574,24 @@ TEST_F(GeOpTest, GeOpDynamicDimsTest) { EXPECT_TRUE(attrs.find("_input_shape") != attrs.end()); EXPECT_TRUE(!attrs["_input_shape"].s().empty()); } + +TEST_F(GeOpTest, RewriteInputShapeOptionTest) { + std::string input_shape("a:-1,2;b:0"); + std::string target_input_shape("a:-1,2;b:"); + RewriteInputShapeOption(input_shape); + EXPECT_TRUE(input_shape == target_input_shape); + + input_shape = "a:-1,2;b:0;"; + target_input_shape = "a:-1,2;b:;"; + RewriteInputShapeOption(input_shape); + EXPECT_TRUE(input_shape == target_input_shape); + + input_shape = "a:-1,2;b:0,1,2"; + target_input_shape = "a:-1,2;b:0,1,2"; + RewriteInputShapeOption(input_shape); + EXPECT_TRUE(input_shape == target_input_shape); +} + TEST_F(GeOpTest, GeOpDynamicDimsNodeType1Test) { NodeDef node_def; std::string graph_def_path = "tf_adapter/tests/ut/kernels/pbtxt/geop_dynamic_dims_node_type1.pbtxt"; diff --git a/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc b/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc index 
fac80fc57529d18c25c1ed4fd2afbbd28a2ff703..5bd524bf92c7b4c51fec66c6af76dfd363f0785b 100644 --- a/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc +++ b/tf_adapter/tests/ut/util/testcase/ge_plugin_test.cc @@ -219,5 +219,22 @@ TEST_F(GePluginTest, PluginInitTest_input_batch_cpy) { ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); NpuClose(); } + +TEST_F(GePluginTest, PluginInitTest_oo_level) { + std::map init_options; + init_options["ge.oo.level"] = "O3"; + PluginInit(init_options); + ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); + NpuClose(); +} + +TEST_F(GePluginTest, PluginInitTest_optimization_switch) { + std::map init_options; + init_options["ge.optimizationSwitch"] = "pass1:on"; + PluginInit(init_options); + ASSERT_FALSE(GePlugin::GetInstance()->GetInitOptions().empty()); + NpuClose(); +} + } } // end tensorflow diff --git a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc index b9cdce5324159b1e6fe1068d2b397b5e00bc4a93..980f3a1b4aeb8342a63dd8285d6b6d8611cc3a73 100644 --- a/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc +++ b/tf_adapter/tests/ut/util/testcase/npu_attrs_test.cc @@ -739,5 +739,56 @@ TEST_F(NpuAttrTest, SetNpuOptimizerAttr_compile_hybrid_mode) { (*custom_config->mutable_parameter_map())["compile_hybrid_mode"] = compile_hybrid_mode_value; EXPECT_EQ(s.ok(), false); } + +TEST_F(NpuAttrTest, SetNpuOptimizerAttr_compile_hybrid_mode_no_set_dynamic_option) { + GraphOptimizationPassOptions options; + SessionOptions session_options; + session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true); + auto *custom_config = + session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers(); + custom_config->set_name("NpuOptimizer"); + options.session_options = &session_options; + AttrValue compile_hybrid_mode_value = AttrValue(); + compile_hybrid_mode_value.set_i(1); + 
(*custom_config->mutable_parameter_map())["compile_hybrid_mode"] = compile_hybrid_mode_value; + Status s = NpuAttrs::SetNpuOptimizerAttr(options, reinterpret_cast(1)); + EXPECT_EQ(s.ok(), false); +} + +TEST_F(NpuAttrTest, SetNpuOptimizerAttr_oo_level) { + GraphOptimizationPassOptions options; + SessionOptions session_options; + session_options.config.mutable_graph_options()->mutable_optimizer_options()->set_do_function_inlining(true); + auto *custom_config = + session_options.config.mutable_graph_options()->mutable_rewrite_options()->add_custom_optimizers(); + custom_config->set_name("NpuOptimizer"); + options.session_options = &session_options; + + AttrValue oo_level = AttrValue(); + oo_level.set_s("O3"); + (*custom_config->mutable_parameter_map())["oo_level"] = oo_level; + + AttrValue jit_compile = AttrValue(); + jit_compile.set_s("2"); + (*custom_config->mutable_parameter_map())["jit_compile"] = jit_compile; + Status s = NpuAttrs::SetNpuOptimizerAttr(options, reinterpret_cast(1)); + EXPECT_EQ(s.ok(), false); +} + +TEST_F(NpuAttrTest, GetAllAttrOptions_optimization_switch) { + AttrValueMap attr_map; + + AttrValue npu_optimizer = AttrValue(); + npu_optimizer.set_s("NpuOptimizer"); + attr_map["_NpuOptimizer"] = npu_optimizer; + + AttrValue optimization_switch = AttrValue(); + optimization_switch.set_s("pass1:on"); + attr_map["_optimization_switch"] = optimization_switch; + + AttrSlice attrs(&attr_map); + const auto &all_options = NpuAttrs::GetAllAttrOptions(attrs); + EXPECT_NE(all_options.find("ge.optimizationSwitch"), all_options.cend()); +} } } // end tensorflow diff --git a/tf_adapter/util/ge_plugin.cc b/tf_adapter/util/ge_plugin.cc index 2ae110fd7a70a701f21fa2cc7446b757ad6caed9..c82d050baee560f5bafa048642e7eaabcd1fd9a2 100644 --- a/tf_adapter/util/ge_plugin.cc +++ b/tf_adapter/util/ge_plugin.cc @@ -131,6 +131,8 @@ void SetOptionNameMap(json &option_name_map) { option_name_map.emplace("ge.inputBatchCpy", "input_batch_cpy"); 
option_name_map.emplace(ge::OPTION_ALL_TENSOR_NOT_EMPTY, "all_tensor_not_empty"); option_name_map.emplace("ge.autoMultistreamParallelMode", "auto_multistream_parallel_mode"); + option_name_map.emplace("ge.oo.level", "oo_level"); + option_name_map.emplace("ge.optimizationSwitch", "optimization_switch"); } } // namespace @@ -311,6 +313,14 @@ void GePlugin::Init(std::map &init_options, const bool ADP_LOG(INFO) << "[GePlugin] input_batch_cpy : " << init_options["ge.inputBatchCpy"]; } + if (init_options.find("ge.oo.level") != init_options.end()) { + ADP_LOG(INFO) << "[GePlugin] oo_level : " << init_options["ge.oo.level"]; + } + + if (init_options.find("ge.optimizationSwitch") != init_options.end()) { + ADP_LOG(INFO) << "[GePlugin] optimization_switch : " << init_options["ge.optimizationSwitch"]; + } + bool tdt_uninit_env = false; (void) ReadBoolFromEnvVar("ASCEND_TDT_UNINIT", false, &tdt_uninit_env); if (!kIsHeterogeneous && !tdt_uninit_env) { diff --git a/tf_adapter/util/npu_attrs.cc b/tf_adapter/util/npu_attrs.cc index 6ee28bafe327e9d92f0c28be527ee9f4766fdb79..c07ae9009f9851274461179d4dfdc3f420c12a35 100644 --- a/tf_adapter/util/npu_attrs.cc +++ b/tf_adapter/util/npu_attrs.cc @@ -516,6 +516,8 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr std::string aicore_num; std::string all_tensor_not_empty; std::string auto_multistream_parallel_mode; + std::string oo_level; + std::string optimization_switch; const bool is_npu_optimizer_valid = (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()); if (is_npu_optimizer_valid) { (void) ctx->GetAttr("_variable_format_optimize", &variable_format_optimize); @@ -595,6 +597,8 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr (void) ctx->GetAttr("_aicore_num", &aicore_num); (void) ctx->GetAttr("_all_tensor_not_empty", &all_tensor_not_empty); (void) ctx->GetAttr("_auto_multistream_parallel_mode", &auto_multistream_parallel_mode); + (void) ctx->GetAttr("_oo_level", &oo_level); + (void) 
ctx->GetAttr("_optimization_switch", &optimization_switch); } // session options @@ -669,9 +673,13 @@ std::map NpuAttrs::GetSessOptions(const OpKernelConstr sess_options["all_tensor_not_empty"] = all_tensor_not_empty; sess_options["auto_multistream_parallel_mode"] = auto_multistream_parallel_mode; sess_options["ge.autoMultistreamParallelMode"] = auto_multistream_parallel_mode; - SetForbiddenClosePassOn(sess_options); sess_options["aicore_num"] = aicore_num; sess_options["ge.aicoreNum"] = aicore_num; + sess_options["ge.oo.level"] = oo_level; + sess_options["oo_level"] = oo_level; + sess_options["ge.optimizationSwitch"] = optimization_switch; + sess_options["optimization_switch"] = optimization_switch; + SetForbiddenClosePassOn(sess_options); return sess_options; } @@ -735,6 +743,8 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr std::string aicore_num; std::string oo_constant_folding; std::string input_batch_cpy; + std::string oo_level; + std::string optimization_switch; if (ctx != nullptr && ctx->GetAttr("_NpuOptimizer", &npuOptimizer) == Status::OK()) { (void) ctx->GetAttr("_precision_mode", &precision_mode); @@ -779,6 +789,8 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr (void) ctx->GetAttr("_aicore_num", &aicore_num); (void) ctx->GetAttr("_oo_constant_folding", &oo_constant_folding); (void) ctx->GetAttr("_input_batch_cpy", &input_batch_cpy); + (void) ctx->GetAttr("_oo_level", &oo_level); + (void) ctx->GetAttr("_optimization_switch", &optimization_switch); } std::lock_guard lock(mutex_); @@ -847,6 +859,10 @@ std::map NpuAttrs::GetInitOptions(const OpKernelConstr init_options_["input_batch_cpy"] = input_batch_cpy; init_options_["ge.inputBatchCpy"] = input_batch_cpy; init_options_["ge.inputPlacement"] = "DeviceHbm"; + init_options_["oo_level"] = oo_level; + init_options_["ge.oo.level"] = oo_level; + init_options_["optimization_switch"] = optimization_switch; + init_options_["ge.optimizationSwitch"] = optimization_switch; 
SetForbiddenClosePassOn(init_options_); return init_options_; } @@ -1286,6 +1302,8 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & std::string all_tensor_not_empty; std::string auto_multistream_parallel_mode; std::string compile_hybrid_mode; + std::string oo_level; + std::string optimization_switch; auto NpuOptimizer_value = attrs.Find("_NpuOptimizer"); auto enable_data_pre_proc_value = attrs.Find("_enable_data_pre_proc"); @@ -1389,6 +1407,8 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & auto all_tensor_not_empty_value = attrs.Find("_all_tensor_not_empty"); auto auto_multistream_parallel_mode_value = attrs.Find("_auto_multistream_parallel_mode"); auto compile_hybrid_mode_value = attrs.Find("_compile_hybrid_mode"); + auto oo_level_value = attrs.Find("_oo_level"); + auto optimization_switch_value = attrs.Find("_optimization_switch"); if (NpuOptimizer_value != nullptr) { do_npu_optimizer = "1"; if (enable_data_pre_proc_value != nullptr) { @@ -1718,6 +1738,12 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & if (auto_multistream_parallel_mode_value != nullptr) { auto_multistream_parallel_mode = auto_multistream_parallel_mode_value->s(); } + if (oo_level_value != nullptr) { + oo_level = oo_level_value->s(); + } + if (optimization_switch_value != nullptr) { + optimization_switch = optimization_switch_value->s(); + } } all_options["variable_format_optimize"] = variable_format_optimize; @@ -1847,6 +1873,10 @@ std::map NpuAttrs::GetAllAttrOptions(const AttrSlice & all_options["ge.inputBatchCpy"] = input_batch_cpy; all_options["shape_generalization_mode"] = shape_generalization_mode; all_options["compile_hybrid_mode"] = compile_hybrid_mode; + all_options["oo_level"] = oo_level; + all_options["ge.oo.level"] = oo_level; + all_options["optimization_switch"] = optimization_switch; + all_options["ge.optimizationSwitch"] = optimization_switch; return all_options; } @@ -1978,6 +2008,8 @@ Status NpuAttrs::SetNpuOptimizerAttr(const 
GraphOptimizationPassOptions &options bool all_tensor_not_empty = false; std::string auto_multistream_parallel_mode; std::string compile_hybrid_mode; + std::string oo_level = "O3"; + std::string optimization_switch; const RewriterConfig &rewrite_options = options.session_options->config.graph_options().rewrite_options(); for (const auto &custom_optimizer : rewrite_options.custom_optimizers()) { if (custom_optimizer.name() == "NpuOptimizer") { @@ -2308,7 +2340,16 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options } } else if (params.count("input_shape") == 0 && params.count("dynamic_dims") == 0 && params.count("dynamic_node_type") == 0) { - // the three parameters are not set normally. + if (params.count("compile_hybrid_mode") > 0) { + compile_hybrid_mode = std::to_string(params.at("compile_hybrid_mode").i()); + if (compile_hybrid_mode == "1") { + ADP_LOG(ERROR) + << "input_shape, dynamic_dims and dynamic_node_type should be set when compile_hybrid_mode is 1"; + LOG(ERROR) << "input_shape, dynamic_dims and dynamic_node_type should be set when compile_hybrid_mode is 1"; + return errors::Internal( + "input_shape, dynamic_dims and dynamic_node_type should be set when compile_hybrid_mode is 1"); + } + } } else { ADP_LOG(FATAL) << "input_shape, dynamic_dims and dynamic_node_type should use together."; LOG(FATAL) << "input_shape, dynamic_dims and dynamic_node_type should use together."; @@ -2573,6 +2614,17 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options init_options_["ge.inputBatchCpy"] = input_batch_cpy_str; init_options_["ge.inputPlacement"] = "DeviceHbm"; } + if (params.count("oo_level") > 0) { + oo_level = params.at("oo_level").s(); + init_options_["oo_level"] = oo_level; + init_options_["ge.oo.level"] = oo_level; + } + if (params.count("optimization_switch") > 0) { + optimization_switch = params.at("optimization_switch").s(); + init_options_["optimization_switch"] = optimization_switch; + 
init_options_["ge.optimizationSwitch"] = optimization_switch; + } + if (params.count("jit_compile") > 0) { const static std::vector kJitCompileList = {"true", "false", @@ -2671,6 +2723,11 @@ Status NpuAttrs::SetNpuOptimizerAttr(const GraphOptimizationPassOptions &options sess_options["auto_multistream_parallel_mode"] = auto_multistream_parallel_mode; sess_options["ge.autoMultistreamParallelMode"] = auto_multistream_parallel_mode; graph_options["compile_hybrid_mode"] = compile_hybrid_mode; + sess_options["oo_level"] = oo_level; + sess_options["ge.oo.level"] = oo_level; + sess_options["optimization_switch"] = optimization_switch; + sess_options["ge.optimizationSwitch"] = optimization_switch; + init_options_["profiling_mode"] = std::to_string(static_cast(profiling_mode)); init_options_[ge::OPTION_EXEC_PROFILING_MODE] = std::to_string(static_cast(profiling_mode)); init_options_["profiling_options"] = profiling_options; @@ -2792,6 +2849,10 @@ void NpuAttrs::LogOptions(const std::map &options) { // tf场景存在某些不可关闭pass,因此需要默认设置forbidden_close_pass为on,即开启这些不可关闭的pass void NpuAttrs::SetForbiddenClosePassOn(std::map &option) { - option["ge.optimizationSwitch"].append("forbidden_close_pass:on"); + if (option["ge.optimizationSwitch"].empty()) { + option["ge.optimizationSwitch"] = "forbidden_close_pass:on"; + } else { + option["ge.optimizationSwitch"].append(";forbidden_close_pass:on"); + } } } // namespace tensorflow diff --git a/tf_adapter_2.x/CMakeLists.txt b/tf_adapter_2.x/CMakeLists.txt index 04c055b6d22b8be19224ce2ba5e3785490599fd0..165a231bc5e96fe60ebf147930d0b156ad004715 100644 --- a/tf_adapter_2.x/CMakeLists.txt +++ b/tf_adapter_2.x/CMakeLists.txt @@ -36,6 +36,16 @@ else () set(CMAKE_C_FLAGS "${COMPILE_FLAG} ${CMAKE_C_FLAGS}") set(CMAKE_CXX_FLAGS "${COMPILE_FLAG} ${CMAKE_CXX_FLAGS}") endforeach (COMPILE_FLAG) + + file(COPY ${ASCEND_INSTALLED_PATH}/opensdk/opensdk/include/metadef/register/register.h DESTINATION ${CMAKE_CURRENT_LIST_DIR}/inc/graph_metadef/register/) + 
file(COPY ${ASCEND_INSTALLED_PATH}/opensdk/opensdk/include/metadef/external/register/register.h DESTINATION ${CMAKE_CURRENT_LIST_DIR}/inc/include/register/) + configure_file( + ${ASCEND_INSTALLED_PATH}/opensdk/opensdk/include/metadef/common/ge_common/inner_error_codes.h + ${CMAKE_CURRENT_LIST_DIR}/inc/graph_metadef/common/ge_common/ge_inner_error_codes.h + COPYONLY + ) + include_directories(${CMAKE_CURRENT_LIST_DIR}/inc) + include_directories(${CMAKE_CURRENT_LIST_DIR}/inc/graph_metadef) endif () include(${CMAKE_CURRENT_LIST_DIR}/cmake/nlohmann_json.cmake) diff --git a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp index 2671a05795a60610450629d031485f5d64104d9f..d1ed19793fb109242e08adfad8e0da82e53d018c 100644 --- a/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp +++ b/tf_adapter_2.x/npu_device/core/npu_wrapper.cpp @@ -98,6 +98,8 @@ const std::map kGlobalConfigOptions = { {"oo_constant_folding", "ge.oo.constantFolding"}, {"input_batch_cpy", "ge.inputBatchCpy"}, {"shape_generalization_mode", "shape_generalization_mode"}, + {"oo_level", "ge.oo.level"}, + {"optimization_switch", "ge.optimizationSwitch"}, // private options {"_distribute.rank_id", ge::OPTION_EXEC_RANK_ID}, {"_distribute.rank_table", ge::OPTION_EXEC_RANK_TABLE_FILE}, diff --git a/tf_adapter_2.x/python/npu_device/configs/npu_config.py b/tf_adapter_2.x/python/npu_device/configs/npu_config.py index b18bf56d9ac7f425ce7e7de028f85b708312b028..da66c4eac6585145ad76082634db12f02159b631 100644 --- a/tf_adapter_2.x/python/npu_device/configs/npu_config.py +++ b/tf_adapter_2.x/python/npu_device/configs/npu_config.py @@ -86,5 +86,7 @@ class NpuConfig(NpuBaseConfig): self.shape_generalization_mode = OptionValue("STRICT", ["STRICT", "FULL", "ADAPTIVE"]) self.all_tensor_not_empty = OptionValue(False, [True, False]) self.auto_multistream_parallel_mode = OptionValue(None, ['cv']) + self.oo_level = OptionValue("O3", ["O0", "O1", "O2", "O3"]) + self.optimization_switch = 
OptionValue(None, None) super(NpuConfig, self).__init__()