diff --git a/.gitmodules b/.gitmodules index 2fcbd218bcf99f..c678571d22a11e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,8 @@ [submodule "inference-engine/thirdparty/ade"] path = inference-engine/thirdparty/ade url = https://github.com/opencv/ade.git + ignore = dirty +[submodule "inference-engine/thirdparty/ngraph"] + path = inference-engine/thirdparty/ngraph + url = https://github.com/NervanaSystems/ngraph.git + ignore = dirty diff --git a/README.md b/README.md index 5e579bf7fa9896..8d1dd329b887a6 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # [OpenVINO™ Toolkit](https://01.org/openvinotoolkit) - Deep Learning Deployment Toolkit repository -[![Stable release](https://img.shields.io/badge/version-2019.R1-green.svg)](https://github.com/opencv/dldt/releases/tag/2019_R1) +[![Stable release](https://img.shields.io/badge/version-2019.R2-green.svg)](https://github.com/opencv/dldt/releases/tag/2019_R2) [![Apache License Version 2.0](https://img.shields.io/badge/license-Apache_2.0-green.svg)](LICENSE) This toolkit allows developers to deploy pre-trained deep learning models through a high-level C++ Inference Engine API integrated with application logic. diff --git a/inference-engine/CMakeLists.txt b/inference-engine/CMakeLists.txt index 1c3d6eab4bb9f5..0d449c9432fdb4 100644 --- a/inference-engine/CMakeLists.txt +++ b/inference-engine/CMakeLists.txt @@ -2,149 +2,52 @@ # SPDX-License-Identifier: Apache-2.0 # -cmake_minimum_required(VERSION 3.5 FATAL_ERROR) - -project(InferenceEngine) - -set(DEV_BUILD TRUE) - -include(CTest) - -## WA for problem with gtest submodule. It cannot detect uint32 type. -## remove Gtest submodule and this two lines together -include (CheckTypeSize) -check_type_size (uint32_t uint32_t LANGUAGE CXX) - -if (UNIX AND NOT APPLE) -set(LINUX TRUE) -endif() - -option (OS_FOLDER "create OS dedicated folder in output" OFF) - -if(CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l") - set (ARCH_FOLDER armv7l) -elseif("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") - set (ARCH_FOLDER intel64) -else() - set (ARCH_FOLDER ia32) -endif() - -if (OS_FOLDER) - message ("**** OS FOLDER IS: [${OS_FOLDER}]") - if ("${OS_FOLDER}" STREQUAL "ON") - message ("**** USING OS FOLDER: [${CMAKE_SYSTEM_NAME}]") - set (BIN_FOLDER bin/${CMAKE_SYSTEM_NAME}/${ARCH_FOLDER}) - else() - set (BIN_FOLDER bin/${OS_FOLDER}/${ARCH_FOLDER}) - endif() +if (APPLE) + # due to https://cmake.org/cmake/help/v3.12/policy/CMP0068.html + cmake_minimum_required(VERSION 3.9 FATAL_ERROR) else() - set (BIN_FOLDER bin/${ARCH_FOLDER}) -endif() - -set (IE_MAIN_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -set (CMAKE_MODULE_PATH "${IE_MAIN_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) - -#printing debug messages -include (debug) - -if("${CMAKE_BUILD_TYPE}" STREQUAL "") - debug_message(STATUS "CMAKE_BUILD_TYPE not defined, 'Release' will be used") - set(CMAKE_BUILD_TYPE "Release") -endif() -message(STATUS "BUILD_CONFIGURATION: ${CMAKE_BUILD_TYPE}") - -if(COVERAGE) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0") + cmake_minimum_required(VERSION 3.7.2 FATAL_ERROR) endif() -if (UNIX) - SET(LIB_DL ${CMAKE_DL_LIBS}) -endif() - -set (OUTPUT_ROOT ${IE_MAIN_SOURCE_DIR}) - -include(os_flags) - -#resolving dependencies for the project -include (dependencies) +project(InferenceEngine) -set(CMAKE_DEBUG_POSTFIX ${IE_DEBUG_POSTFIX}) -set(CMAKE_RELEASE_POSTFIX ${IE_RELEASE_POSTFIX}) +set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) +set(IE_MAIN_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -if (WIN32) - # Support 
CMake multiconfiguration for Visual Studio build - set(IE_BUILD_POSTFIX $<$<CONFIG:Debug>:${IE_DEBUG_POSTFIX}>$<$<CONFIG:Release>:${IE_RELEASE_POSTFIX}>) - set(IE_BUILD_CONFIGURATION $<CONFIG>) -else () - if (${CMAKE_BUILD_TYPE} STREQUAL "Debug" ) - set(IE_BUILD_POSTFIX ${IE_DEBUG_POSTFIX}) - else() - set(IE_BUILD_POSTFIX ${IE_RELEASE_POSTFIX}) - endif() - set(IE_BUILD_CONFIGURATION ${CMAKE_BUILD_TYPE}) -endif() +include(CTest) +include(features) -add_definitions(-DIE_BUILD_POSTFIX=\"${IE_BUILD_POSTFIX}\") - -if(NOT(UNIX)) - if (WIN32) - #set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT") - #set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd") - endif() - set (CMAKE_LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}) - set (CMAKE_LIBRARY_PATH ${OUTPUT_ROOT}/${BIN_FOLDER}) - set (CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}) - set (CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}) - set (CMAKE_PDB_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}) - set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}) - set (LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}) - set (LIBRARY_OUTPUT_PATH ${LIBRARY_OUTPUT_DIRECTORY}) # compatibility issue: linux uses LIBRARY_OUTPUT_PATH, windows uses LIBRARY_OUTPUT_DIRECTORY -else () - set (CMAKE_LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION}/lib) - set (CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION}/lib) - set (CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION}) - set (CMAKE_PDB_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION}) - set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION}) - set (LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION}/lib) - set (LIBRARY_OUTPUT_PATH ${LIBRARY_OUTPUT_DIRECTORY}/lib) -endif() +# include developer package +include(developer_package) -if (APPLE) - set(CMAKE_MACOSX_RPATH 1) -endif(APPLE) +# These options are shared with 3rdparty plugins +# by means of developer package +include(check_features) -#rpath fully disabled -if (NOT ENABLE_PLUGIN_RPATH) - SET (CMAKE_SKIP_RPATH TRUE) -endif() - -#Use solution folders. -set_property(GLOBAL PROPERTY USE_FOLDERS ON) -#message("=====================> ${CMAKE_BUILD_TYPE} <=====================") +# resolving dependencies for the project +include(dependencies) message (STATUS "PROJECT ............................... " ${PROJECT_NAME}) message (STATUS "CMAKE_BINARY_DIR ...................... " ${CMAKE_BINARY_DIR}) message (STATUS "IE_MAIN_SOURCE_DIR .................... " ${IE_MAIN_SOURCE_DIR}) message (STATUS "CMAKE_GENERATOR ....................... " ${CMAKE_GENERATOR}) message (STATUS "CMAKE_C_COMPILER_ID ................... " ${CMAKE_C_COMPILER_ID}) +message (STATUS "CMAKE_BUILD_TYPE ...................... 
" ${CMAKE_BUILD_TYPE}) -include(sdl) - -set (CMAKE_POSITION_INDEPENDENT_CODE ON) - -include (sanitizer) - -include(CheckCXXCompilerFlag) +add_subdirectory(src) -include(cpplint) +if(ENABLE_TESTS) + add_subdirectory(tests) +endif() -add_subdirectory(src) -add_subdirectory(tests) add_subdirectory(thirdparty) -set(InferenceEngine_DIR "${CMAKE_BINARY_DIR}") -#to be able to link -set (LIB_FOLDER ${IE_MAIN_SOURCE_DIR}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION}/lib) +add_subdirectory(tools) + +if (ENABLE_SAMPLES) + # hint for find_package(InferenceEngine in the samples folder) + set(InferenceEngine_DIR "${CMAKE_BINARY_DIR}") +endif() # gflags and format_reader targets are kept inside of samples directory and # they must be built even if samples build is disabled (required for tests and tools). diff --git a/inference-engine/README.md b/inference-engine/README.md index 2bbd7e8d4acd8e..cc2738330d9a04 100644 --- a/inference-engine/README.md +++ b/inference-engine/README.md @@ -63,7 +63,7 @@ The software was validated on: git submodule update --recursive ``` 2. Install build dependencies using the `install_dependencies.sh` script in the project root folder. -3. By default, the build enables the Inference Engine GPU plugin to infer models on your Intel® Processor Graphics. This requires you to [Install Intel® Graphics Compute Runtime for OpenCL™ Driver package 19.04.12237](https://github.com/intel/compute-runtime/releases/tag/19.04.12237) before running the build. If you don't want to use the GPU plugin, use the `-DENABLE_CLDNN=ON` CMake build option and skip the installation of the Intel® Graphics Compute Runtime for OpenCL™ Driver. +3. By default, the build enables the Inference Engine GPU plugin to infer models on your Intel® Processor Graphics. This requires you to [Install Intel® Graphics Compute Runtime for OpenCL™ Driver package 19.04.12237](https://github.com/intel/compute-runtime/releases/tag/19.04.12237) before running the build. If you don't want to use the GPU plugin, use the `-DENABLE_CLDNN=OFF` CMake build option and skip the installation of the Intel® Graphics Compute Runtime for OpenCL™ Driver. 4. Create a build folder: ```sh mkdir build && cd build @@ -82,7 +82,7 @@ You can use the following additional build options: - To switch to OpenBLAS\* implementation, use the `GEMM=OPENBLAS` option and `BLAS_INCLUDE_DIRS` and `BLAS_LIBRARIES` CMake options to specify path to the OpenBLAS headers and library. For example use the following options on CentOS\*: `-DGEMM=OPENBLAS -DBLAS_INCLUDE_DIRS=/usr/include/openblas -DBLAS_LIBRARIES=/usr/lib64/libopenblas.so.0`. -- To switch to the optimized MKL-ML\* GEMM implementation, use `-DGEMM=MKL` and `-DMKLROOT=` CMake options to specify a path to unpacked MKL-ML with the `include` and `lib` folders. MKL-ML\* package can be downloaded from the [MKL-DNN repository](https://github.com/intel/mkl-dnn/releases/download/v0.17/mklml_lnx_2019.0.1.20180928.tgz). +- To switch to the optimized MKL-ML\* GEMM implementation, use `-DGEMM=MKL` and `-DMKLROOT=` CMake options to specify a path to unpacked MKL-ML with the `include` and `lib` folders. MKL-ML\* package can be downloaded from the [MKL-DNN repository](https://github.com/intel/mkl-dnn/releases/download/v0.19/mklml_lnx_2019.0.5.20190502.tgz). - Threading Building Blocks (TBB) is used by default. To build the Inference Engine with OpenMP* threading, set the `-DTHREADING=OMP` option. @@ -165,7 +165,7 @@ Native compilation of the Inference Engine is the most straightforward solution. 
cmake -DCMAKE_BUILD_TYPE=Release \ -DENABLE_SSE42=OFF \ -DTHREADING=SEQ \ - -DENABLE_GNA=OFF .. && make -j2 + -DENABLE_GNA=OFF .. && make ``` ### Cross Compilation Using Docker* @@ -302,7 +302,7 @@ The software was validated on: 3. Install OpenBLAS: 1. Download [OpenBLAS\*](https://sourceforge.net/projects/openblas/files/v0.2.14/OpenBLAS-v0.2.14-Win64-int64.zip/download) 2. Unzip the downloaded package to a directory on your machine. In this document, this directory is referred to as ``. -4. By default, the build enables the Inference Engine GPU plugin to infer models on your Intel® Processor Graphics. This requires you to [download and install the Intel® Graphics Driver for Windows* [25.20] driver package](https://downloadcenter.intel.com/download/28646/Intel-Graphics-Windows-10-DCH-Drivers?product=80939) before running the build. If you don't want to use the GPU plugin, use the `-DENABLE_CLDNN=ON` CMake build option and skip the installation of the Intel® Graphics Driver. +4. By default, the build enables the Inference Engine GPU plugin to infer models on your Intel® Processor Graphics. This requires you to [download and install the Intel® Graphics Driver for Windows* [25.20] driver package](https://downloadcenter.intel.com/download/28646/Intel-Graphics-Windows-10-DCH-Drivers?product=80939) before running the build. If you don't want to use the GPU plugin, use the `-DENABLE_CLDNN=OFF` CMake build option and skip the installation of the Intel® Graphics Driver. 5. Create build directory: ```sh mkdir build @@ -323,7 +323,7 @@ cmake -G "Visual Studio 15 2017 Win64" -T "Intel C++ Compiler 18.0" ^ - Internal JIT GEMM implementation is used by default. - To switch to OpenBLAS GEMM implementation, use the `-DGEMM=OPENBLAS` CMake option and specify path to OpenBLAS using the `-DBLAS_INCLUDE_DIRS=\include` and `-DBLAS_LIBRARIES=\lib\libopenblas.dll.a` options. Prebuilt OpenBLAS\* package can be downloaded [here](https://sourceforge.net/projects/openblas/files/v0.2.14/OpenBLAS-v0.2.14-Win64-int64.zip/download). mingw64* runtime dependencies can be downloaded [here](https://sourceforge.net/projects/openblas/files/v0.2.14/mingw64_dll.zip/download). -- To switch to the optimized MKL-ML\* GEMM implementation, use the `-DGEMM=MKL` and `-DMKLROOT=` CMake options to specify a path to unpacked MKL-ML with the `include` and `lib` folders. MKL-ML\* package can be downloaded from the [MKL-DNN repository](https://github.com/intel/mkl-dnn/releases/download/v0.17/mklml_win_2019.0.1.20180928.zip). +- To switch to the optimized MKL-ML\* GEMM implementation, use the `-DGEMM=MKL` and `-DMKLROOT=` CMake options to specify a path to unpacked MKL-ML with the `include` and `lib` folders. MKL-ML\* package can be downloaded from the [MKL-DNN repository](https://github.com/intel/mkl-dnn/releases/download/v0.19/mklml_win_2019.0.5.20190502.zip). - Threading Building Blocks (TBB) is used by default. To build the Inference Engine with OpenMP* threading, set the `-DTHREADING=OMP` option. @@ -385,7 +385,7 @@ The software was validated on: You can use the following additional build options: - Internal JIT GEMM implementation is used by default. -- To switch to the optimized MKL-ML\* GEMM implementation, use `-DGEMM=MKL` and `-DMKLROOT=` cmake options to specify a path to unpacked MKL-ML with the `include` and `lib` folders. 
MKL-ML\* package can be downloaded [here](https://github.com/intel/mkl-dnn/releases/download/v0.17.1/mklml_mac_2019.0.1.20180928.tgz) +- To switch to the optimized MKL-ML\* GEMM implementation, use `-DGEMM=MKL` and `-DMKLROOT=` cmake options to specify a path to unpacked MKL-ML with the `include` and `lib` folders. MKL-ML\* package can be downloaded [here](https://github.com/intel/mkl-dnn/releases/download/v0.19/mklml_mac_2019.0.5.20190502.tgz) - Threading Building Blocks (TBB) is used by default. To build the Inference Engine with OpenMP* threading, set the `-DTHREADING=OMP` option. diff --git a/inference-engine/cmake/FindITT.cmake b/inference-engine/cmake/FindITT.cmake new file mode 100644 index 00000000000000..07241548e0f0a7 --- /dev/null +++ b/inference-engine/cmake/FindITT.cmake @@ -0,0 +1,57 @@ +# Copyright (C) 2018-2019 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +unset(ITT_INCLUDE_DIR CACHE) +unset(ITT_LIB CACHE) + +if(NOT DEFINED INTEL_VTUNE_DIR AND DEFINED ENV{INTEL_VTUNE_DIR}) + set(INTEL_VTUNE_DIR "$ENV{INTEL_VTUNE_DIR}") +endif() +if(NOT DEFINED INTEL_VTUNE_DIR) + if(EXISTS "/opt/intel/vtune_amplifier_xe/include") + set(INTEL_VTUNE_DIR "/opt/intel/vtune_amplifier_xe") + elseif(EXISTS "/opt/intel/vtune_amplifier/include") + set(INTEL_VTUNE_DIR "/opt/intel/vtune_amplifier") + elseif (EXISTS "C:/Program Files (x86)/IntelSWTools/VTune Amplifier XE") + set(INTEL_VTUNE_DIR "C:/Program Files (x86)/IntelSWTools/VTune Amplifier XE") + elseif (EXISTS "C:/Program Files (x86)/IntelSWTools/VTune Amplifier") + set(INTEL_VTUNE_DIR "C:/Program Files (x86)/IntelSWTools/VTune Amplifier") + elseif (EXISTS "$ENV{HOME}/intel/vtune_amplifier_2019") + set(INTEL_VTUNE_DIR "$ENV{HOME}/intel/vtune_amplifier_2019") + endif() +endif() + +if(DEFINED INTEL_VTUNE_DIR) + message(STATUS "INTEL_VTUNE_DIR = ${INTEL_VTUNE_DIR}") + + find_path(ITT_INCLUDE_DIR + FILES + ittnotify.h + PATHS "${INTEL_VTUNE_DIR}/include/") + + find_library(ITT_LIB + "libittnotify${CMAKE_STATIC_LIBRARY_SUFFIX}" + PATHS ${INTEL_VTUNE_DIR}/lib64) + + set(Located_ITT_LIBS ${ITT_LIB}) + set(Located_ITT_INCLUDE_DIRS ${ITT_INCLUDE_DIR}) +else() + message(STATUS "INTEL_VTUNE_DIR is not defined") +endif() + +# Handle find_package() arguments, and set INTEL_ITT_FOUND +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(INTEL_ITT + REQUIRED_VARS + Located_ITT_INCLUDE_DIRS + Located_ITT_LIBS) + +if(INTEL_ITT_FOUND) + add_library(ittnotify STATIC IMPORTED GLOBAL) + set_target_properties(ittnotify PROPERTIES IMPORTED_LOCATION "${Located_ITT_LIBS}" + INTERFACE_INCLUDE_DIRECTORIES ${Located_ITT_INCLUDE_DIRS} + INTERFACE_COMPILE_DEFINITIONS ENABLE_PROFILING_ITT) + + set(INTEL_ITT_LIBS ittnotify ${CMAKE_DL_LIBS}) +endif() diff --git a/inference-engine/cmake/FindlibGNA.cmake b/inference-engine/cmake/FindlibGNA.cmake index eccf7591797cf3..4d7978241addc5 100644 --- a/inference-engine/cmake/FindlibGNA.cmake +++ b/inference-engine/cmake/FindlibGNA.cmake @@ -1,5 +1,4 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # diff --git a/inference-engine/cmake/arm.toolchain.cmake b/inference-engine/cmake/arm.toolchain.cmake index 2890f1aa86cffe..a2bf53480d423f 100644 --- a/inference-engine/cmake/arm.toolchain.cmake +++ b/inference-engine/cmake/arm.toolchain.cmake @@ -1,3 +1,7 @@ +# Copyright (C) 2018-2019 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + set(CMAKE_SYSTEM_NAME Linux) set(CMAKE_SYSTEM_PROCESSOR armv7l) diff --git a/inference-engine/cmake/arm64.toolchain.cmake 
b/inference-engine/cmake/arm64.toolchain.cmake new file mode 100644 index 00000000000000..9d3cc6e3976397 --- /dev/null +++ b/inference-engine/cmake/arm64.toolchain.cmake @@ -0,0 +1,14 @@ +# Copyright (C) 2018-2019 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR aarch64) + +set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc) +set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++) + +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) diff --git a/inference-engine/cmake/check_features.cmake b/inference-engine/cmake/check_features.cmake index 0b137b651f05d7..71c9007b032152 100644 --- a/inference-engine/cmake/check_features.cmake +++ b/inference-engine/cmake/check_features.cmake @@ -1,12 +1,7 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # -include("features") -include("mode") -include("itt") - #64 bits platform if ("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") message(STATUS "Detected 64 bit architecture") @@ -18,8 +13,7 @@ else() SET(ARCH_32 ON) endif() -if (ARCH_64) -else() +if (NOT ARCH_64) if (UNIX OR APPLE) SET(ENABLE_CLDNN OFF) endif() @@ -31,6 +25,7 @@ if (APPLE) set(ENABLE_GNA OFF) set(ENABLE_CLDNN OFF) SET(ENABLE_MYRIAD OFF) + SET(ENABLE_VPU OFF) endif() @@ -42,21 +37,6 @@ if (WIN32) endif() endif() -# Linux specific - not all OS'es are supported -if (LINUX) - include("linux_name") - get_linux_name(LINUX_OS_NAME) - if (LINUX_OS_NAME) - if (NOT( - ${LINUX_OS_NAME} STREQUAL "Ubuntu 14.04" OR - ${LINUX_OS_NAME} STREQUAL "Ubuntu 16.04" OR - ${LINUX_OS_NAME} STREQUAL "CentOS 7")) - endif() - else () - message(WARNING "Cannot detect Linux OS via reading /etc/*-release:\n ${release_data}") - endif () -endif () - if (NOT ENABLE_MKL_DNN) set(ENABLE_MKL OFF) endif() @@ -65,10 +45,6 @@ if (NOT ENABLE_VPU) set(ENABLE_MYRIAD OFF) endif() -if (NOT ENABLE_MYRIAD) - set(ENABLE_VPU OFF) -endif() - #next section set defines to be accesible in c++/c code for certain feature if (ENABLE_PROFILING_RAW) add_definitions(-DENABLE_PROFILING_RAW=1) @@ -82,22 +58,18 @@ if (ENABLE_MYRIAD) add_definitions(-DENABLE_MYRIAD=1) endif() -if (ENABLE_MYX_PCIE AND ENABLE_MYRIAD) - add_definitions(-DENABLE_MYX_PCIE=1) -endif() - if (ENABLE_MYRIAD_NO_BOOT AND ENABLE_MYRIAD ) add_definitions(-DENABLE_MYRIAD_NO_BOOT=1) endif() -if (ENABLE_MYX_PCIE AND ENABLE_MYRIAD_NO_BOOT) - message(FATAL_ERROR "ENABLE_MYX_PCIE and ENABLE_MYRIAD_NO_BOOT can't be enabled at the same time") -endif() - if (ENABLE_MKL_DNN) add_definitions(-DENABLE_MKL_DNN=1) endif() +if (ENABLE_UNICODE_PATH_SUPPORT) + add_definitions(-DENABLE_UNICODE_PATH_SUPPORT=1) +endif() + if (ENABLE_GNA) add_definitions(-DENABLE_GNA) endif() diff --git a/inference-engine/cmake/config.cmake.in b/inference-engine/cmake/config.cmake.in index 7c3459f5acefa3..d9a6918d6ce96d 100644 --- a/inference-engine/cmake/config.cmake.in +++ b/inference-engine/cmake/config.cmake.in @@ -1,5 +1,4 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # diff --git a/inference-engine/cmake/cppcheck.cmake b/inference-engine/cmake/cppcheck.cmake new file mode 100644 index 00000000000000..9b2b5c5a2928ec --- /dev/null +++ b/inference-engine/cmake/cppcheck.cmake @@ -0,0 +1,28 @@ +# Copyright (C) 2019 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +if(ENABLE_CPPCHECK) + find_program(CPPCHECK_EXECUTABLE cppcheck) + + if(NOT 
CPPCHECK_EXECUTABLE) + message(WARNING "cppcheck was not found : disable static analysis") + set(ENABLE_CPPCHECK OFF) + endif() +endif() + +function(add_cppcheck) + if(NOT ENABLE_CPPCHECK) + return() + endif() + + set_property( + TARGET ${ARGN} + PROPERTY CXX_CPPCHECK + ${CPPCHECK_EXECUTABLE} + "--suppress=*:*/temp/*" + "--suppress=*:*/thirdparty/*" + "--error-exitcode=1" + "--template={file}:{line}: error: [cppcheck:{severity}] {message}" + "--quiet") +endfunction() diff --git a/inference-engine/cmake/cpplint.cmake b/inference-engine/cmake/cpplint.cmake index f4eca4c348f7b4..ff657030539a14 100644 --- a/inference-engine/cmake/cpplint.cmake +++ b/inference-engine/cmake/cpplint.cmake @@ -1,13 +1,12 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # if(ENABLE_CPPLINT) find_package(PythonInterp 2.7 EXACT) - if(NOT PYTHONINTERP_FOUND) - message(WARNING "Python was not found (required for cpplint check)") + if(NOT PYTHONINTERP_FOUND OR NOT PYTHON_VERSION_MAJOR EQUAL 2) + message(WARNING "Python 2.7 was not found (required for cpplint check)") set(ENABLE_CPPLINT OFF) endif() endif() @@ -127,7 +126,7 @@ function(add_cpplint_report_target) -D "OUTPUT_FILE=${cppcheck_output_file}" -P "${IE_MAIN_SOURCE_DIR}/cmake/cpplint_to_cppcheck_xml.cmake" DEPENDS - ${cpplint_output_file} + "${cpplint_output_file}" "${IE_MAIN_SOURCE_DIR}/scripts/cpplint_to_cppcheckxml.py" "${IE_MAIN_SOURCE_DIR}/cmake/cpplint_to_cppcheck_xml.cmake" COMMENT diff --git a/inference-engine/cmake/cpplint_html.cmake b/inference-engine/cmake/cpplint_html.cmake index 55992d8b2dc288..9bd074ccbb33be 100644 --- a/inference-engine/cmake/cpplint_html.cmake +++ b/inference-engine/cmake/cpplint_html.cmake @@ -1,5 +1,4 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # @@ -22,9 +21,9 @@ execute_process( file(READ "${REPORT_DIR}/index.html" cur_file_content) -string(REPLACE "Cppcheck" "cpplint" cur_file_content ${cur_file_content}) -string(REPLACE "a tool for static C/C++ code analysis" "an open source lint-like tool from Google" cur_file_content ${cur_file_content}) -string(REPLACE "http://cppcheck.sourceforge.net" "http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py" cur_file_content ${cur_file_content}) -string(REPLACE "IRC: irc://irc.freenode.net/cppcheck" " " cur_file_content ${cur_file_content}) +string(REPLACE "Cppcheck" "cpplint" cur_file_content "${cur_file_content}") +string(REPLACE "a tool for static C/C++ code analysis" "an open source lint-like tool from Google" cur_file_content "${cur_file_content}") +string(REPLACE "http://cppcheck.sourceforge.net" "http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py" cur_file_content "${cur_file_content}") +string(REPLACE "IRC: irc://irc.freenode.net/cppcheck" " " cur_file_content "${cur_file_content}") file(WRITE "${REPORT_DIR}/index.html" "${cur_file_content}") diff --git a/inference-engine/cmake/cpplint_merge.cmake b/inference-engine/cmake/cpplint_merge.cmake index da871573622bb9..75428d54ce2d2c 100644 --- a/inference-engine/cmake/cpplint_merge.cmake +++ b/inference-engine/cmake/cpplint_merge.cmake @@ -1,5 +1,4 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # diff --git a/inference-engine/cmake/cpplint_run.cmake b/inference-engine/cmake/cpplint_run.cmake index f9c9ec58f7898f..5715b57555fc52 100644 --- a/inference-engine/cmake/cpplint_run.cmake +++ b/inference-engine/cmake/cpplint_run.cmake @@ -1,5 +1,4 @@ # Copyright (C) 2018-2019 Intel 
Corporation -# # SPDX-License-Identifier: Apache-2.0 # @@ -22,12 +21,12 @@ execute_process( message("${output}") # Store cpplint output to file (replace problematic symbols) -string(REPLACE "\"" "&quot\;" output ${output}) -string(REPLACE "<" "&lt\;" output ${output}) -string(REPLACE ">" "&gt\;" output ${output}) -string(REPLACE "'" "&apos\;" output ${output}) -string(REPLACE "&" "&amp\;" output ${output}) -file(WRITE "${OUTPUT_FILE}" ${output}) +string(REPLACE "\"" "&quot\;" output "${output}") +string(REPLACE "<" "&lt\;" output "${output}") +string(REPLACE ">" "&gt\;" output "${output}") +string(REPLACE "'" "&apos\;" output "${output}") +string(REPLACE "&" "&amp\;" output "${output}") +file(WRITE "${OUTPUT_FILE}" "${output}") if(NOT SKIP_RETURN_CODE) # Pass through the cpplint return code
diff --git a/inference-engine/cmake/cpplint_to_cppcheck_xml.cmake b/inference-engine/cmake/cpplint_to_cppcheck_xml.cmake index 6651b93a639a14..88d711cd9cb7cf 100644 --- a/inference-engine/cmake/cpplint_to_cppcheck_xml.cmake +++ b/inference-engine/cmake/cpplint_to_cppcheck_xml.cmake @@ -1,5 +1,4 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 #
diff --git a/inference-engine/cmake/debug.cmake b/inference-engine/cmake/debug.cmake index 9aeb2a581fd95c..f9e759f812f7f7 100644 --- a/inference-engine/cmake/debug.cmake +++ b/inference-engine/cmake/debug.cmake @@ -1,5 +1,4 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # @@ -9,7 +8,6 @@ function (debug_message) endif() endfunction() - function(clean_message type) string (REPLACE ";" "" output_string "${ARGN}") execute_process(COMMAND ${CMAKE_COMMAND} -E echo "${output_string}") @@ -47,7 +45,12 @@ function (log_rpath_remove_top component component_remove_top lib lib_remove_top # debug_message(STATUS "LIB-OUT=${lib_dir}") # debug_message(STATUS "TOPLIB-OUT=${top_lib_dir}") - + + if (WIN32) + string (TOLOWER "${top_lib_dir}" top_lib_dir) + string (TOLOWER "${lib_dir}" lib_dir) + endif() + string (REPLACE "${top_lib_dir}" "" component_dir "${lib_dir}") set(RPATH_INFO "${component}=${component_dir}") @@ -56,9 +59,7 @@ function (log_rpath_remove_top component component_remove_top lib lib_remove_top endfunction() function (log_rpath_from_dir component lib_dir) - if(NOT APPLE) - log_rpath_remove_top("${component}" TRUE "${lib_dir}" FALSE) - endif() + log_rpath_remove_top("${component}" TRUE "${lib_dir}" FALSE) endfunction() function (log_rpath component lib_path)
diff --git a/inference-engine/cmake/dependencies.cmake b/inference-engine/cmake/dependencies.cmake index d3cdf9588e4ca3..00a5b8e0f73fec 100644 --- a/inference-engine/cmake/dependencies.cmake +++ b/inference-engine/cmake/dependencies.cmake @@ -1,33 +1,13 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # cmake_policy(SET CMP0054 NEW) -#features trigger supported by build system -include(check_features) -include(debug) - #we have number of dependencies stored on ftp include(dependency_solver) -#prepare temporary folder -if (DEFINED ENV{${DL_SDK_TEMP}} AND NOT $ENV{${DL_SDK_TEMP}} STREQUAL "") - if (WIN32) - string(REPLACE "\\" "\\\\" TEMP $ENV{${DL_SDK_TEMP}}) - else(WIN32) - set(TEMP $ENV{${DL_SDK_TEMP}}) - endif(WIN32) - - if (ENABLE_ALTERNATIVE_TEMP) - set(ALTERNATIVE_PATH ${IE_MAIN_SOURCE_DIR}/temp) - endif() -else () - message(STATUS "DL_SDK_TEMP envionment not set") - set(TEMP ${IE_MAIN_SOURCE_DIR}/temp) -endif () - +set_temp_directory(TEMP "${IE_MAIN_SOURCE_DIR}") include(ExternalProject) @@ -37,9 +17,14 @@ 
else() set(MODELS_BRANCH "master") endif() +include(linux_name) +if(COMMAND get_linux_name) + get_linux_name(LINUX_OS_NAME) +endif() + if (ENABLE_MYRIAD) RESOLVE_DEPENDENCY(VPU_FIRMWARE_MA2450 - ARCHIVE_UNIFIED firmware_ma2450_491.zip + ARCHIVE_UNIFIED firmware_ma2450_676.zip TARGET_PATH "${TEMP}/vpu/firmware/ma2450" ENVIRONMENT "VPU_FIRMWARE_MA2450" FOLDER) @@ -47,12 +32,12 @@ if (ENABLE_MYRIAD) endif () if (ENABLE_MYRIAD) - RESOLVE_DEPENDENCY(VPU_FIRMWARE_MA2480 - ARCHIVE_UNIFIED firmware_ma2480_mdk_R7_9.zip - TARGET_PATH "${TEMP}/vpu/firmware/ma2480" - ENVIRONMENT "VPU_FIRMWARE_MA2480" + RESOLVE_DEPENDENCY(VPU_FIRMWARE_MA2X8X + ARCHIVE_UNIFIED firmware_ma2x8x_mdk_R8_9.zip + TARGET_PATH "${TEMP}/vpu/firmware/ma2x8x" + ENVIRONMENT "VPU_FIRMWARE_MA2X8X" FOLDER) - debug_message(STATUS "ma2480=" ${VPU_FIRMWARE_MA2480}) + debug_message(STATUS "ma2x8x=" ${VPU_FIRMWARE_MA2X8X}) endif () ## enable cblas_gemm from OpenBLAS package @@ -103,7 +88,7 @@ debug_message(STATUS "intel_omp=" ${OMP}) endif () ## TBB package -if (THREADING STREQUAL "TBB") +if (THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") if (WIN32) #TODO: add target_path to be platform specific as well, to avoid following if RESOLVE_DEPENDENCY(TBB @@ -128,57 +113,53 @@ debug_message(STATUS "tbb=" ${TBB}) endif () if (ENABLE_OPENCV) + set(OPENCV_VERSION "4.1.1") + set(OPENCV_BUILD "595") + set(OPENCV_SUFFIX "") if (WIN32) RESOLVE_DEPENDENCY(OPENCV - ARCHIVE_WIN "opencv_4.1.0-0437.zip" - TARGET_PATH "${TEMP}/opencv_4.1.0" + ARCHIVE_WIN "opencv_${OPENCV_VERSION}-${OPENCV_BUILD}.zip" + TARGET_PATH "${TEMP}/opencv_${OPENCV_VERSION}" ENVIRONMENT "OpenCV_DIR" VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*") - log_rpath_from_dir(OPENCV "\\opencv_4.1.0\\bin") + log_rpath_from_dir(OPENCV "\\opencv_${OPENCV_VERSION}\\bin") set( ENV{OpenCV_DIR} ${OPENCV}/cmake ) elseif(APPLE) RESOLVE_DEPENDENCY(OPENCV - ARCHIVE_MAC "opencv_4.1.0-0437_osx.tar.xz" - TARGET_PATH "${TEMP}/opencv_4.1.0_osx" + ARCHIVE_MAC "opencv_${OPENCV_VERSION}-${OPENCV_BUILD}_osx.tar.xz" + TARGET_PATH "${TEMP}/opencv_${OPENCV_VERSION}_osx" ENVIRONMENT "OpenCV_DIR" VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*") - log_rpath_from_dir(OPENCV "opencv_4.1.0_osx/lib") + log_rpath_from_dir(OPENCV "opencv_${OPENCV_VERSION}_osx/lib") set( ENV{OpenCV_DIR} ${OPENCV}/cmake ) elseif(LINUX) -if (${LINUX_OS_NAME} STREQUAL "Ubuntu 16.04") - RESOLVE_DEPENDENCY(OPENCV - ARCHIVE_LIN "opencv_4.1.0-0437_ubuntu16.tar.xz" - TARGET_PATH "${TEMP}/opencv_4.1.0_ubuntu16" - ENVIRONMENT "OpenCV_DIR" - VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*") - log_rpath_from_dir(OPENCV "opencv_4.1.0_ubuntu16/lib") -elseif (${LINUX_OS_NAME} STREQUAL "Ubuntu 18.04") - RESOLVE_DEPENDENCY(OPENCV - ARCHIVE_LIN "opencv_4.1.0-0437_ubuntu18.tar.xz" - TARGET_PATH "${TEMP}/opencv_4.1.0_ubuntu18" - ENVIRONMENT "OpenCV_DIR" - VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*") - log_rpath_from_dir(OPENCV "opencv_4.1.0_ubuntu18/lib") -elseif (${LINUX_OS_NAME} STREQUAL "CentOS 7") - RESOLVE_DEPENDENCY(OPENCV - ARCHIVE_LIN "opencv_4.1.0-0437_centos7.tar.xz" - TARGET_PATH "${TEMP}/opencv_4.1.0_centos" - ENVIRONMENT "OpenCV_DIR" - VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*") - log_rpath_from_dir(OPENCV "opencv_4.1.0_centos/lib") -elseif (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "armv7l" AND - (${LINUX_OS_NAME} STREQUAL "Debian 9" OR - ${LINUX_OS_NAME} STREQUAL "Raspbian 9")) + if (${LINUX_OS_NAME} STREQUAL "Ubuntu 16.04") + set(OPENCV_SUFFIX "ubuntu16") + elseif (${LINUX_OS_NAME} STREQUAL "Ubuntu 18.04") + set(OPENCV_SUFFIX "ubuntu18") + elseif 
(${LINUX_OS_NAME} STREQUAL "CentOS 7") + set(OPENCV_SUFFIX "centos7") + elseif (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "armv7l" AND + (${LINUX_OS_NAME} STREQUAL "Debian 9" OR + ${LINUX_OS_NAME} STREQUAL "Raspbian 9" OR + ${LINUX_OS_NAME} STREQUAL "Debian 10" OR + ${LINUX_OS_NAME} STREQUAL "Raspbian 10")) + set(OPENCV_SUFFIX "debian9arm") + endif() +endif() + +if (OPENCV_SUFFIX) RESOLVE_DEPENDENCY(OPENCV - ARCHIVE_LIN "opencv_4.1.0-0437_debian9arm.tar.xz" - TARGET_PATH "${TEMP}/opencv_4.1.0_debian9arm" + ARCHIVE_LIN "opencv_${OPENCV_VERSION}-${OPENCV_BUILD}_${OPENCV_SUFFIX}.tar.xz" + TARGET_PATH "${TEMP}/opencv_${OPENCV_VERSION}_${OPENCV_SUFFIX}" ENVIRONMENT "OpenCV_DIR" VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*") - log_rpath_from_dir(OPENCV "opencv_4.1.0_debian9arm/lib") -endif() + log_rpath_from_dir(OPENCV "opencv_${OPENCV_VERSION}_${OPENCV_SUFFIX}/lib") set( ENV{OpenCV_DIR} ${OPENCV}/cmake ) endif() + debug_message(STATUS "opencv=" ${OPENCV}) +set(OpenCV_DIR "${OPENCV}" CACHE PATH "Path to OpenCV in temp directory") endif() @@ -191,16 +172,16 @@ if (ENABLE_GNA) endif() configure_file( - "${CMAKE_SOURCE_DIR}/cmake/share/InferenceEngineConfig.cmake.in" + "${PROJECT_SOURCE_DIR}/cmake/share/InferenceEngineConfig.cmake.in" "${CMAKE_BINARY_DIR}/share/InferenceEngineConfig.cmake" @ONLY) configure_file( - "${CMAKE_SOURCE_DIR}/cmake/share/InferenceEngineConfig-version.cmake.in" + "${PROJECT_SOURCE_DIR}/cmake/share/InferenceEngineConfig-version.cmake.in" "${CMAKE_BINARY_DIR}/share/InferenceEngineConfig-version.cmake" COPYONLY) configure_file( - "${CMAKE_SOURCE_DIR}/cmake/ie_parallel.cmake" + "${PROJECT_SOURCE_DIR}/cmake/ie_parallel.cmake" "${CMAKE_BINARY_DIR}/share/ie_parallel.cmake" COPYONLY) diff --git a/inference-engine/cmake/dependency_solver.cmake b/inference-engine/cmake/dependency_solver.cmake index 178b379ff73d6e..552e1f22c2675a 100644 --- a/inference-engine/cmake/dependency_solver.cmake +++ b/inference-engine/cmake/dependency_solver.cmake @@ -1,5 +1,4 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # @@ -106,6 +105,8 @@ function (RESOLVE_DEPENDENCY NAME_OF_CMAKE_VAR) set (FOLDER FALSE) endif() + + #for each dependency type have to do separate things if (ARCHIVE_WIN OR ARCHIVE_LIN OR ARCHIVE_MAC OR ARCHIVE OR ARCHIVE_UNIFIED) if (NOT DEFINED TARGET_PATH) diff --git a/inference-engine/cmake/developer_package.cmake b/inference-engine/cmake/developer_package.cmake new file mode 100644 index 00000000000000..52e0fefd0b078c --- /dev/null +++ b/inference-engine/cmake/developer_package.cmake @@ -0,0 +1,161 @@ +# Copyright (C) 2018 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +# printing debug messages +include(debug) + +if (UNIX AND NOT APPLE) + set(LINUX TRUE) +endif() + +string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} ARCH_FOLDER) +if(ARCH_FOLDER STREQUAL "x86_64" OR ARCH_FOLDER STREQUAL "amd64") # Windows detects Intel's 64-bit CPU as AMD64 + set(ARCH_FOLDER intel64) +elseif(ARCH_FOLDER STREQUAL "i386") + set(ARCH_FOLDER ia32) +endif() + +if(OS_FOLDER) + message ("**** OS FOLDER IS: [${OS_FOLDER}]") + if("${OS_FOLDER}" STREQUAL "ON") + message ("**** USING OS FOLDER: [${CMAKE_SYSTEM_NAME}]") + set(BIN_FOLDER "bin/${CMAKE_SYSTEM_NAME}/${ARCH_FOLDER}") + else() + set(BIN_FOLDER "bin/${OS_FOLDER}/${ARCH_FOLDER}") + endif() +else() + set(BIN_FOLDER "bin/${ARCH_FOLDER}") +endif() + +if("${CMAKE_BUILD_TYPE}" STREQUAL "") + debug_message(STATUS "CMAKE_BUILD_TYPE not defined, 'Release' will be used") + set(CMAKE_BUILD_TYPE "Release") +endif() + 
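# Editor's worked example (illustration, not part of the patch): on a 64-bit
# Linux host CMAKE_SYSTEM_PROCESSOR is typically "x86_64", so the logic above
# yields ARCH_FOLDER="intel64" and BIN_FOLDER="bin/intel64"; combined with the
# default CMAKE_BUILD_TYPE of "Release", build artifacts end up under
# ${OUTPUT_ROOT}/bin/intel64/Release/lib (see the output directories below).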
+if(COVERAGE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage -O0") +endif() + +if(UNIX) + SET(LIB_DL ${CMAKE_DL_LIBS}) +endif() + +set(OUTPUT_ROOT ${IE_MAIN_SOURCE_DIR}) + +# Enable postfixes for Debug/Release builds +set(IE_DEBUG_POSTFIX_WIN "d") +set(IE_RELEASE_POSTFIX_WIN "") +set(IE_DEBUG_POSTFIX_LIN "") +set(IE_RELEASE_POSTFIX_LIN "") +set(IE_DEBUG_POSTFIX_MAC "d") +set(IE_RELEASE_POSTFIX_MAC "") + +if(WIN32) + set(IE_DEBUG_POSTFIX ${IE_DEBUG_POSTFIX_WIN}) + set(IE_RELEASE_POSTFIX ${IE_RELEASE_POSTFIX_WIN}) +elseif(APPLE) + set(IE_DEBUG_POSTFIX ${IE_DEBUG_POSTFIX_MAC}) + set(IE_RELEASE_POSTFIX ${IE_RELEASE_POSTFIX_MAC}) +else() + set(IE_DEBUG_POSTFIX ${IE_DEBUG_POSTFIX_LIN}) + set(IE_RELEASE_POSTFIX ${IE_RELEASE_POSTFIX_LIN}) +endif() + +set(CMAKE_DEBUG_POSTFIX ${IE_DEBUG_POSTFIX}) +set(CMAKE_RELEASE_POSTFIX ${IE_RELEASE_POSTFIX}) + +if (WIN32) + # Support CMake multiconfiguration for Visual Studio build + set(IE_BUILD_POSTFIX $<$<CONFIG:Debug>:${IE_DEBUG_POSTFIX}>$<$<CONFIG:Release>:${IE_RELEASE_POSTFIX}>) + set(IE_BUILD_CONFIGURATION $<CONFIG>) +else () + if (${CMAKE_BUILD_TYPE} STREQUAL "Debug" ) + set(IE_BUILD_POSTFIX ${IE_DEBUG_POSTFIX}) + else() + set(IE_BUILD_POSTFIX ${IE_RELEASE_POSTFIX}) + endif() + set(IE_BUILD_CONFIGURATION ${CMAKE_BUILD_TYPE}) +endif() +message(STATUS "BUILD_CONFIGURATION: ${IE_BUILD_CONFIGURATION}") + +add_definitions(-DIE_BUILD_POSTFIX=\"${IE_BUILD_POSTFIX}\") + +if(NOT UNIX) + if (WIN32) + # set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT") + # set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd") + endif() + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}) + set(CMAKE_LIBRARY_PATH ${OUTPUT_ROOT}/${BIN_FOLDER}) + set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}) + set(CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}) + set(CMAKE_PDB_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}) + set(LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}) + set(LIBRARY_OUTPUT_PATH ${LIBRARY_OUTPUT_DIRECTORY}) # compatibility issue: linux uses LIBRARY_OUTPUT_PATH, windows uses LIBRARY_OUTPUT_DIRECTORY +else() + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION}/lib) + set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION}/lib) + set(CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION}) + set(CMAKE_PDB_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION}) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION}) + set(LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION}/lib) + set(LIBRARY_OUTPUT_PATH ${LIBRARY_OUTPUT_DIRECTORY}/lib) +endif() + +if(APPLE) + set(CMAKE_MACOSX_RPATH 1) +endif(APPLE) + +# rpath fully disabled +if (NOT ENABLE_PLUGIN_RPATH) + set(CMAKE_SKIP_RPATH TRUE) +endif() + +# prepare temporary folder +function(set_temp_directory temp_variable source_tree_dir) + if (DEFINED ENV{${DL_SDK_TEMP}} AND NOT $ENV{${DL_SDK_TEMP}} STREQUAL "") + if (WIN32) + string(REPLACE "\\" "\\\\" temp $ENV{${DL_SDK_TEMP}}) + else(WIN32) + set(temp $ENV{${DL_SDK_TEMP}}) + endif(WIN32) + + if (ENABLE_ALTERNATIVE_TEMP) + set(ALTERNATIVE_PATH ${source_tree_dir}/temp) + endif() + else () + message(STATUS "DL_SDK_TEMP environment not set") + set(temp ${source_tree_dir}/temp) + endif() + + set("${temp_variable}" "${temp}" PARENT_SCOPE) + if(ALTERNATIVE_PATH) + set(ALTERNATIVE_PATH 
"${ALTERNATIVE_PATH}" PARENT_SCOPE) + endif() +endfunction() + +# Use solution folders +set_property(GLOBAL PROPERTY USE_FOLDERS ON) + +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +include(os_flags) +include(sdl) +include(sanitizer) +include(cpplint) +include(cppcheck) + +function(set_ci_build_number) + set(IE_MAIN_SOURCE_DIR "${CMAKE_SOURCE_DIR}") + include(version) + set(CI_BUILD_NUMBER "${CI_BUILD_NUMBER}" PARENT_SCOPE) +endfunction() +set_ci_build_number() + +if(ENABLE_PROFILING_ITT) + find_package(ITT REQUIRED) +endif() + +include(plugins/plugins) diff --git a/inference-engine/cmake/developer_package_config.cmake.in b/inference-engine/cmake/developer_package_config.cmake.in new file mode 100644 index 00000000000000..fd0184b5f49e1c --- /dev/null +++ b/inference-engine/cmake/developer_package_config.cmake.in @@ -0,0 +1,48 @@ +# Copyright (C) 2019 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +set(IE_MAIN_SOURCE_DIR "@CMAKE_SOURCE_DIR@") +file(TO_CMAKE_PATH "${CMAKE_CURRENT_LIST_DIR}" cache_path) + +# inherit OpenCV from main IE project +load_cache("${cache_path}" READ_WITH_PREFIX "" OpenCV_DIR) +find_package(OpenCV COMPONENTS imgcodecs) + +# Targets + +include("${CMAKE_CURRENT_LIST_DIR}/targets_developer.cmake") + +# add additional interface include directories needed for plugin development +if(NOT TARGET IE::inference_engine) + message(FATAL_ERROR "The target IE::inference_engine does not exist") +endif() + +set(ie_plugin_headers "${IE_MAIN_SOURCE_DIR}/src/inference_engine") +set_property(TARGET IE::inference_engine APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${ie_plugin_headers}") +set_property(TARGET IE::inference_engine PROPERTY IMPORTED_GLOBAL TRUE) + +get_target_property(InferenceEngine_INCLUDE_DIRS IE::inference_engine INTERFACE_INCLUDE_DIRECTORIES) +set(InferenceEngine_LIBRARIES IE::inference_engine) + +# Variables to export in plugin's projects + +set(ie_options "@IE_OPTIONS@;CMAKE_BUILD_TYPE") + +load_cache("${cache_path}" READ_WITH_PREFIX "" ${ie_options}) + +message(STATUS "The following CMake options are exported from Inference Engine Developer package") +message("") +foreach(option IN LISTS ie_options) + message(" ${option}: ${${option}}") +endforeach() +message("") + +# +# Common cmake includes +# + +list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake;${IE_MAIN_SOURCE_DIR}/cmake") + +# generic stuff from developer package +include(developer_package) diff --git a/inference-engine/cmake/download.cmake b/inference-engine/cmake/download.cmake index b5f6bc74c8ac34..6f87f0daa7ae2f 100644 --- a/inference-engine/cmake/download.cmake +++ b/inference-engine/cmake/download.cmake @@ -1,5 +1,4 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # diff --git a/inference-engine/cmake/download_and_apply.cmake b/inference-engine/cmake/download_and_apply.cmake index d4869e4d816296..b39b7636c11d98 100644 --- a/inference-engine/cmake/download_and_apply.cmake +++ b/inference-engine/cmake/download_and_apply.cmake @@ -1,5 +1,4 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # diff --git a/inference-engine/cmake/download_and_check.cmake b/inference-engine/cmake/download_and_check.cmake index 5f4e49c1faec08..f808e232f4effa 100644 --- a/inference-engine/cmake/download_and_check.cmake +++ b/inference-engine/cmake/download_and_check.cmake @@ -1,5 +1,4 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # diff --git a/inference-engine/cmake/download_and_extract.cmake 
b/inference-engine/cmake/download_and_extract.cmake index 27af8f8dcf5281..bd837be292c27a 100644 --- a/inference-engine/cmake/download_and_extract.cmake +++ b/inference-engine/cmake/download_and_extract.cmake @@ -1,5 +1,4 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # @@ -146,7 +145,7 @@ function (CheckOrDownloadAndExtract component RELATIVE_URL archive_name unpacked if(DEFINED ENV{IE_PATH_TO_DEPS}) set(URL "$ENV{IE_PATH_TO_DEPS}/${RELATIVE_URL}") else() - set(URL "https://download.01.org/opencv/2019/openvinotoolkit/R1/inference_engine/${RELATIVE_URL}") + set(URL "https://download.01.org/opencv/2019/openvinotoolkit/R2/inference_engine/${RELATIVE_URL}") endif() #no message on recursive calls diff --git a/inference-engine/cmake/extract.cmake b/inference-engine/cmake/extract.cmake index 2aa6fd45583f96..2522eaf3b7dcde 100644 --- a/inference-engine/cmake/extract.cmake +++ b/inference-engine/cmake/extract.cmake @@ -1,5 +1,4 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # diff --git a/inference-engine/cmake/features.cmake b/inference-engine/cmake/features.cmake index 9498744dc4054c..9fa537dbff439f 100644 --- a/inference-engine/cmake/features.cmake +++ b/inference-engine/cmake/features.cmake @@ -1,5 +1,4 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # @@ -8,7 +7,6 @@ include (options) #this options are aimed to optimize build time on development system #backed targets - ie_option (ENABLE_GNA "GNA support for inference engine" ON) ie_option (ENABLE_MKL_DNN "MKL-DNN plugin for inference engine" ON) @@ -31,57 +29,37 @@ list (APPEND IE_OPTIONS GEMM) # "MKL-DNN library based on OMP or TBB or Sequential implementation: TBB|OMP|SEQ" if (NOT THREADING STREQUAL "TBB" + AND NOT THREADING STREQUAL "TBB_AUTO" AND NOT THREADING STREQUAL "OMP" AND NOT THREADING STREQUAL "SEQ") set (THREADING "TBB") - message(STATUS "THREADING should be set to TBB, OMP or SEQ. Default option is " ${THREADING}) + message(STATUS "THREADING should be set to TBB, TBB_AUTO, OMP or SEQ. 
Default option is " ${THREADING}) endif() set(THREADING "${THREADING}" CACHE STRING "Threading" FORCE) list (APPEND IE_OPTIONS THREADING) -# Enable postfixes for Debug/Release builds -set (IE_DEBUG_POSTFIX_WIN "d") -set (IE_RELEASE_POSTFIX_WIN "") -set (IE_DEBUG_POSTFIX_LIN "") -set (IE_RELEASE_POSTFIX_LIN "") -set (IE_DEBUG_POSTFIX_MAC "d") -set (IE_RELEASE_POSTFIX_MAC "") - -if (WIN32) - set (IE_DEBUG_POSTFIX ${IE_DEBUG_POSTFIX_WIN}) - set (IE_RELEASE_POSTFIX ${IE_RELEASE_POSTFIX_WIN}) -elseif(APPLE) - set (IE_DEBUG_POSTFIX ${IE_DEBUG_POSTFIX_MAC}) - set (IE_RELEASE_POSTFIX ${IE_RELEASE_POSTFIX_MAC}) -else() - set (IE_DEBUG_POSTFIX ${IE_DEBUG_POSTFIX_LIN}) - set (IE_RELEASE_POSTFIX ${IE_RELEASE_POSTFIX_LIN}) -endif() -set(IE_DEBUG_POSTFIX "${IE_DEBUG_POSTFIX}" CACHE STRING "Debug postfix" FORCE) -list (APPEND IE_OPTIONS IE_DEBUG_POSTFIX) -set(IE_RELEASE_POSTFIX "${IE_RELEASE_POSTFIX}" CACHE STRING "Release postfix" FORCE) -list (APPEND IE_OPTIONS IE_RELEASE_POSTFIX) - ie_option (ENABLE_VPU "vpu targeted plugins for inference engine" ON) ie_option (ENABLE_MYRIAD "myriad targeted plugin for inference engine" ON) -ie_option (ENABLE_MYX_PCIE "myriad plugin with support PCIE device" OFF) - ie_option (ENABLE_MYRIAD_NO_BOOT "myriad plugin will skip device boot" OFF) ie_option (ENABLE_TESTS "unit and functional tests" OFF) -ie_option (ENABLE_GAPI_TESTS "unit tests for GAPI kernels" OFF) +ie_option (ENABLE_GAPI_TESTS "tests for GAPI kernels" OFF) ie_option (GAPI_TEST_PERF "if GAPI unit tests should examine performance" OFF) +ie_option (ENABLE_MYRIAD_MVNC_TESTS "functional and behavior tests for mvnc api" OFF) + ie_option (ENABLE_SAMPLES "console samples are part of inference engine package" ON) ie_option (ENABLE_SAMPLES_CORE "console samples core library" ON) ie_option (ENABLE_SANITIZER "enable checking memory errors via AddressSanitizer" OFF) +ie_option (ENABLE_FUZZING "instrument build for fuzzing" OFF) + ie_option (COVERAGE "enable code coverage" OFF) ie_option (ENABLE_STRESS_UNIT_TESTS "stress unit tests" OFF) @@ -108,10 +86,28 @@ ie_option (ENABLE_DEBUG_SYMBOLS "generates symbols for debugging" OFF) ie_option (ENABLE_PYTHON "enables ie python bridge build" OFF) +ie_option (DEVELOPMENT_PLUGIN_MODE "Disabled build of all plugins" OFF) + ie_option (TREAT_WARNING_AS_ERROR "Treat build warnings as errors" ON) -ie_option(ENABLE_CPPLINT "Enable cpplint checks during the build" OFF) -ie_option(ENABLE_CPPLINT_REPORT "Build cpplint report instead of failing the build" OFF) +ie_option (ENABLE_UNICODE_PATH_SUPPORT "Enable loading models from Unicode paths" ON) + +if (UNIX AND NOT APPLE AND CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.3) + set(ENABLE_UNICODE_PATH_SUPPORT OFF) +endif() + +if (UNIX AND NOT APPLE) + ie_option(ENABLE_CPPLINT "Enable cpplint checks during the build" ON) + ie_option(ENABLE_CPPLINT_REPORT "Build cpplint report instead of failing the build" OFF) +else() + set(ENABLE_CPPLINT OFF) +endif() + +if (UNIX AND NOT APPLE AND CMAKE_VERSION VERSION_GREATER_EQUAL 3.10) + ie_option(ENABLE_CPPCHECK "Enable cppcheck during the build" ON) +else() + set(ENABLE_CPPCHECK OFF) +endif() #environment variables used diff --git a/inference-engine/cmake/fuzzing.cmake b/inference-engine/cmake/fuzzing.cmake new file mode 100644 index 00000000000000..bc732862c52e00 --- /dev/null +++ b/inference-engine/cmake/fuzzing.cmake @@ -0,0 +1,30 @@ +# Copyright (C) 2018-2019 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +function(enable_fuzzing) + # Enable 
[libFuzzer](https://llvm.org/docs/LibFuzzer.html) if supported. + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT WIN32) + # Communicate libfuzzer is enabled + set(WITH_LIBFUZZER ON PARENT_SCOPE) + add_compile_definitions(WITH_LIBFUZZER) + + # Enable libfuzzer and code coverage + set(FUZZING_COMPILER_FLAGS "-fsanitize=fuzzer-no-link -fprofile-instr-generate -fcoverage-mapping") + set(FUZZING_LINKER_FLAGS "-fsanitize-coverage=trace-pc-guard -fprofile-instr-generate") + + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${FUZZING_COMPILER_FLAGS}" PARENT_SCOPE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FUZZING_COMPILER_FLAGS}" PARENT_SCOPE) + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${FUZZING_LINKER_FLAGS}" PARENT_SCOPE) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${FUZZING_LINKER_FLAGS}") + endif() +endfunction(enable_fuzzing) + + +function(add_fuzzer FUZZER_EXE_NAME FUZZER_SOURCES) + add_executable(${FUZZER_EXE_NAME} ${FUZZER_SOURCES}) + if(WITH_LIBFUZZER) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=fuzzer" PARENT_SCOPE) + endif() + target_link_libraries(${FUZZER_EXE_NAME} PRIVATE fuzz-testhelper) +endfunction(add_fuzzer)
diff --git a/inference-engine/cmake/ie_parallel.cmake b/inference-engine/cmake/ie_parallel.cmake index 18ccdf086bc2f8..8265701a57726d 100644 --- a/inference-engine/cmake/ie_parallel.cmake +++ b/inference-engine/cmake/ie_parallel.cmake @@ -1,12 +1,11 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # function(set_ie_threading_interface_for TARGET_NAME) set(IE_THREAD_DEFINE "IE_THREAD_SEQ") - if (THREADING STREQUAL "TBB") + if (THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") if (NOT (IE_MAIN_SOURCE_DIR)) set(incl_path ${IE_EXTERNAL_DIR}/tbb/include) if (WIN32) @@ -22,7 +21,7 @@ function(set_ie_threading_interface_for TARGET_NAME) set(lib_dbg_path ${lib_rel_path}) endif () - if (NOT TBB_INCLUDE_DIRS OR NOT TBB_LIBRARIES_RELEASE OR NOT TBB_LIBRARIES_DEBUG) + if (NOT TBB_INCLUDE_DIRS OR NOT TBB_LIBRARIES_RELEASE) find_path(TBB_INCLUDE_DIRS tbb/tbb.h ${incl_path} NO_DEFAULT_PATH) find_library(TBB_LIBRARIES_RELEASE tbb ${lib_rel_path} NO_DEFAULT_PATH) find_library(TBB_LIBRARIES_DEBUG tbb_debug ${lib_dbg_path} NO_DEFAULT_PATH) @@ -31,20 +30,31 @@ function(set_ie_threading_interface_for TARGET_NAME) ext_message(STATUS "TBB Debug lib: ${TBB_LIBRARIES_DEBUG}") endif () - if (NOT TBB_INCLUDE_DIRS OR NOT TBB_LIBRARIES_RELEASE OR NOT TBB_LIBRARIES_DEBUG) + if (NOT TBB_INCLUDE_DIRS OR NOT TBB_LIBRARIES_RELEASE) ext_message(WARNING "TBB not found. TBB support will be disabled. ${IE_THREAD_DEFINE} is defined") else () set(IE_THREAD_DEFINE "IE_THREAD_TBB") + target_include_directories(${TARGET_NAME} PUBLIC ${TBB_INCLUDE_DIRS}) if (WIN32) target_link_libraries(${TARGET_NAME} PUBLIC "-nodefaultlib:vcomp") - target_link_libraries(${TARGET_NAME} PUBLIC "$<$<CONFIG:Debug>:${TBB_LIBRARIES_DEBUG}>;$<$<NOT:$<CONFIG:Debug>>:${TBB_LIBRARIES_RELEASE}>") - else() - if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") - target_link_libraries(${TARGET_NAME} PUBLIC ${TBB_LIBRARIES_DEBUG}) - else() - target_link_libraries(${TARGET_NAME} PUBLIC ${TBB_LIBRARIES_RELEASE}) + endif () + + # Debug binaries are optional. 
+ if (TBB_LIBRARIES_DEBUG) + if (WIN32) + target_link_libraries(${TARGET_NAME} PUBLIC "$<$<CONFIG:Debug>:${TBB_LIBRARIES_DEBUG}>;$<$<NOT:$<CONFIG:Debug>>:${TBB_LIBRARIES_RELEASE}>") + else () + if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") + target_link_libraries(${TARGET_NAME} PUBLIC ${TBB_LIBRARIES_DEBUG}) + else() + target_link_libraries(${TARGET_NAME} PUBLIC ${TBB_LIBRARIES_RELEASE}) + endif () endif () + else () + # Link Release library to all configurations. + ext_message(WARNING "TBB Debug binaries are missed.") + target_link_libraries(${TARGET_NAME} PUBLIC ${TBB_LIBRARIES_RELEASE}) endif () endif () elseif (THREADING STREQUAL "OMP") @@ -67,31 +77,41 @@ function(set_ie_threading_interface_for TARGET_NAME) set(lib_dbg_path ${lib_rel_path}) endif () - if (NOT OMP_LIBRARIES_RELEASE OR NOT OMP_LIBRARIES_DEBUG) + if (NOT OMP_LIBRARIES_RELEASE) find_library(OMP_LIBRARIES_RELEASE ${omp_lib_name} ${lib_rel_path} NO_DEFAULT_PATH) find_library(OMP_LIBRARIES_DEBUG ${omp_lib_name} ${lib_dbg_path} NO_DEFAULT_PATH) ext_message(STATUS "OMP Release lib: ${OMP_LIBRARIES_RELEASE}") ext_message(STATUS "OMP Debug lib: ${OMP_LIBRARIES_DEBUG}") endif () - if (NOT OMP_LIBRARIES_RELEASE OR NOT OMP_LIBRARIES_DEBUG) + if (NOT OMP_LIBRARIES_RELEASE) ext_message(WARNING "Intel OpenMP not found. Intel OpenMP support will be disabled. ${IE_THREAD_DEFINE} is defined") else () set(IE_THREAD_DEFINE "IE_THREAD_OMP") - + if (WIN32) target_compile_options(${TARGET_NAME} PUBLIC ${OpenMP_CXX_FLAGS} /openmp) target_compile_options(${TARGET_NAME} PUBLIC ${OpenMP_CXX_FLAGS} /Qopenmp) - target_link_libraries(${TARGET_NAME} PUBLIC "-nodefaultlib:vcomp") - target_link_libraries(${TARGET_NAME} PUBLIC "$<$<CONFIG:Debug>:${OMP_LIBRARIES_DEBUG}>;$<$<NOT:$<CONFIG:Debug>>:${OMP_LIBRARIES_RELEASE}>") else() target_compile_options(${TARGET_NAME} PUBLIC ${OpenMP_CXX_FLAGS} -fopenmp) - if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") - target_link_libraries(${TARGET_NAME} PUBLIC ${OMP_LIBRARIES_DEBUG}) + endif () + + # Debug binaries are optional. + if (OMP_LIBRARIES_DEBUG) + if (WIN32) + target_link_libraries(${TARGET_NAME} PUBLIC "$<$<CONFIG:Debug>:${OMP_LIBRARIES_DEBUG}>;$<$<NOT:$<CONFIG:Debug>>:${OMP_LIBRARIES_RELEASE}>") else() - target_link_libraries(${TARGET_NAME} PUBLIC ${OMP_LIBRARIES_RELEASE}) + if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") + target_link_libraries(${TARGET_NAME} PUBLIC ${OMP_LIBRARIES_DEBUG}) + else() + target_link_libraries(${TARGET_NAME} PUBLIC ${OMP_LIBRARIES_RELEASE}) + endif () endif () + else () + # Link Release library to all configurations. 
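# Note (editor's illustration, assumed behavior): with a multi-config
# generator such as Visual Studio the expression
#   $<$<CONFIG:Debug>:${OMP_LIBRARIES_DEBUG}>;$<$<NOT:$<CONFIG:Debug>>:${OMP_LIBRARIES_RELEASE}>
# selects the debug or release OpenMP binary at build time; single-config
# generators rely on the CMAKE_BUILD_TYPE check above instead.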
+ ext_message(WARNING "OMP Debug binaries are missed.") + target_link_libraries(${TARGET_NAME} PUBLIC ${OMP_LIBRARIES_RELEASE}) endif () endif () diff --git a/inference-engine/cmake/itt.cmake b/inference-engine/cmake/itt.cmake deleted file mode 100644 index 3ed2394c456ccd..00000000000000 --- a/inference-engine/cmake/itt.cmake +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2018-2019 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -unset(ITT_INCLUDE_DIR CACHE) -unset(ITT_LIB CACHE) - -if(NOT DEFINED INTEL_VTUNE_DIR AND DEFINED ENV{INTEL_VTUNE_DIR}) - set(INTEL_VTUNE_DIR "$ENV{INTEL_VTUNE_DIR}") -endif() -if(NOT DEFINED INTEL_VTUNE_DIR) - if(EXISTS "/opt/intel/vtune_amplifier_xe/include") - set(INTEL_VTUNE_DIR "/opt/intel/vtune_amplifier_xe") - elseif(EXISTS "/opt/intel/vtune_amplifier/include") - set(INTEL_VTUNE_DIR "/opt/intel/vtune_amplifier") - elseif (EXISTS "C:/Program Files (x86)/IntelSWTools/VTune Amplifier XE") - set(INTEL_VTUNE_DIR "C:/Program Files (x86)/IntelSWTools/VTune Amplifier XE") - elseif (EXISTS "C:/Program Files (x86)/IntelSWTools/VTune Amplifier") - set(INTEL_VTUNE_DIR "C:/Program Files (x86)/IntelSWTools/VTune Amplifier") - endif() -endif() - -if(DEFINED INTEL_VTUNE_DIR) - message(STATUS "INTEL_VTUNE_DIR = ${INTEL_VTUNE_DIR}") - - find_path(ITT_INCLUDE_DIR - FILES - ittnotify.h - PATHS "${INTEL_VTUNE_DIR}/include/") - - find_library(ITT_LIB - "libittnotify${CMAKE_STATIC_LIBRARY_SUFFIX}" - PATHS ${INTEL_VTUNE_DIR}/lib64) - - set(Located_ITT_LIBS ${ITT_LIB} ${CMAKE_DL_LIBS}) - set(Located_ITT_INCLUDE_DIRS ${ITT_INCLUDE_DIR}) -else() - message(STATUS "INTEL_VTUNE_DIR is not defined") -endif() - -# Handle find_package() arguments, and set INTEL_ITT_FOUND -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(INTEL_ITT - REQUIRED_VARS - Located_ITT_INCLUDE_DIRS - Located_ITT_LIBS) - -if(ENABLE_PROFILING_ITT AND INTEL_ITT_FOUND) - add_definitions(-DENABLE_PROFILING_ITT=1) - - set(INTEL_ITT_LIBS ${Located_ITT_LIBS}) - set(INTEL_ITT_INCLUDE_DIRS ${Located_ITT_INCLUDE_DIRS}) - - message(STATUS "INTEL_ITT_INCLUDE_DIRS: ${INTEL_ITT_INCLUDE_DIRS}") - include_directories(${INTEL_ITT_INCLUDE_DIRS}) - message(STATUS "INTEL_ITT_LIBS: ${INTEL_ITT_LIBS}") -else() - add_definitions(-DENABLE_PROFILING_ITT=0) - message(STATUS "INTEL_ITT is disabled") -endif() - diff --git a/inference-engine/cmake/linux_name.cmake b/inference-engine/cmake/linux_name.cmake index 8b07919bc27e3b..6713808db871d9 100644 --- a/inference-engine/cmake/linux_name.cmake +++ b/inference-engine/cmake/linux_name.cmake @@ -1,9 +1,8 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # -if (UNIX) +if (LINUX) function(get_linux_name res_var) if (NOT EXISTS "/etc/lsb-release") execute_process(COMMAND find -L /etc/ -maxdepth 1 -type f -name *-release -exec cat {} \; diff --git a/inference-engine/cmake/mode.cmake b/inference-engine/cmake/mode.cmake deleted file mode 100644 index 3e55471d895219..00000000000000 --- a/inference-engine/cmake/mode.cmake +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (C) 2018-2019 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 -# - -option(DEVELOPMENT_PLUGIN_MODE "Disabled build of all plugins" OFF) diff --git a/inference-engine/cmake/options.cmake b/inference-engine/cmake/options.cmake index 3cc68d65450621..da9da11109bb8e 100644 --- a/inference-engine/cmake/options.cmake +++ b/inference-engine/cmake/options.cmake @@ -1,21 +1,27 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # 
# Usage: ie_option(<option_variable> "description" <initial value> [IF <condition>]) function (ie_option variable description value) - option(${variable} "${description}" ${value}) - list (APPEND IE_OPTIONS "${variable}") + list(FIND IE_OPTIONS "${variable}" result) + + if(${result} EQUAL -1) + option(${variable} "${description}" ${value}) + list (APPEND IE_OPTIONS "${variable}") - set (IE_OPTIONS "${IE_OPTIONS}" PARENT_SCOPE) + set (IE_OPTIONS "${IE_OPTIONS}" PARENT_SCOPE) + endif() endfunction() include(version) function (print_enabled_features) - message(STATUS "CI_BUILD_NUMBER: ${CI_BUILD_NUMBER}") + message(STATUS "Inference Engine enabled features: ") + message("") + message(" CI_BUILD_NUMBER: ${CI_BUILD_NUMBER}") foreach(_var ${IE_OPTIONS}) - message(STATUS "${_var} = ${${_var}}") + message(" ${_var} = ${${_var}}") endforeach() + message("") endfunction()
diff --git a/inference-engine/cmake/os_flags.cmake b/inference-engine/cmake/os_flags.cmake index 29608ea83f3aae..6a5442f854c79f 100644 --- a/inference-engine/cmake/os_flags.cmake +++ b/inference-engine/cmake/os_flags.cmake @@ -1,14 +1,35 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # +macro(disable_deprecated_warnings) + if(WIN32) + if("${CMAKE_CXX_COMPILER_ID}" MATCHES Intel) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qdiag-warning:1478") + elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4996") # disable warning on deprecated API + endif() + else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations") + endif() +endmacro() + if (WIN32) set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS _CRT_SECURE_NO_WARNINGS) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_SCL_SECURE_NO_WARNINGS") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") #no asynchronous structured exception handling set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LARGEADDRESSAWARE") + if (TREAT_WARNING_AS_ERROR) + if(CMAKE_CXX_COMPILER_ID MATCHES Intel) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qdiag-warning:2586,177,3180,1740,1786,47,161") + elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC) +# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX") # Too many warnings + endif() + endif() + + set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /Z7") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Z7") @@ -26,7 +47,6 @@ if (WIN32) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${DEBUG_SYMBOLS_LINKER_FLAGS}") set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${DEBUG_SYMBOLS_LINKER_FLAGS}") endif() - else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Werror=return-type ") if (APPLE)
diff --git a/inference-engine/cmake/plugins/create_plugin_file.cmake b/inference-engine/cmake/plugins/create_plugin_file.cmake new file mode 100644 index 00000000000000..f5b5c02206a591 --- /dev/null +++ b/inference-engine/cmake/plugins/create_plugin_file.cmake @@ -0,0 +1,27 @@ +# Copyright (C) 2018-2019 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +set(newContent "    <plugin name=\"${IE_DEVICE_NAME}\" location=\"${IE_PLUGIN_LIBRARY_NAME}\">") + +if(IE_PLUGIN_PROPERTIES) + set(newContent "${newContent} + <properties>") + + foreach(props IN LISTS IE_PLUGIN_PROPERTIES) + string(REPLACE "," ";" props "${props}") + + list(GET props 0 key) + list(GET props 1 value) + set(newContent "${newContent} + <property key=\"${key}\" value=\"${value}\"/>") + endforeach() + + set(newContent "${newContent} + </properties>") +endif() + +set(newContent "${newContent} + </plugin>") + +file(WRITE "${IE_CONFIG_OUTPUT_FILE}" "${newContent}")
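# Editor's illustration (hypothetical values, not part of the patch): with
# IE_DEVICE_NAME=MYDEVICE and IE_PLUGIN_LIBRARY_NAME=libmyPluginDevice.so the
# script above would write roughly:
#   <plugin name="MYDEVICE" location="libmyPluginDevice.so">
#   </plugin>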
b/inference-engine/cmake/plugins/plugins.cmake new file mode 100644 index 00000000000000..a66237078d05e9 --- /dev/null +++ b/inference-engine/cmake/plugins/plugins.cmake @@ -0,0 +1,132 @@ +# Copyright (C) 2019 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +include(CMakeParseArguments) + +set(PLUGIN_FILES "" CACHE INTERNAL "") + +function(get_shared_library_name target_name library_name) + set(LIB_PREFIX "${CMAKE_SHARED_LIBRARY_PREFIX}") + set(LIB_SUFFIX "${IE_BUILD_POSTFIX}${CMAKE_SHARED_LIBRARY_SUFFIX}") + + set("${library_name}" "${LIB_PREFIX}${target_name}${LIB_SUFFIX}" PARENT_SCOPE) +endfunction() + +if(NOT TARGET ie_plugins) + add_custom_target(ie_plugins) +endif() + +# +# ie_add_plugin(NAME +# DEVICE_NAME +# SOURCES +# VERSION_DEFINES_FOR +# ) +# +function(ie_add_plugin) + set(options) + set(oneValueArgs NAME DEVICE_NAME VERSION_DEFINES_FOR) + set(multiValueArgs SOURCES) + cmake_parse_arguments(IE_PLUGIN "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(NOT IE_PLUGIN_NAME) + message(FATAL_ERROR "Please, specify plugin target name") + endif() + + if(NOT IE_PLUGIN_DEVICE_NAME) + message(FATAL_ERROR "Please, specify device name for ${IE_PLUGIN_NAME}") + endif() + + # create and configure target + + if(IE_PLUGIN_VERSION_DEFINES_FOR) + addVersionDefines(${IE_PLUGIN_VERSION_DEFINES_FOR} CI_BUILD_NUMBER) + endif() + + add_library(${IE_PLUGIN_NAME} SHARED ${IE_PLUGIN_SOURCES}) + target_compile_definitions(${IE_PLUGIN_NAME} PRIVATE IMPLEMENT_INFERENCE_ENGINE_PLUGIN) + + if(WIN32) + set_target_properties(${IE_PLUGIN_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}) + endif() + + add_cpplint_target(${IE_PLUGIN_NAME}_cpplint FOR_TARGETS ${IE_PLUGIN_NAME}) + + # append plugin to the list to register + + list(APPEND PLUGIN_FILES "${IE_PLUGIN_DEVICE_NAME}:${IE_PLUGIN_NAME}") + list(REMOVE_DUPLICATES PLUGIN_FILES) + set(PLUGIN_FILES "${PLUGIN_FILES}" CACHE INTERNAL "" FORCE) + + add_dependencies(ie_plugins ${IE_PLUGIN_NAME}) +endfunction() + +# +# ie_register_plugins(MAIN_TARGET
<main_target_name> +# POSSIBLE_PLUGINS <plugin_name_1> <plugin_name_2> ...) +# +macro(ie_register_plugins) + set(options) + set(oneValueArgs MAIN_TARGET) + set(multiValueArgs POSSIBLE_PLUGINS) + cmake_parse_arguments(IE_REGISTER "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(NOT IE_REGISTER_MAIN_TARGET) + message(FATAL_ERROR "Please, define MAIN_TARGET") + endif() + + set(plugins_to_remove ${IE_REGISTER_POSSIBLE_PLUGINS}) + set(plugin_files_local) + set(config_output_file "$<TARGET_FILE_DIR:${IE_REGISTER_MAIN_TARGET}>/plugins.xml") + + foreach(plugin IN LISTS plugins_to_remove) + add_custom_command(TARGET ${IE_REGISTER_MAIN_TARGET} POST_BUILD + COMMAND + "${CMAKE_COMMAND}" + -D "IE_CONFIG_OUTPUT_FILE=${config_output_file}" + -D "IE_PLUGIN_NAME=${plugin}" + -D "IE_CONFIGS_DIR=${CMAKE_BINARY_DIR}/plugins" + -P "${IE_MAIN_SOURCE_DIR}/cmake/plugins/unregister_plugin_cmake.cmake" + COMMENT + "Remove ${plugin} from the plugins.xml file" + VERBATIM) + endforeach() + + foreach(name IN LISTS PLUGIN_FILES) + string(REPLACE ":" ";" name "${name}") + list(LENGTH name length) + if(NOT ${length} EQUAL 2) + message(FATAL_ERROR "Unexpected error, please, contact developer of this script") + endif() + list(GET name 0 device_name) + list(GET name 1 name) + + # create plugin file + set(config_file_name "${CMAKE_BINARY_DIR}/plugins/${name}.xml") + get_shared_library_name(${name} library_name) + + add_custom_command(TARGET ${IE_REGISTER_MAIN_TARGET} POST_BUILD + COMMAND + "${CMAKE_COMMAND}" + -D "IE_CONFIG_OUTPUT_FILE=${config_file_name}" + -D "IE_DEVICE_NAME=${device_name}" + -D "IE_PLUGIN_LIBRARY_NAME=${library_name}" + -P "${IE_MAIN_SOURCE_DIR}/cmake/plugins/create_plugin_file.cmake" + COMMENT "Register ${name} plugin" + VERBATIM) + + list(APPEND plugin_files_local "${config_file_name}") + endforeach() + + add_custom_command(TARGET ${IE_REGISTER_MAIN_TARGET} POST_BUILD + COMMAND + "${CMAKE_COMMAND}" + -D "CMAKE_SHARED_LIBRARY_PREFIX=${CMAKE_SHARED_LIBRARY_PREFIX}" + -D "IE_CONFIG_OUTPUT_FILE=${config_output_file}" + -D "IE_CONFIGS_DIR=${CMAKE_BINARY_DIR}/plugins" + -P "${IE_MAIN_SOURCE_DIR}/cmake/plugins/register_plugin_cmake.cmake" + COMMENT + "Registering plugins to plugins.xml config file" + VERBATIM) +endmacro() diff --git a/inference-engine/cmake/plugins/register_plugin_cmake.cmake b/inference-engine/cmake/plugins/register_plugin_cmake.cmake new file mode 100644 index 00000000000000..140f8ae39078a6 --- /dev/null +++ b/inference-engine/cmake/plugins/register_plugin_cmake.cmake @@ -0,0 +1,65 @@ +# Copyright (C) 2018-2019 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +set(file_content +"<ie> +    <plugins> +    </plugins> +</ie>") + +if(NOT EXISTS "${IE_CONFIG_OUTPUT_FILE}") + file(WRITE "${IE_CONFIG_OUTPUT_FILE}" "${file_content}") +endif() + +# get list of plugin files +file(GLOB plugin_files "${IE_CONFIGS_DIR}/*.xml") + +function(check_plugin_exists plugin_name outvar) + set(${outvar} OFF PARENT_SCOPE) + + # check if config file already has this plugin + file(STRINGS "${IE_CONFIG_OUTPUT_FILE}" content REGEX "<plugin .*=\"") + + foreach(line IN LISTS content) + string(REGEX MATCH "location=\"([^\"]*)\"" location "${line}") + get_filename_component(location "${CMAKE_MATCH_1}" NAME_WE) + + if("${CMAKE_SHARED_LIBRARY_PREFIX}${plugin_name}" MATCHES "${location}") + # plugin has already been registered + set(${outvar} ON PARENT_SCOPE) + endif() + endforeach() +endfunction() + +set(plugin_files_to_add) +foreach(plugin_file IN LISTS plugin_files) + get_filename_component(plugin_name "${plugin_file}" NAME_WE) + check_plugin_exists("${plugin_name}" exists) + + if(NOT exists) + list(APPEND plugin_files_to_add
"${plugin_file}") + endif() +endforeach() + +# add plugin +set(newContent "") +file(STRINGS "${IE_CONFIG_OUTPUT_FILE}" content) + +foreach(line IN LISTS content) + if("${line}" MATCHES "") + foreach(plugin_file IN LISTS plugin_files_to_add) + file(READ "${plugin_file}" content) + set(newContent "${newContent} +${content}") + endforeach() + endif() + + if(newContent) + set(newContent "${newContent}\n${line}") + else() + set(newContent "${line}") + endif() +endforeach() + +file(WRITE "${IE_CONFIG_OUTPUT_FILE}" "${newContent}") diff --git a/inference-engine/cmake/plugins/unregister_plugin_cmake.cmake b/inference-engine/cmake/plugins/unregister_plugin_cmake.cmake new file mode 100644 index 00000000000000..086ef90b19c8ec --- /dev/null +++ b/inference-engine/cmake/plugins/unregister_plugin_cmake.cmake @@ -0,0 +1,35 @@ +# Copyright (C) 2019 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +if(NOT EXISTS "${IE_CONFIG_OUTPUT_FILE}") + return() +endif() + +# remove plugin file +file(REMOVE "${IE_CONFIGS_DIR}/${IE_PLUGIN_NAME}.xml") + +# remove plugin +set(newContent "") +file(STRINGS "${IE_CONFIG_OUTPUT_FILE}" content) + +set(skip_plugin OFF) +foreach(line IN LISTS content) + if("${line}" MATCHES "${IE_PLUGIN_NAME}") + set(skip_plugin ON) + endif() + + if(NOT skip_plugin) + if(newContent) + set(newContent "${newContent}\n${line}") + else() + set(newContent "${line}") + endif() + endif() + + if("${line}" MATCHES "") + set(skip_plugin OFF) + endif() +endforeach() + +file(WRITE "${IE_CONFIG_OUTPUT_FILE}" "${newContent}") diff --git a/inference-engine/cmake/sanitizer.cmake b/inference-engine/cmake/sanitizer.cmake index 23814e7adcb71a..aade475075223c 100644 --- a/inference-engine/cmake/sanitizer.cmake +++ b/inference-engine/cmake/sanitizer.cmake @@ -1,5 +1,4 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # diff --git a/inference-engine/cmake/sdl.cmake b/inference-engine/cmake/sdl.cmake index e6229a7b2633f3..ee57890454afa5 100644 --- a/inference-engine/cmake/sdl.cmake +++ b/inference-engine/cmake/sdl.cmake @@ -1,5 +1,4 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # diff --git a/inference-engine/cmake/share/InferenceEngineConfig-version.cmake.in b/inference-engine/cmake/share/InferenceEngineConfig-version.cmake.in index bc4c3a99788d81..c7911a622b9383 100644 --- a/inference-engine/cmake/share/InferenceEngineConfig-version.cmake.in +++ b/inference-engine/cmake/share/InferenceEngineConfig-version.cmake.in @@ -1,9 +1,8 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # -set(InferenceEngine_VERSION 1.6.0) +set(InferenceEngine_VERSION 2.0.0) set(PACKAGE_VERSION ${InferenceEngine_VERSION}) set(PACKAGE_VERSION_EXACT False) diff --git a/inference-engine/cmake/share/InferenceEngineConfig.cmake.in b/inference-engine/cmake/share/InferenceEngineConfig.cmake.in index 42b2df73cac8c3..3bbb0cf7ec19f2 100644 --- a/inference-engine/cmake/share/InferenceEngineConfig.cmake.in +++ b/inference-engine/cmake/share/InferenceEngineConfig.cmake.in @@ -1,5 +1,4 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # # @@ -42,11 +41,10 @@ else() if (WIN32) set(_ARCH intel64) else() - if(CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l") - set(_ARCH armv7l) - elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64") + string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} _ARCH) + if(_ARCH STREQUAL "x86_64" OR _ARCH STREQUAL "amd64") # Windows detects Intel's 64-bit CPU as AMD64 set(_ARCH intel64) 
- elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i386") + elseif(_ARCH STREQUAL "i386") set(_ARCH ia32) endif() endif() @@ -70,59 +68,32 @@ else() endif() endif() - if(NOT IE_ROOT_DIR) - ext_message(FATAL_ERROR "inference_engine directory is not found") + if(NOT IE_ROOT_DIR) + ext_message(FATAL_ERROR "inference_engine root directory is not found") endif() + find_path(IE_INCLUDE_DIR inference_engine.hpp "${IE_ROOT_DIR}/include" NO_DEFAULT_PATH) + find_path(IE_SRC_DIR extension "${IE_ROOT_DIR}/src" NO_DEFAULT_PATH) - if(IE_INCLUDE_DIR AND NOT "${IE_ROOT_DIR}/include" EQUAL "${IE_INCLUDE_DIR}") - unset(IE_INCLUDE_DIR CACHE) - endif() - - if(IE_SRC_DIR AND NOT "${IE_ROOT_DIR}/src" EQUAL "${IE_SRC_DIR}") - unset(IE_SRC_DIR CACHE) - endif() - - if(IE_LIBRARY AND NOT "${IE_ROOT_DIR}/lib/${_ARCH}" EQUAL "${IE_LIBRARY}") - unset(IE_LIBRARY CACHE) - endif() - - set(_IE_ROOT_INCLUDE_DIR "${IE_ROOT_DIR}/include") - set(_IE_ROOT_SRC_DIR "${IE_ROOT_DIR}/src") - set(_IE_ROOT_LIBRARY "${IE_ROOT_DIR}/lib/${_ARCH}") - - find_path(IE_INCLUDE_DIR inference_engine.hpp "${_IE_ROOT_INCLUDE_DIR}") - find_path(IE_SRC_DIR extension "${_IE_ROOT_SRC_DIR}") - - set(IE_LIB_DIR "${_IE_ROOT_LIBRARY}") + set(IE_LIB_DIR "${IE_ROOT_DIR}/lib/${_ARCH}") set(IE_LIB_REL_DIR "${IE_LIB_DIR}/Release") set(IE_LIB_DBG_DIR "${IE_LIB_DIR}/Debug") - set(IE_EXTERNAL_DIR "${IE_ROOT_DIR}/external") include(FindPackageHandleStandardArgs) - if (WIN32) - find_library(IE_RELEASE_LIBRARY inference_engine@IE_RELEASE_POSTFIX_WIN@ "${IE_LIB_REL_DIR}") - find_library(IE_DEBUG_LIBRARY inference_engine@IE_DEBUG_POSTFIX_WIN@ "${IE_LIB_DBG_DIR}") - find_package_handle_standard_args( InferenceEngine - FOUND_VAR INFERENCEENGINE_FOUND - REQUIRED_VARS IE_RELEASE_LIBRARY IE_DEBUG_LIBRARY IE_INCLUDE_DIR - FAIL_MESSAGE "Inference Engine cannot be found at ${_IE_ROOT_LIBRARY}. Please consult InferenceEgnineConfig.cmake module's help page.") - elseif (APPLE) - find_library(IE_RELEASE_LIBRARY inference_engine@IE_RELEASE_POSTFIX_MAC@ "${IE_LIB_DIR}") - find_library(IE_DEBUG_LIBRARY inference_engine@IE_DEBUG_POSTFIX_MAC@ "${IE_LIB_DIR}") - find_package_handle_standard_args( InferenceEngine - FOUND_VAR INFERENCEENGINE_FOUND - REQUIRED_VARS IE_RELEASE_LIBRARY IE_DEBUG_LIBRARY IE_INCLUDE_DIR - FAIL_MESSAGE "Inference Engine cannot be found at ${_IE_ROOT_LIBRARY}. Please consult InferenceEgnineConfig.cmake module's help page.") - + if(WIN32) + find_library(IE_RELEASE_LIBRARY inference_engine@IE_RELEASE_POSTFIX_WIN@ "${IE_LIB_REL_DIR}" NO_DEFAULT_PATH) + elseif(APPLE) + find_library(IE_RELEASE_LIBRARY inference_engine@IE_RELEASE_POSTFIX_MAC@ "${IE_LIB_DIR}" NO_DEFAULT_PATH) else() - find_library(IE_LIBRARY inference_engine@IE_RELEASE_POSTFIX_LIN@ "${IE_LIB_DIR}") - find_package_handle_standard_args( InferenceEngine - FOUND_VAR INFERENCEENGINE_FOUND - REQUIRED_VARS IE_LIBRARY IE_INCLUDE_DIR - FAIL_MESSAGE "Inference Engine cannot be found at ${_IE_ROOT_LIBRARY}. Please consult InferenceEgnineConfig.cmake module's help page.") + find_library(IE_RELEASE_LIBRARY inference_engine@IE_RELEASE_POSTFIX_LIN@ "${IE_LIB_DIR}" NO_DEFAULT_PATH) endif() + + find_package_handle_standard_args( InferenceEngine + FOUND_VAR INFERENCEENGINE_FOUND + REQUIRED_VARS IE_RELEASE_LIBRARY IE_INCLUDE_DIR + FAIL_MESSAGE "Some of mandatory Inference Engine components are not found. 
Please consult InferenceEngineConfig.cmake module's help page.") + if(INFERENCEENGINE_FOUND) # to keep this line for successful execution in CMake 2.8 set(InferenceEngine_FOUND TRUE) @@ -130,25 +101,42 @@ else() add_library(IE::inference_engine SHARED IMPORTED GLOBAL) if (WIN32) - set_property(TARGET IE::inference_engine APPEND PROPERTY IMPORTED_CONFIGURATIONS DEBUG) - set_property(TARGET IE::inference_engine APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) - set_target_properties(IE::inference_engine PROPERTIES + IMPORTED_CONFIGURATIONS RELEASE IMPORTED_IMPLIB_RELEASE "${IE_RELEASE_LIBRARY}" - IMPORTED_IMPLIB_DEBUG "${IE_DEBUG_LIBRARY}" - MAP_IMPORTED_CONFIG_DEBUG Debug MAP_IMPORTED_CONFIG_RELEASE Release MAP_IMPORTED_CONFIG_RELWITHDEBINFO Release INTERFACE_INCLUDE_DIRECTORIES "${IE_INCLUDE_DIR}") + + # Debug binaries are optional + find_library(IE_DEBUG_LIBRARY inference_engine@IE_DEBUG_POSTFIX_WIN@ "${IE_LIB_DBG_DIR}" NO_DEFAULT_PATH) + if (IE_DEBUG_LIBRARY) + set_property(TARGET IE::inference_engine APPEND PROPERTY IMPORTED_CONFIGURATIONS DEBUG) + set_target_properties(IE::inference_engine PROPERTIES + IMPORTED_IMPLIB_DEBUG "${IE_DEBUG_LIBRARY}" + MAP_IMPORTED_CONFIG_DEBUG Debug) + else() + ext_message(WARNING "Inference Engine DEBUG binaries are missing.") + endif() elseif (APPLE) set_target_properties(IE::inference_engine PROPERTIES IMPORTED_LOCATION_RELEASE "${IE_RELEASE_LIBRARY}" - IMPORTED_LOCATION_DEBUG "${IE_DEBUG_LIBRARY}" INTERFACE_INCLUDE_DIRECTORIES "${IE_INCLUDE_DIR}") + + # Debug binaries are optional + find_library(IE_DEBUG_LIBRARY inference_engine@IE_DEBUG_POSTFIX_MAC@ "${IE_LIB_DIR}" NO_DEFAULT_PATH) + if (IE_DEBUG_LIBRARY) + set_target_properties(IE::inference_engine PROPERTIES + IMPORTED_LOCATION_DEBUG "${IE_DEBUG_LIBRARY}") + else() + ext_message(WARNING "Inference Engine DEBUG binaries are missing") + endif() + target_link_libraries(IE::inference_engine INTERFACE ${CMAKE_DL_LIBS}) else() + # Only Release binaries are distributed for Linux systems set_target_properties(IE::inference_engine PROPERTIES - IMPORTED_LOCATION "${IE_LIBRARY}" + IMPORTED_LOCATION "${IE_RELEASE_LIBRARY}" INTERFACE_INCLUDE_DIRECTORIES "${IE_INCLUDE_DIR}") target_link_libraries(IE::inference_engine INTERFACE ${CMAKE_DL_LIBS}) endif() @@ -156,6 +144,7 @@ else() set(InferenceEngine_INCLUDE_DIRS ${IE_INCLUDE_DIR}) set(InferenceEngine_LIBRARIES IE::inference_engine) + set(IE_EXTERNAL_DIR "${IE_ROOT_DIR}/external") include("${IE_ROOT_DIR}/share/ie_parallel.cmake") add_subdirectory(${IE_SRC_DIR}/extension EXCLUDE_FROM_ALL ie_cpu_extension) diff --git a/inference-engine/cmake/version.cmake b/inference-engine/cmake/version.cmake index daf21cd5cc6a47..b9dcd75be38582 100644 --- a/inference-engine/cmake/version.cmake +++ b/inference-engine/cmake/version.cmake @@ -1,5 +1,4 @@ # Copyright (C) 2018-2019 Intel Corporation -# # SPDX-License-Identifier: Apache-2.0 # diff --git a/inference-engine/ie_bridges/python/CMakeLists.txt b/inference-engine/ie_bridges/python/CMakeLists.txt index 6176cccd0d179e..9fca2145d36705 100644 --- a/inference-engine/ie_bridges/python/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/CMakeLists.txt @@ -5,15 +5,13 @@ cmake_minimum_required (VERSION 3.3) project (ie_python_api) set (CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_LIST_DIR}/cmake) -if (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l") - set (ARCH armv7l) -elseif ("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") - set (ARCH intel64) -else() - set (ARCH ia32) +string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} ARCH) +if(ARCH STREQUAL
"x86_64" OR ARCH STREQUAL "amd64") # Windows detects Intel's 64-bit CPU as AMD64 + set(ARCH intel64) +elseif(ARCH STREQUAL "i386") + set(ARCH ia32) endif() - # in case of independent python api build (out of Inference Engine root Cmake) if (NOT(IE_MAIN_SOURCE_DIR)) if("${CMAKE_BUILD_TYPE}" STREQUAL "") @@ -50,4 +48,5 @@ endif() find_package (InferenceEngine REQUIRED) set (PYTHON_BRIDGE_SRC_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) -add_subdirectory (src/openvino/inference_engine) \ No newline at end of file +add_subdirectory (src/openvino/inference_engine) +add_subdirectory (src/openvino/tools/statistics_collector) diff --git a/inference-engine/ie_bridges/python/docs/api_overview.md b/inference-engine/ie_bridges/python/docs/api_overview.md index 8365cc84c16d26..e874177d6fb617 100644 --- a/inference-engine/ie_bridges/python/docs/api_overview.md +++ b/inference-engine/ie_bridges/python/docs/api_overview.md @@ -11,35 +11,225 @@ This API provides a simplified interface for Inference Engine functionality that ## Supported OSes -Currently the Inference Engine Python\* API is supported on Ubuntu* 16.04, Microsoft Windows* 10 and CentOS* 7.3 OSes. +Currently the Inference Engine Python\* API is supported on Ubuntu\* 16.04 and 18.04, Windows\* 10, macOS\* 10.x and +CentOS\* 7.3 OSes. Supported Python* versions: -* On Ubuntu 16.04: 2.7, 3.5, 3.6 -* On Windows 10: 3.5, 3.6 -* On CentOS 7.3: 3.4, 3.5, 3.6 +| Operating System | Supported Python\* versions: | +|:----- | :----- | +| Ubuntu\* 16.04 | 2.7, 3.5, 3.6, 3.7 | +| Ubuntu\* 18.04 | 2.7, 3.5, 3.6, 3.7 | +| Windows\* 10 | 3.5, 3.6, 3.7 | +| CentOS\* 7.3 | 3.4, 3.5, 3.6, 3.7 | +| macOS\* 10.x | 3.5, 3.6, 3.7 | + ## Setting Up the Environment To configure the environment for the Inference Engine Python\* API, run: - * On Ubuntu 16.04: `source /bin/setupvars.sh .` - * On Windows 10: `call \deployment_tools\inference_engine\python_api\setenv.bat` + * On Ubuntu\* 16.04 or 18.04, CentOS\* 7.4 or macOS\* 10.x: `source /bin/setupvars.sh .` + * On Windows\* 10: `call \deployment_tools\inference_engine\python_api\setenv.bat` The script automatically detects latest installed Python\* version and configures required environment if the version is supported. If you want to use certain version of Python\*, set the environment variable `PYTHONPATH=/deployment_tools/inference_engine/python_api/` after running the environment configuration script. +## IECore + +This class represents an Inference Engine entity and allows you to manipulate with plugins using unified interfaces. + +### Class Constructor + +`__init__(xml_config_file: str = "")` +* Parameters: + + * `xml_config_file` - A full path to `.xml` file containing plugins configuration. + If the parameter is not specified, the default configuration is handled automatically. +* Usage examples: + + * Initialize an `IECore` object with default configuration: + ```py + ie = IECore() + ``` + * Initialize an `IECore` object with a custom configuration location specified: + ```py + ie = IECore("/localdisk/plugins/my_custom_cfg.xml") + ``` +`.xml` file has the following structure: +```xml + + + + + + + + + + + + + +``` + +### Class Attributes +* `available_devices` - A vector of devices. The devices are returned as \[CPU, FPGA.0, FPGA.1, MYRIAD\]. + If there are more than one device of a specific type, they all are listed followed by a dot and a number. 
+ +### Instance Methods + +* `get_versions(device_name: str)` + * Description: Returns a `namedtuple` object with versions of the plugin specified + * Parameters: + * `device_name` - Name of the registered plugin + * Return value: + Dictionary mapping a plugin name to a `Versions` `namedtuple` object with the following fields: + * `major` - major plugin integer version + * `minor` - minor plugin integer version + * `build_number` - plugin build number string + * `description` - plugin description string + * Usage example: +```py +ie = IECore() +ver = ie.get_versions("CPU")["CPU"] +print("{descr}: {maj}.{min}.{num}".format(descr=ver.description, maj=ver.major, min=ver.minor, num=ver.build_number)) +``` + +* `load_network(network: IENetwork, device_name: str, config=None, num_requests: int=1)` + * Description: Loads a network that was read from the Intermediate Representation (IR) to the plugin with the specified device name and creates an `ExecutableNetwork` object from the `IENetwork` instance. + You can create as many networks as you need and use them simultaneously (up to the limitation of the hardware + resources). + * Parameters: + * `network` - A valid `IENetwork` instance + * `device_name` - A device name of a target plugin + * `num_requests` - A positive integer value of infer requests to be created. Number of infer requests is limited + by device capabilities. + * `config` - A dictionary of plugin configuration keys and their values + * Return value: An `ExecutableNetwork` object + * Usage example: +```py +net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file) +ie = IECore() +exec_net = ie.load_network(network=net, device_name="CPU", num_requests=2) +``` + +* `query_network(network: IENetwork, device_name: str, config=None)` + * Description: + Queries the plugin with the specified device name about which network layers are supported in the current configuration. + Note that layer support depends on the plugin configuration and loaded extensions. + * Parameters: + * `network` - A valid `IENetwork` instance + * `device_name` - A device name of a target plugin + * `config` - A dictionary of plugin configuration keys and their values + * Return value: A dictionary mapping layers and device names on which they are supported + * Usage example: +```py +net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file) +ie = IECore() +supported_layers = ie.query_network(network=net, device_name="HETERO:GPU,CPU") +``` + +* `set_config(config: dict, device_name: str)` + * Description: Sets a configuration for a plugin + * Parameters: + * `config` - a dictionary of configuration parameters as keys and their values + * `device_name` - a device name of a target plugin + * Return value: None + * Usage examples: + See the `set_affinity` method of the `IENetwork` class. + +* `register_plugin(plugin_name: str, device_name: str = "")` + * Description: Registers a new device and a plugin which implements this device, inside the Inference Engine. + * Parameters: + * `plugin_name` - A name of a plugin. Depending on a platform, `plugin_name` is wrapped with a shared + library suffix and a prefix to form the full name of the library + * `device_name` - A target device name for the plugin. If not specified, the method registers + a plugin with the default name.
+ * Return value: None + * Usage examples: +```py +ie = IECore() +ie.register_plugin(plugin_name="MKLDNNPlugin", device_name="MY_NEW_PLUGIN") +``` + +* `register_plugins(xml_config_file: str)` + * Description: Registers plugins specified in an `.xml` configuration file + * Parameters: + * `xml_config_file` - A full path to `.xml` file containing plugins configuration + * Return value: None + * Usage examples: +```py +ie = IECore() +ie.register_plugins("/localdisk/plugins/my_custom_cfg.xml") +``` + +* `unregister_plugin(device_name: str = "")` + * Description: Unregisters a plugin with a specified device name + * Parameters: + * `device_name` - A device name of the plugin to unregister + * Return value: None + * Usage examples: +```py +ie = IECore() +ie.register_plugin(plugin_name="clDNNPlugin", device_name="MY_NEW_GPU") +ie.unregister_plugin(device_name="MY_NEW_GPU") +``` + +* `add_extension(extension_path: str, device_name: str)` + * Description: Loads extension library to the plugin with a specified device name + * Parameters: + * `extension_path` - Path to the extensions library file to load to a plugin + * `device_name` - A device name of a plugin to load the extensions to + * Return value: None + * Usage examples: +```py +ie = IECore() +ie.add_extension(extension_path="/some_dir/libcpu_extension_avx2.so", device_name="CPU") +``` + +* `get_metric(device_name: str, metric_name: str)` + * Description: Gets a general runtime metric for dedicated hardware. Allows you to request common device properties, + which are `ExecutableNetwork` agnostic, such as device name, + temperature, and other device-specific values. + + * Parameters: + * `device_name` - A name of a device to get a metric value. + * `metric_name` - A metric name to request. + * Return value: A metric value corresponding to a metric key. + * Usage example +```py +ie = IECore() +ie.get_metric(metric_name="SUPPORTED_METRICS", device_name="CPU") +``` + +* `get_config(device_name: str, config_name: str)` + * Description: Gets a configuration dedicated to device behavior. The method extracts information + that can be set via the `set_config` method. + * Parameters: + * `device_name` - A name of a device to get a configuration value. + * `config_name` - A configuration parameter name to request. + * Return value: A configuration value corresponding to the configuration key. + * Usage example +```py +ie = IECore() +ie.get_config(config_name="CPU_BIND_THREAD", device_name="CPU") +``` + ## IENetLayer + This class stores the main information about a layer and allows you to modify some layer parameters -### Class attributes: + +### Class Attributes * `name` - Name of the layer * `type` - Layer type * `precision` - Layer base operating precision. Provides getter and setter interfaces. -* `layout` - Returns the layout of shape of the layer. -* `shape` - Return the list of the shape of the layer. -* `parents` - Returns a list, which contains names of layers preceding this layer. -* `children` - Returns a list, which contains names of layers following this layer. -* `affinity` - Layer affinity set by user or a default affinity set by the `IEPlugin.set_initial_affinity()` method. +* `layout` - Returns the layout of the shape of the layer +* `shape` - Returns the list representing the shape of the layer +* `parents` - Returns a list, which contains names of layers preceding this layer +* `children` - Returns a list, which contains names of layers following this layer +* `affinity` - Layer affinity set by user or a default affinity set by the `IEPlugin.set_initial_affinity()` method.
The affinity attribute provides getter and setter interfaces, so the layer affinity can be modified directly. For example: ```py @@ -50,27 +240,23 @@ This class stores main information about the layer and allow to modify some laye >>> for l in net.layers.values(): ... if l.type == "Convolution": ... l.affinity = "CPU" - ``` + To correctly set affinity for the network, you must first initialize and properly configure the HETERO plugin. + `set_config({"TARGET_FALLBACK": "HETERO:FPGA,GPU"})` function configures the plugin fallback devices and their order. + `plugin.set_initial_affinity(net)` function sets the affinity parameter of model layers according to their support + on the specified devices. -To correctly set affinity for the network, you must first initialize and properly configure the HETERO plugin. -`set_config({"TARGET_FALLBACK": "HETERO:FPGA,GPU"})` function configures the plugin fallback devices and their order. -`plugin.set_initial_affinity(net)` function sets affinity parameter of model layers according to its support -on specified devices. - -After default affinity is set by the plugin, override the default values by setting affinity manually how it's -described in example above + After the default affinity is set by the plugin, override the default values by setting affinity manually as + described in the example above -To understand how default and non-default affinities are set: + To understand how default and non-default affinities are set: -1. Call `net.layers` function right after model loading and check that layer affinity parameter is empty. -2. Call `plugin.set_default_affinity(net)`. -3. Call `net.layers` and check layer affinity parameters to see how plugin set a default affinity -4. Set layer affinity how it's described above -5. Call `net.layers` again and check layer affinity parameters to see how it was changed after manual affinity - setting - -Please refer to `affinity_setting_demo.py` to see the full usage pipeline. + 1. Call `net.layers` function right after model loading and check that the layer affinity parameter is empty. + 2. Call `plugin.set_default_affinity(net)`. + 3. Call `net.layers` and check layer affinity parameters to see how the plugin sets the default affinity + 4. Set layer affinity as described above + 5. Call `net.layers` again and check layer affinity parameters to see how they changed after manual affinity + setting * `weights` - Dictionary with layer weights, biases, or custom blobs if any * `params` - Layer specific parameters. Provides getter and setter interfaces to get and modify layer parameters. @@ -83,17 +269,40 @@ Please refer to `affinity_setting_demo.py` to see the full usage pipeline. This class contains the information about the network model read from IR and allows you to manipulate some model parameters such as layer affinity and output layers. -### Class Constructor - -* `__init__(model: str, weights: str)` - * Parameters: - * model - Path to `.xml` file of the IR - * weights - Path to `.bin` file of the IR - -### Class attributes: +### Class Constructor + +`__init__(model: [bytes, str], weights: [bytes, str], init_from_buffer: bool=False, ngraph_compatibility: bool=False)` + +* Parameters: + + * `model` - An `.xml` file of the IR. Depending on `init_from_buffer` value, can be a string path or bytes with file content. + * `weights` - A `.bin` file of the IR. Depending on `init_from_buffer` value, can be a string path or bytes with file content.
+ * `init_from_buffer` - Defines how the `model` and `weights` arguments are interpreted. + If `True`, they are interpreted as Python `bytes` objects with the content of the .xml and .bin files. If `False`, they are + interpreted as string paths to the .xml and .bin files of the IR. + * `ngraph_compatibility` - Default value: `False`. If `IENetwork` is initialized from + [experimental IR V7](./docs/OperationsSpecification-V7.md), set to `True` + +* Usage examples: + + * Initializing an `IENetwork` object from IR files: + ```py + net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file) + ``` + + * Initializing an `IENetwork` object from bytes with the content of IR files: + ```py + with open(path_to_bin_file, 'rb') as f: + bin = f.read() + with open(path_to_xml_file, 'rb') as f: + xml = f.read() + net = IENetwork(model=xml, weights=bin, init_from_buffer=True) + ``` + +### Class Attributes * `name` - Name of the loaded network -* `inputs` - A dictionary that maps input layer names to InputInfo objects. +* `inputs` - A dictionary that maps input layer names to InputInfo objects. For example, to get a shape of the input layer: ```py >>> net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file) @@ -102,7 +311,7 @@ layers affinity and output layers. >>> net.inputs['data'].shape [1, 3, 224, 224] ``` -* `outputs` - A dictionary that maps output layer names to OutputInfo objects +* `outputs` - A dictionary that maps output layer names to OutputInfo objects For example, to get a shape of the output layer: ```py >>> net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file) @@ -124,7 +333,7 @@ layers affinity and output layers. >>> net.inputs['data'].shape [4, 3, 224, 224] ``` -* `layers` - Return dictionary that maps network layer names to `IENetLayer` +* `layers` - Return dictionary that maps network layer names to `IENetLayer` objects containing layer properties in topological order. For example, to list all network layers: ```py >>> net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file) @@ -134,7 +343,7 @@ layers affinity and output layers. } ``` * `stats` - Returns `LayersStatsMap` object containing dictionary that maps network layer names to calibration statistics - represented by `LayerStats` objects. + represented by `LayerStats` objects. The `LayersStatsMap` class inherits from the built-in Python `dict` and overrides the default `update()` method to allow setting or modifying layer calibration statistics. ```py @@ -147,17 +356,15 @@ layers affinity and output layers. For more details about low precision inference please refer to "Low-Precision 8-bit Integer Inference" section in Inference Engine Developers Guide documentation. -### Class Methods +### Class Methods * `from_ir(model: str, weights: str)` -> **NOTE:** The function is deprecated. Please use `IENetwork()` class constructor to create valid instance of `IENetwork` - * Description: - The class method serves to read the model from the `.xml` and `.bin` files of the IR. +> **NOTE:** The function is deprecated. Please use the `IENetwork()` class constructor to create a valid instance of `IENetwork`. + * Description: Reads the model from the `.xml` and `.bin` files of the IR.
* Parameters: * model - Path to `.xml` file of the IR * weights - Path to `.bin` file of the IR - * Return value: - An instance of the `IENetwork` class + * Return value: An instance of the `IENetwork` class * Usage example: ```py >>> net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file) @@ -165,16 +372,16 @@ section in Inference Engine Developers Guide documentation. ``` -### Instance Methods +### Instance Methods -* `add_outputs(outputs)`: - * Description: - The method serves to mark any intermediate layer as output layer to retrieve the inference results - from the specified layers. +* `add_outputs(outputs)` + * Description: Marks any intermediate layer as output layer to retrieve the inference results + from the specified layers. * Parameters: - * `outputs` - List of layer names to be set as model outputs. In case of setting one layer as output, string with one layer can be provided. - * Return value: - None + * `outputs` - List of layer to be set as model outputs. The list can contain strings with layer names to be set + as outputs or tuples with layer name as first element and output port id as second element. + In case of setting one layer as output, string or tuple with one layer can be provided. + * Return value: None * Usage example: ```py >>> net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file) @@ -186,14 +393,12 @@ section in Inference Engine Developers Guide documentation. > by default. In the case above, `prob` layer is a default output and `conv5_1/dwise`, `conv2_1/expand` are user-defined > outputs. -* `reshape(input_shapes: dict)`: - * Description: - The method reshapes the network to change spatial dimensions, batch size, or any dimension. -> **Note:** Before using this method, make sure that the target shape is applicable for the network. Changing the network shape to an arbitrary value may lead to unpredictable behaviour. +* `reshape(input_shapes: dict)` + * Description: Reshapes the network to change spatial dimensions, batch size, or any dimension. +> **NOTE:** Before using this method, make sure that the target shape is applicable for the network. Changing the network shape to an arbitrary value may lead to unpredictable behaviour. * Parameters: - * `input_shapes` - The dictionary that maps input layer names to tuples with the target shape - * Return value: - None + * `input_shapes` - A dictionary that maps input layer names to tuples with the target shape + * Return value: None * Usage example: ```py >>> net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file) @@ -201,12 +406,12 @@ section in Inference Engine Developers Guide documentation. >>> n, c, h, w = net.inputs[input_layer] >>> net.reshape({input_layer: (n, c, h*2, w*2)}] ``` -* `serialize(path_to_xml, path_to_bin)`: +* `serialize(path_to_xml, path_to_bin)` * Description: - The method serializes the network and stores it in files. + Serializes the network and stores it in files. * Parameters: - * `path_to_xml` - path to a file, where a serialized model will be stored. - * `path_to_bin` - path to a file, where serialized weights will be stored. + * `path_to_xml` - Path to a file, where a serialized model will be stored + * `path_to_bin` - Path to a file, where serialized weights will be stored * Return value: None * Usage example: @@ -219,18 +424,18 @@ section in Inference Engine Developers Guide documentation. Layer calibration statistic container. 
-### Class Constructor +### Class Constructor * `__init__(min: tuple = (), max: tuple = ())` * Parameters: - * min - Tuple with per-channel minimum layer activation values - * max - Tuple with per-channel maximum layer activation values + * `min` - Tuple with per-channel minimum layer activation values + * `max` - Tuple with per-channel maximum layer activation values ## InputInfo This class contains the information about the network input layers -### Class attributes: +### Class Attributes * `precision` - Precision of the input data provided by user. Provides setter and getter interfaces to get and modify input layer precision. @@ -245,7 +450,7 @@ This class contains the information about the network input layers This class contains the information about the network input layers -### Class attributes: +### Class Attributes * `precision` - Precision of the output data. Provides setter and getter interfaces to get and modify output layer precision. @@ -256,23 +461,22 @@ This class contains the information about the network input layers This class is the main plugin interface and serves to initialize and configure the plugin. -### Class Constructor +### Class Constructor * `__init__(device: str, plugin_dirs=None)` * Parameters: * `device` - Target device name. Supported devices: CPU, GPU, FPGA, MYRIAD, HETERO * `plugin_dirs` - List of paths to plugin directories -### Properties +### Properties * `device` - a name of the device that was specified to initialize IEPlugin * `version` - a version of the plugin -### Instance Methods +### Instance Methods -* ```load(network: IENetwork, num_requests: int=1, config=None)``` - * Description: - Loads a network that was read from the IR to the plugin and creates an executable network from a network object. +* `load(network: IENetwork, num_requests: int=1, config=None)` + * Description: Loads a network that was read from the IR to the plugin and creates an executable network from a network object. You can create as many networks as you need and use them simultaneously (up to the limitation of the hardware resources). * Parameters: @@ -280,8 +484,7 @@ This class is the main plugin interface and serves to initialize and configure t * `num_requests` - A positive integer value of infer requests to be created. Number of infer requests may be limited by device capabilities. * `config` - A dictionary of plugin configuration keys and their values - * Return value: - None + * Return value: None * Usage example: ```py >>> net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file) @@ -290,60 +493,54 @@ This class is the main plugin interface and serves to initialize and configure t >>> exec_net ``` + * `set_initial_affinity(net: IENetwork)` - * Description: - Sets initial affinity for model layers according to the HETERO plugin logic. Applicable only if - IEPlugin was initialized for HETERO device. + * Description: Sets initial affinity for model layers according to the HETERO plugin logic. Applicable only if + `IEPlugin` was initialized for a HETERO device. * Parameters: * `net` - A valid instance of IENetwork - * Return value: - None + * Return value: None * Usage example: See `affinity` attribute of the `IENetLayer` class. * `add_cpu_extension(extension_path: str)` - * Description: - Loads extensions library to the plugin. Applicable only for CPU device and HETERO device with CPU + * Description: Loads extensions library to the plugin. 
Applicable only for a CPU device and a HETERO device with CPU * Parameters: * `extension_path` - A full path to CPU extensions library - * Return value: - None + * Return value: None * Usage example: ```py >>> plugin = IEPlugin(device="CPU") >>> plugin.add_cpu_extension(ext_lib_path) ``` + * `set_config(config: dict)` - * Description: - Sets a configuration for the plugin. Refer to `SetConfig()` in Inference Engine C++ documentation for acceptable + * Description: Sets a configuration for the plugin. Refer to `SetConfig()` in Inference Engine C++ documentation for acceptable keys and values list. * Parameters: * `config` - A dictionary of keys and values of acceptable configuration parameters - * Return value: - None + * Return value: None * Usage examples: See `set_affinity` method of the `IENetwork` class. + * `get_supported_layers(net: IENetwork)` - * Description: - Returns the set of layers supported by the plugin. Please note that in case of CPU plugin support of - a layer may depends on extension loaded by `add_cpu_extenstion()` method + * Description: Returns the set of layers supported by the plugin. Please note that for the CPU plugin, support of + a layer may depend on an extension loaded by the `add_cpu_extension()` method. * Parameters: * `net` - A valid instance of IENetwork - * Return value: - Set of layers supported by the plugin - * Usage example: - See `affinity` attribute of the `IENetLayer` class. + * Return value: Set of layers supported by the plugin + * Usage example: See `affinity` attribute of the `IENetLayer` class. ## ExecutableNetwork Class This class represents a network instance loaded to a plugin and ready for inference. -### Class Constructor +### Class Constructor There is no explicit class constructor. To make a valid instance of `ExecutableNetwork`, use the `load()` method of the `IEPlugin` class. -### Class attributes +### Class Attributes -* `requests` - A tuple of InferRequest instances +* `requests` - A tuple of `InferRequest` instances * Usage example: ```py >>> net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file) @@ -355,7 +552,7 @@ There is no explicit class constructor. To make a valid instance of `ExecutableN ) ``` -### Instance Methods +### Instance Methods * `infer(inputs=None)` * Description: @@ -379,16 +576,15 @@ There is no explicit class constructor. To make a valid instance of `ExecutableN ...... ]])} ``` - For illustration of input data preparation, please see samples (for example, `classification_sample.py`). + For illustration of input data preparation, please see the samples (for example, `classification_sample.py`). + * `start_async(request_id, inputs=None)` - * Description: - Starts asynchronous inference for specified infer request. - Wraps `async_infer()` method of the `InferRequest` class + * Description: Starts asynchronous inference for the specified infer request. + Wraps `async_infer()` method of the `InferRequest` class. * Parameters: * `request_id` - Index of infer request to start inference * `inputs` - A dictionary that maps input layer names to `numpy.ndarray` objects of proper shape with input data for the layer - * Return value: - A handler of specified infer request, which is an instance of the `InferRequest` class. + * Return value: A handle to the specified infer request, which is an instance of the `InferRequest` class. * Usage example: ```py >>> infer_request_handle = exec_net.start_async(request_id=0, inputs={input_blob: image}) @@ -399,21 +595,66 @@ There is no explicit class constructor.
To make a valid instance of `ExecutableN For more details about infer requests processing, see `classification_sample_async.py` (simplified case) and `object_detection_demo_ssd_async.py` (real asynchronous use case) samples. +* `get_exec_graph_info()` + * Description: Gets executable graph information from a device + * Parameters: None + * Return value: An instance of `IENetwork` + * Usage example: + +```py +net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file) +plugin = IEPlugin(device="CPU") +exec_net = plugin.load(network=net, num_requests=2) +exec_graph = exec_net.get_exec_graph_info() +``` + +* `get_metric(metric_name: str)` + * Description: Gets a general runtime metric for an executable network. It can be the network name, the actual device ID on + which the executable network is running, or other properties that cannot be changed dynamically. + + * Parameters: + * `metric_name` - A metric name to request. + * Return value: A metric value corresponding to a metric key. + * Usage example +```py +ie = IECore() +net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file) +exec_net = ie.load_network(net, "CPU") +exec_net.get_metric("NETWORK_NAME") +``` + +* `get_config(config_name: str)` + * Description: Gets a configuration for the current executable network. The method extracts information + that affects executable network execution. + + * Parameters: + * `config_name` - A configuration parameter name to request. + * Return value: A configuration value corresponding to a configuration key. + * Usage example +```py +ie = IECore() +net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file) +exec_net = ie.load_network(net, "CPU") +exec_net.get_config("DEVICE_ID") +``` + + ## InferRequest Class This class provides an interface to infer requests of `ExecutableNetwork` and serves to handle infer requests execution and to set and get output data. -### Class Constructor +### Class Constructor There is no explicit class constructor. To make a valid `InferRequest` instance, use `load()` method of the `IEPlugin` class with specified number of requests to get `ExecutableNetwork` instance which stores infer requests. -### Class attributes +### Class Attributes * `inputs` - A dictionary that maps input layer names to `numpy.ndarray` objects of proper shape with input data for the layer * `outputs` - A dictionary that maps output layer names to `numpy.ndarray` objects with output data of the layer - * Usage example: + + Usage example: ```py >>> exec_net.requests[0].inputs['data'][:] = image >>> exec_net.requests[0].infer() >>> res = exec_net.requests[0].outputs['prob'] >>> np.flip(np.sort(np.squeeze(res)),0) array([4.85416055e-01, 1.70385033e-01, 1.21873841e-01, 1.18894853e-01, 2.26027006e-03, 2.12283316e-03 ...]) ``` -### Instance Methods +### Instance Methods It is not recommended to run inference directly on an `InferRequest` instance. To run inference, please use the simplified methods `infer()` and `start_async()` of `ExecutableNetwork`, as shown below.
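For example, the recommended calling pattern looks as follows (a minimal sketch; `net`, `plugin`, and `image` are assumed to be prepared as in the examples above):
```py
>>> exec_net = plugin.load(network=net, num_requests=1)
>>> res = exec_net.infer({'data': image})  # preferred over calling infer() on exec_net.requests[0] directly
```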
* `infer(inputs=None)` - * Description: - Starts synchronous inference of the infer request and fill outputs array + * Description: Starts synchronous inference of the infer request and fill outputs array * Parameters: * `inputs` - A dictionary that maps input layer names to `numpy.ndarray` objects of proper shape with input data for the layer - * Return value: - None + * Return value: None * Usage example: ```py >>> exec_net = plugin.load(network=net, num_requests=2) @@ -445,14 +684,13 @@ To run inference, please use simplified methods `infer()` and `start_async()` of array([4.85416055e-01, 1.70385033e-01, 1.21873841e-01, 1.18894853e-01, 5.45198545e-02, 2.44456064e-02, 5.41366823e-03, 3.42589128e-03, 2.26027006e-03, 2.12283316e-03 ...]) -``` +``` + * `async_infer(inputs=None)` - * Description: - Starts asynchronous inference of the infer request and fill outputs array + * Description: Starts asynchronous inference of the infer request and fill outputs array * Parameters: * `inputs` - A dictionary that maps input layer names to `numpy.ndarray` objects of proper shape with input data for the layer - * Return value: - None + * Return value: None * Usage example: ```py >>> exec_net = plugin.load(network=net, num_requests=2) @@ -463,10 +701,10 @@ array([4.85416055e-01, 1.70385033e-01, 1.21873841e-01, 1.18894853e-01, array([4.85416055e-01, 1.70385033e-01, 1.21873841e-01, 1.18894853e-01, 5.45198545e-02, 2.44456064e-02, 5.41366823e-03, 3.42589128e-03, 2.26027006e-03, 2.12283316e-03 ...]) -``` +``` + * `wait(timeout=-1)` - * Description: - Waits for the result to become available. Blocks until specified timeout elapses or the result + * Description: Waits for the result to become available. Blocks until specified timeout elapses or the result becomes available, whichever comes first. > **NOTE:** There are special values of the timeout parameter: * 0 - Immediately returns the inference status. It does not block or interrupt execution. @@ -475,14 +713,12 @@ array([4.85416055e-01, 1.70385033e-01, 1.21873841e-01, 1.18894853e-01, * Parameters: * `timeout` - Time to wait in milliseconds or special (0, -1) cases described above. If not specified, `timeout` value is set to -1 by default. - * Usage example: - See `async_infer()` method of the the `InferRequest` class. + * Usage example: See `async_infer()` method of the the `InferRequest` class. + * `get_perf_counts()` - * Description: - Queries performance measures per layer to get feedback of what is the most time consuming layer. + * Description: Queries performance measures per layer to get feedback of what is the most time consuming layer. > **NOTE**: Performance counters data and format depends on the plugin - * Parameters: - None + * Parameters: None * Usage example: ```py >>> exec_net = plugin.load(network=net, num_requests=2) @@ -501,16 +737,40 @@ array([4.85416055e-01, 1.70385033e-01, 1.21873841e-01, 1.18894853e-01, ... } ``` + * `set_batch(size)` * Description: Sets new batch size for certain infer request when dynamic batching is enabled in executable network that created this request. > **NOTE:** Support of dynamic batch size depends on the target plugin. * Parameters: - * `batch` - new batch size to be used by all the following inference calls for this request. 
+ * `batch` - New batch size to be used by all the following inference calls for this request * Usage example: ```py >>> plugin.set_config({"DYN_BATCH_ENABLED": "YES"}) >>> exec_net = plugin.load(network=net) >>> exec_net.requests[0].set_batch(inputs_count) ``` -Please refer to `dynamic_batch_demo.py` to see the full usage example. + +* `set_completion_callback(py_callback, py_data = None)` + * Description: Sets a callback function that is called on success or failure of an asynchronous request + * Parameters: + * `py_callback` - Any defined or lambda function + * `py_data` - Data that is passed to the callback function + * Return value: None + * Usage example: + +```py + +callback = lambda status, py_data: print("Request with id {} finished with status {}".format(py_data, status)) + +net = IENetwork("./model.xml", "./model.bin") +ie = IECore() +exec_net = ie.load_network(net, "CPU", num_requests=4) + +for id, req in enumerate(exec_net.requests): + req.set_completion_callback(py_callback=callback, py_data=id) + +for req in exec_net.requests: + req.async_infer({"data": img}) + +``` diff --git a/inference-engine/ie_bridges/python/requirements.txt b/inference-engine/ie_bridges/python/requirements.txt index cb990c6cdb998e..273aebcaa81e35 100644 --- a/inference-engine/ie_bridges/python/requirements.txt +++ b/inference-engine/ie_bridges/python/requirements.txt @@ -1,3 +1,4 @@ opencv-python numpy -cython \ No newline at end of file +cython +progress diff --git a/inference-engine/ie_bridges/python/sample/affinity_setting_sample/affinity_setting_sample.py b/inference-engine/ie_bridges/python/sample/affinity_setting_sample/affinity_setting_sample.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/README.md b/inference-engine/ie_bridges/python/sample/benchmark_app/README.md index 84a383ae86150e..7bb4b20f7ec0ab 100644 --- a/inference-engine/ie_bridges/python/sample/benchmark_app/README.md +++ b/inference-engine/ie_bridges/python/sample/benchmark_app/README.md @@ -1,19 +1,22 @@ -# Benchmark Application Python* Demo +# Benchmark Python* Application This topic demonstrates how to run the Benchmark Application demo, which performs inference using convolutional networks. ## How It Works -> **NOTE:** To achieve benchmark results similar to the official published results, set CPU frequency to 2.9GHz and GPU frequency to 1GHz. - -Upon the start-up, the application reads command-line parameters and loads a network and images to the Inference Engine plugin. The number of infer requests and execution approach depend on a mode defined with the `-api` command-line parameter. +Upon start-up, the application reads command-line parameters and loads a network and images/binary files to the Inference Engine +plugin, which is chosen depending on a specified device. The number of infer requests and execution approach depend +on the mode defined with the `-api` command-line parameter. > **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md). 
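As a quick illustration of the manual channel rearrangement mentioned in the note above, the reversal can be done in application code before feeding the input (a minimal sketch; the file name is hypothetical, and `opencv-python` from the sample requirements is assumed):
```py
import cv2

image = cv2.imread("input.bmp")  # OpenCV reads images in BGR channel order
rgb_image = image[:, :, ::-1]    # reverse the channel axis to get RGB for models trained on RGB data
```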
### Synchronous API + For synchronous mode, the primary metric is latency. The application creates one infer request and executes the `Infer` method. A number of executions is defined by one of the two values: * Number of iterations defined with the `-niter` command-line argument -* Predefined duration if `-niter` is skipped. Predefined duration value depends on device. +* Time duration specified with the `-t` command-line argument +* Both of them (execution will continue until both conditions are met) +* Predefined duration if `-niter` and `-t` are not specified. Predefined duration value depends on device. During the execution, the application collects two types of metrics: * Latency for each infer request executed with `Infer` method @@ -24,72 +27,103 @@ Reported latency value is calculated as mean value of all collected latencies. R ### Asynchronous API For asynchronous mode, the primary metric is throughput in frames per second (FPS). The application creates a certain number of infer requests and executes the `StartAsync` method. A number of infer is specified with the `-nireq` command-line parameter. A number of executions is defined by one of the two values: * Number of iterations defined with the `-niter` command-line argument -* Predefined duration if `-niter` is skipped. Predefined duration value depends on device. +* Time duration specified with the `-t` command-line argument +* Both of them (execution will continue until both conditions are met) +* Predefined duration if `-niter` and `-t` are not specified. Predefined duration value depends on device. -The infer requests are executed asynchronously. `Wait` method is used to wait for previous execution to complete. The application measures all infer requests executions and reports the throughput metric based on batch size and total execution duration. +The infer requests are executed asynchronously. Callback is used to wait for previous execution to complete. The application measures all infer requests executions and reports the throughput metric based on batch size and total execution duration. ## Running +Notice that the benchmark_app usually produces optimal performance for any device out of the box. + +**So in most cases you don't need to play the app options explicitly and the plain device name is enough**, e.g.: +``` +$benchmark_app -m -i -d CPU +``` Running the application with the `-h` or `--help`' option yields the following usage message: -```python3 benchmark_app.py -h``` -The command yields the following usage message: ``` - usage: benchmark_app.py [-h] -i PATH_TO_IMAGES -m PATH_TO_MODEL - [-c PATH_TO_CLDNN_CONFIG] [-l PATH_TO_EXTENSION] - [-api {sync,async}] [-d TARGET_DEVICE] - [-niter NUMBER_ITERATIONS] - [-nireq NUMBER_INFER_REQUESTS] - [-nthreads NUMBER_THREADS] [-b BATCH_SIZE] - [-pin {YES,NO}] +usage: benchmark_app.py [-h] [-i PATH_TO_INPUT] -m PATH_TO_MODEL + [-pp PLUGIN_DIR] [-d TARGET_DEVICE] + [-l PATH_TO_EXTENSION] [-c PATH_TO_CLDNN_CONFIG] + [-api {sync,async}] [-niter NUMBER_ITERATIONS] + [-nireq NUMBER_INFER_REQUESTS] [-b BATCH_SIZE] + [-stream_output [STREAM_OUTPUT]] [-t TIME] + [-progress [PROGRESS]] [-nstreams NUMBER_STREAMS] + [-nthreads NUMBER_THREADS] [-pin {YES,NO}] + [--exec_graph_path EXEC_GRAPH_PATH] + [-pc [PERF_COUNTS]] Options: -h, --help Show this help message and exit. - -i PATH_TO_IMAGES, --path_to_images PATH_TO_IMAGES - Required. Path to a folder with images or to image - files. + -i PATH_TO_INPUT, --path_to_input PATH_TO_INPUT + Optional. 
Path to a folder with images and/or binaries + or to specific image or binary file. -m PATH_TO_MODEL, --path_to_model PATH_TO_MODEL Required. Path to an .xml file with a trained model. - -c PATH_TO_CLDNN_CONFIG, --path_to_cldnn_config PATH_TO_CLDNN_CONFIG - Optional. Required for GPU custom kernels. Absolute - path to an .xml file with the kernels description. + -pp PLUGIN_DIR, --plugin_dir PLUGIN_DIR + Optional. Path to a plugin folder. + -d TARGET_DEVICE, --target_device TARGET_DEVICE + Optional. Specify a target device to infer on: CPU, + GPU, FPGA, HDDL or MYRIAD. + Use "-d HETERO:<comma-separated_devices_list>" format to specify the HETERO plugin. -l PATH_TO_EXTENSION, --path_to_extension PATH_TO_EXTENSION + Optional. Required for CPU custom layers. Absolute + path to a shared library with the kernels + implementations. + -c PATH_TO_CLDNN_CONFIG, --path_to_cldnn_config PATH_TO_CLDNN_CONFIG Optional. Required for GPU custom kernels. Absolute path to an .xml file with the kernels description. -api {sync,async}, --api_type {sync,async} Optional. Enable using sync/async API. Default value - is sync - -d TARGET_DEVICE, --target_device TARGET_DEVICE - Optional. Specify a target device to infer on: CPU, - GPU, FPGA, HDDL or MYRIAD. Use "-d HETERO:" format to specify HETERO - plugin. The application looks for a suitable plugin - for the specified device. + is async. -niter NUMBER_ITERATIONS, --number_iterations NUMBER_ITERATIONS Optional. Number of iterations. If not specified, the number of iterations is calculated depending on a device. -nireq NUMBER_INFER_REQUESTS, --number_infer_requests NUMBER_INFER_REQUESTS - Optional. Number of infer requests (default value is - 2). - -nthreads NUMBER_THREADS, --number_threads NUMBER_THREADS - Number of threads to use for inference on the CPU - (including Hetero cases). + Optional. Number of infer requests. Default value is + determined automatically for device. -b BATCH_SIZE, --batch_size BATCH_SIZE Optional. Batch size value. If not specified, the batch size value is determined from IR + -stream_output [STREAM_OUTPUT] + Optional. Print progress as plain text. When + specified, an interactive progress bar is replaced + with a multiline output. + -t TIME, --time TIME Optional. Time in seconds to execute topology. + -progress [PROGRESS] Optional. Show progress bar (can affect performance + measurement). Default value is "False". + -nstreams NUMBER_STREAMS, --number_streams NUMBER_STREAMS + Optional. Number of streams to use for inference on the CPU/GPU in throughput mode + (for HETERO device case use format <device1>:<nstreams1>,<device2>:<nstreams2> or just <nstreams>). + -nthreads NUMBER_THREADS, --number_threads NUMBER_THREADS + Number of threads to use for inference on the CPU + (including HETERO case). -pin {YES,NO}, --infer_threads_pinning {YES,NO} Optional. Enable ("YES" is default value) or disable ("NO") CPU threads pinning for CPU-involved inference. + --exec_graph_path EXEC_GRAPH_PATH + Optional. Path to a file where serialized executable + graph information will be stored. + -pc [PERF_COUNTS], --perf_counts [PERF_COUNTS] + Optional. Report performance counters. + ``` Running the application with the empty list of options yields the usage message given above and an error message. +The application supports topologies with one or more inputs. If a topology is not data-sensitive, you can skip the input parameter. In this case, inputs are filled with random values. +If a model has only image input(s), please provide a folder with images or a path to an image as input.
+If a model has some specific input(s) (not images), please prepare binary file(s) filled with data of the appropriate precision, and provide a path to them as input. +If a model has mixed input types, the input folder should contain all required files. Image inputs are filled with image files one by one. Binary inputs are filled with binary files one by one. + To run the demo, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/). > **NOTE**: Before running the demo with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). -For example, to do inference on an image using a trained network with multiple outputs on CPU, run the following command: +For example, to do inference of an image using a trained network with multiple outputs on CPU, run the following command: ``` python3 benchmark_app.py -i /inputImage.bmp -m /multiple-output.xml -d CPU @@ -97,17 +131,22 @@ python3 benchmark_app.py -i /inputImage.bmp -m /mu ## Demo Output -Application output depends on a used API. For synchronous API, the application outputs latency and throughput: -``` -[ INFO ] Start inference synchronously (10 s duration) -[BENCHMARK RESULT] Latency is 15.5520 msec -[BENCHMARK RESULT] Throughput is 1286.0082 FPS -``` +The application outputs the number of executed iterations, the total duration of execution, latency, and throughput. +Additionally, if you set the `-pc` parameter, the application outputs performance counters. +If you set `--exec_graph_path`, the application stores the serialized executable graph information to the specified file. 
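For reference, the reported statistics can be reproduced from the collected per-request latencies: the latency is the median of all measured times, and the throughput is derived either from the latency (sync mode) or from the total execution duration (async mode). The following is a minimal sketch mirroring the computation in `benchmark/benchmark.py` below; the sample values are made up:
```
from statistics import median

def report(times_ms, batch_size, iterations, total_duration_sec, api_type):
    # latency is the median over all collected per-request times (in ms)
    latency_ms = median(times_ms)
    if api_type == 'sync':
        fps = batch_size * 1000 / latency_ms
    else:
        fps = batch_size * iterations / total_duration_sec
    print("Count: {} iterations".format(iterations))
    print("Duration: {:.2f} ms".format(total_duration_sec * 1000))
    print("Latency: {:.4f} ms".format(latency_ms))
    print("Throughput: {:.2f} FPS".format(fps))

report([51.2, 52.0, 51.9], batch_size=1, iterations=4408,
       total_duration_sec=60.15, api_type='async')
```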
-For asynchronous API, the application outputs only throughput: ``` -[ INFO ] Start inference asynchronously (10 s duration, 8 inference requests in parallel) -[BENCHMARK RESULT] Throughput is 1444.2591 FPS +[Step 8/9] Measuring performance (Start inference asynchronously, 60000 ms duration, 4 inference requests in parallel using 4 streams) +Progress: |................................| 100.00% + +[Step 9/9] Dumping statistics report +Progress: |................................| 100.00% + +Count: 4408 iterations +Duration: 60153.52 ms +Latency: 51.8244 ms +Throughput: 73.28 FPS + ``` ## See Also diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/__init__.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/__init__.py index 86feb3005e503e..3d35f203c67aa3 100644 --- a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/__init__.py +++ b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/__init__.py @@ -15,4 +15,3 @@ """ from .benchmark import main -from .utils.constants import HELP_MESSAGES diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/benchmark.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/benchmark.py index 462e0309270871..ccee15538961d7 100644 --- a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/benchmark.py +++ b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/benchmark.py @@ -15,48 +15,79 @@ """ from statistics import median -from openvino.inference_engine import IENetwork, IEPlugin - -from .utils.benchmark_utils import * +from openvino.inference_engine import IENetwork, IECore, get_version + +from .utils.parameters import * +from .utils.inputs_filling import * +from .utils.utils import * +from .utils.infer_request_wrap import * +from .utils.progress_bar import * + +def getDurationInMilliseconds(duration): + return duration * 1000 + +def static_vars(**kwargs): + def decorate(func): + for k in kwargs: + setattr(func, k, kwargs[k]) + return func + return decorate + +@static_vars(step_id = 0) +def next_step(additional_info = ""): + step_names = { + 1 : "Parsing and validating input arguments", + 2 : "Loading Inference Engine", + 3 : "Read the Intermediate Representation of the network", + 4 : "Resizing network to match image sizes and given batch", + 5 : "Configuring input of the model", + 6 : "Setting device configuration", + 7 : "Loading the model to the device", + 8 : "Setting optimal runtime parameters", + 9 : "Creating infer requests and filling input blobs with images", + 10 : "Measuring performance", + 11 : "Dumping statistics report", + } + + next_step.step_id += 1 + if (next_step.step_id not in step_names.keys()): + raise Exception("Step ID " + str(next_step.step_id) + " is out of total steps number " + str(len(step_names))) + + print("[Step {}/{}] {}".format(next_step.step_id, len(step_names), step_names[next_step.step_id]) + (" (" + additional_info + ")" if len(additional_info) else "")) def main(args=None): try: - if args is None: + # ------------------------------ 1. Parsing and validating input arguments ------------------------------------- + next_step() + + if not args: args = parse_args() - validate_args(args) + # ------------------------------ 2. Loading Inference Engine --------------------------------------------------- + next_step() + + device_name = args.target_device.upper() - # --------------------------------- 1. 
Load Plugin for inference engine --------------------------------- - logging.info("Loading plugin") - plugin = IEPlugin(args.target_device) + ie = IECore() - config = dict() - if CPU_DEVICE_NAME in args.target_device: + if CPU_DEVICE_NAME in device_name: if args.path_to_extension: - plugin.add_cpu_extension(args.path_to_extension) - # limit threading for CPU portion of inference - if args.number_threads is not None: - config.update({'CPU_THREADS_NUM': str(args.number_threads)}) - # pin threads for CPU portion of inference - config.update({'CPU_BIND_THREAD': args.infer_threads_pinning}) - # for pure CPU execution, more throughput-oriented execution via streams - if args.api_type == 'async' and CPU_DEVICE_NAME in args.target_device: - config.update({'CPU_THROUGHPUT_STREAMS': str(args.number_infer_requests)}) - elif GPU_DEVICE_NAME in args.target_device: + ie.add_extension(extension_path=args.path_to_extension, device_name=CPU_DEVICE_NAME) + if GPU_DEVICE_NAME in device_name: if args.path_to_cldnn_config: - config.update({'CONFIG_FILE': args.path_to_cldnn_config}) + ie.set_config({'CONFIG_FILE' : args.path_to_cldnn_config}, GPU_DEVICE_NAME) logger.info("GPU extension is loaded {}".format(args.path_to_cldnn_config)) - elif MYRIAD_DEVICE_NAME in args.target_device: - config.update({'LOG_LEVEL': 'LOG_INFO'}) - config.update({'VPU_LOG_LEVEL': 'LOG_INFO'}) - - plugin.set_config(config) - logger.info("Device is {}".format(plugin.device)) - logger.info("Plugin version is {}".format(plugin.version)) + logger.info("InferenceEngine:\n{: <9}{}".format("",get_version())) + version_string = "Device is {}\n".format(device_name) + for device, version in ie.get_versions(device_name).items(): + version_string += "{: <9}{}\n".format("", device) + version_string += "{: <9}{:.<24}{} {}.{}\n".format("",version.description," version", version.major, version.minor) + version_string += "{: <9}{:.<24} {}\n".format("","Build", version.build_number) + logger.info(version_string) - # --------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) --------------------- - logger.info("Loading network files") + # --------------------- 3. Read the Intermediate Representation of the network --------------------------------- + next_step() xml_filename = os.path.abspath(args.path_to_model) head, tail = os.path.splitext(xml_filename) @@ -68,132 +99,245 @@ def main(args=None): if len(input_info) == 0: raise AttributeError('No inputs info is provided') - elif len(input_info) != 1: - raise AttributeError("only one input layer network is supported") - # -------------------------------------- 3. Change network batch_size ------------------------------------- + # --------------------- 4. 
Resizing network to match image sizes and given batch ------------------------------- + next_step() + batch_size = ie_network.batch_size - key = list(input_info.keys()).pop() - precision = input_info[key].precision + precision = ie_network.precision if args.batch_size and args.batch_size != ie_network.batch_size: - # deepcopy input_info - shape = input_info[key].shape - # We support models having only one input layers - if input_info[key].layout != LAYOUT_TYPE: - raise Exception('Unsupported model for batch size changing in automatic mode') - shape[BATCH_SIZE_ELEM] = args.batch_size - ie_network.reshape({key: shape}) - - input_info = ie_network.inputs + new_shapes = {} + for key in input_info.keys(): + shape = input_info[key].shape + layout = input_info[key].layout + + batchIndex = -1 + if ((layout == 'NCHW') or (layout == 'NCDHW') or + (layout == 'NHWC') or (layout == 'NDHWC') or + (layout == 'NC')): + batchIndex = 0 + elif (layout == 'CN'): + batchIndex = 1 + + if ((batchIndex != -1) and (shape[batchIndex] != args.batch_size)): + shape[batchIndex] = args.batch_size + new_shapes[key] = shape + + if (len(new_shapes) > 0): + logger.info("Resizing network to batch = {}".format(args.batch_size)) + ie_network.reshape(new_shapes) batch_size = args.batch_size + logger.info("Network batch size: {}, precision {}".format(batch_size, precision)) - logger_message = "Network batch size was changed to: " if args.batch_size is not None else "Network batch size: " - logger_message += " {}, precision: {}".format(batch_size, precision) - logger.info(logger_message) - - # ------------------------------------- 4. Loading model to the plugin ------------------------------------- - logger.info("Loading model to the plugin") - exe_network = plugin.load(ie_network, args.number_infer_requests) - - # ------------------------------------ 5. Performance measurements stuff ----------------------------------- - inputs = get_images(os.path.abspath(args.path_to_images), batch_size) - - if batch_size < len(inputs): - logger.warn("Network batch size {} is less then images count {}" - ", some input files will be ignored".format(batch_size, len(inputs))) - - input_images = {key: fill_blob_with_image(inputs, input_info[key].shape)} + # --------------------- 5. Configuring input of the model ------------------------------------------------------ + next_step() - times = list() - duration = 0 + for key in input_info.keys(): + if (isImage(input_info[key])): + # Set the precision of input data provided by the user + # Should be called before load of the network to the plugin + input_info[key].precision = 'U8' - if args.number_iterations is None: - duration = get_duration_in_secs(args.target_device) + # --------------------- 6. 
Setting device configuration -------------------------------------------------------- + next_step() - if args.api_type == 'sync': + devices = parseDevices(device_name) + device_nstreams = parseValuePerDevice(devices, args.number_streams) + for device in devices: + if device == CPU_DEVICE_NAME: ## CPU supports a few special performance-oriented keys + ## limit threading for CPU portion of inference + if args.number_threads: + ie.set_config({'CPU_THREADS_NUM': str(args.number_threads)}, device) - # warming up - out of scope - exe_network.infer(input_images) - - if args.number_iterations is not None: - logger.info( - "Start inference synchronously ({}) sync inference executions".format(args.number_iterations)) - for iteration in range(args.number_iterations): - sync_infer_request(exe_network, times, input_images) + # pin threads for CPU portion of inference + ie.set_config({'CPU_BIND_THREAD': args.infer_threads_pinning}, device) - else: - logger.info("Start inference synchronously ({} s duration)".format(duration)) - start_time = datetime.now() - current_time = start_time - while (current_time - start_time).total_seconds() < duration: - current_time = sync_infer_request(exe_network, times, input_images) - - times.sort() - latency = median(times) - fps = batch_size / latency - - print("[BENCHMARK RESULT] Latency is {:.4f} msec".format(latency * 1e3)) - print("[BENCHMARK RESULT] Throughput is {:.4f} FPS".format(fps)) + ## for CPU execution, more throughput-oriented execution via streams + if args.api_type == 'async': + ie.set_config({'CPU_THROUGHPUT_STREAMS': str(device_nstreams.get(device)) + if device in device_nstreams.keys() + else 'CPU_THROUGHPUT_AUTO' }, device) + device_nstreams[device] = int(ie.get_config(device, 'CPU_THROUGHPUT_STREAMS')) + + elif device == GPU_DEVICE_NAME: + if args.api_type == 'async': + ie.set_config({'GPU_THROUGHPUT_STREAMS' : str(device_nstreams.get(device)) + if device in device_nstreams.keys() + else 'GPU_THROUGHPUT_AUTO'}, device) + device_nstreams[device] = int(ie.get_config(device, 'GPU_THROUGHPUT_STREAMS')) + + elif device == MYRIAD_DEVICE_NAME: + ie.set_config({'LOG_LEVEL': 'LOG_INFO', + 'VPU_LOG_LEVEL': 'LOG_WARNING'}, MYRIAD_DEVICE_NAME) + + # --------------------- 7. Loading the model to the device ----------------------------------------------------- + next_step() + + config = { 'PERF_COUNT' : ('YES' if args.perf_counts else 'NO')} + + exe_network = ie.load_network(ie_network, + device_name, + config=config, + num_requests=args.number_infer_requests if args.number_infer_requests else 0) + + # --------------------- 8. Setting optimal runtime parameters -------------------------------------------------- + next_step() + + ## Number of requests + infer_requests = exe_network.requests + nireq = len(infer_requests) + + ## Iteration limit + niter = args.number_iterations + if niter and args.api_type == 'async': + niter = ((niter + nireq - 1) // nireq) * nireq + if (args.number_iterations != niter): + logger.warn("Number of iterations was aligned by request number " + "from {} to {} using number of requests {}".format(args.number_iterations, niter, nireq)) + + ## Time limit + duration_seconds = 0 + if args.time: + ## time limit + duration_seconds = args.time + elif not args.number_iterations: + ## default time limit + duration_seconds = get_duration_in_secs(device) + + # ------------------------------------ 9. 
Creating infer requests and filling input blobs ---------------------- + next_step() + + request_queue = InferRequestsQueue(infer_requests) + + path_to_input = os.path.abspath(args.path_to_input) if args.path_to_input else None + requests_input_data = getInputs(path_to_input, batch_size, ie_network.inputs, infer_requests) + + # ------------------------------------ 10. Measuring performance ------------------------------------------------ + + progress_count = 0 + progress_bar_total_count = 10000 + + output_string = "Start inference {}hronously".format(args.api_type) + if (args.api_type == "async"): + if output_string != "": + output_string += ", " + + output_string += str(nireq) + " inference requests" + device_ss = '' + for device, nstreams in device_nstreams.items(): + if device_ss != '': + device_ss += ', ' + device_ss += "{} streams for {}".format(str(nstreams), device) + if device_ss != '': + output_string += " using " + device_ss + + output_string += ", limits: " + if niter: + if not duration_seconds: + progress_bar_total_count = niter + output_string += str(niter) + " iterations" + + if duration_seconds: + if niter: + output_string += ", " + output_string += str(getDurationInMilliseconds(duration_seconds)) + " ms duration" + + next_step(output_string) + + ## warming up - out of scope + infer_request = request_queue.getIdleRequest() + if not infer_request: + raise Exception("No idle Infer Requests!") + + if (args.api_type == 'sync'): + infer_request.infer(requests_input_data[infer_request.id]) else: - infer_requests = exe_network.requests - - if args.number_iterations is not None: - logger.info("Start inference asynchronously ({}" - " async inference executions, {} " - " inference requests in parallel".format(args.number_iterations, - args.number_infer_requests)) - else: - logger.info("Start inference asynchronously ({} s duration, " - "{} inference requests in parallel)".format(duration, args.number_infer_requests)) + infer_request.startAsync(requests_input_data[infer_request.id]) - current_inference = 0 - required_inference_requests_were_executed = False - previous_inference = 1 - args.number_infer_requests - step = 0 - steps_count = args.number_infer_requests - 1 - if args.number_iterations is not None: - steps_count += args.number_iterations + request_queue.waitAll() + request_queue.resetTimes() - # warming up - out of scope - infer_requests[0].async_infer(input_images) - infer_requests[0].wait() + start_time = datetime.now() + exec_time = (datetime.now() - start_time).total_seconds() + iteration = 0 - start_time = datetime.now() - while not required_inference_requests_were_executed or step < steps_count or \ - args.number_iterations is None and (datetime.now() - start_time).total_seconds() < duration: - exe_network.start_async(current_inference, input_images) + progress_bar = ProgressBar(progress_bar_total_count, args.stream_output, args.progress) - if previous_inference >= 0: - status = infer_requests[previous_inference].wait() - if status is not 0: - raise Exception("Infer request not completed successfully") + ## Start inference & calculate performance + ## align the number of iterations to guarantee that the last infer requests are executed in the same conditions + while ((niter and iteration < niter) or + (duration_seconds and exec_time < duration_seconds) or + (args.api_type == "async" and iteration % nireq != 0)): + infer_request = request_queue.getIdleRequest() + if not infer_request: + raise Exception("No idle Infer Requests!") - current_inference += 1 - if 
current_inference >= args.number_infer_requests: - current_inference = 0 - required_inference_requests_were_executed = True - - previous_inference += 1 - if previous_inference >= args.number_infer_requests: - previous_inference = 0 - - step += 1 - - # wait the latest inference executions - for not_completed_index in range(args.number_infer_requests): - if infer_requests[not_completed_index].wait(0) != 0: - infer_requests[not_completed_index].wait() + if (args.api_type == 'sync'): + infer_request.infer(requests_input_data[infer_request.id]) + else: + infer_request.startAsync(requests_input_data[infer_request.id]) + iteration += 1 - total_duration = (datetime.now() - start_time).total_seconds() - fps = batch_size * step / total_duration + exec_time = (datetime.now() - start_time).total_seconds() - print("[BENCHMARK RESULT] Throughput is {:.4f} FPS".format(fps)) + if niter: + progress_bar.add_progress(1) + else: + ## calculate how many progress intervals are covered by current iteration. + ## depends on the current iteration time and time of each progress interval. + ## Previously covered progress intervals must be skipped. + progress_interval_time = duration_seconds / progress_bar_total_count + new_progress = int(exec_time / progress_interval_time - progress_count) + progress_bar.add_progress(new_progress) + progress_count += new_progress + + ## wait for the latest inference executions + request_queue.waitAll() + + total_duration_sec = request_queue.getDurationInSeconds() + times = request_queue.times + times.sort() + latency_ms = median(times) + fps = batch_size * 1000 / latency_ms if args.api_type == 'sync' else batch_size * iteration / total_duration_sec + + progress_bar.finish() + + # ------------------------------------ 11. Dumping statistics report ------------------------------------------- + next_step() + + if args.exec_graph_path: + try: + exec_graph_info = exe_network.get_exec_graph_info() + exec_graph_info.serialize(args.exec_graph_path) + logger.info("Executable graph is stored to {}".format(args.exec_graph_path)) + del exec_graph_info + except Exception as e: + logging.exception(e) + + if args.perf_counts: + for ni in range(int(nireq)): + perf_counts = exe_network.requests[ni].get_perf_counts() + logger.info("Performance counts for {}-th infer request".format(ni)) + for layer, stats in perf_counts.items(): + max_layer_name = 30 + print("{:<30}{:<15}{:<30}{:<20}{:<20}{:<20}".format(layer[:max_layer_name - 4] + '...' 
if (len(layer) >= max_layer_name) else layer, + stats['status'], + 'layerType: ' + str(stats['layer_type']), + 'realTime: ' + str(stats['real_time']), + 'cpu: ' + str(stats['cpu_time']), + 'execType: ' + str(stats['exec_type']))) + + print("Count: {} iterations".format(iteration)) + print("Duration: {:.2f} ms".format(getDurationInMilliseconds(total_duration_sec))) + print("Latency: {:.4f} ms".format(latency_ms)) + print("Throughput: {:.2f} FPS".format(fps)) del exe_network - del plugin - + del ie + next_step.step_id = 0 except Exception as e: logging.exception(e) diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/benchmark_utils.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/benchmark_utils.py deleted file mode 100644 index 2f6f38be5e155e..00000000000000 --- a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/benchmark_utils.py +++ /dev/null @@ -1,124 +0,0 @@ -""" - Copyright (C) 2018-2019 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -""" - -import logging -import argparse -import os -import cv2 -import numpy as np -import sys - -from glob import glob -from random import choice -from datetime import datetime -from fnmatch import fnmatch - -from .constants import * - -logging.basicConfig(format="[ %(levelname)s ] %(message)s", level=logging.INFO, stream=sys.stdout) -logger = logging.getLogger('BenchmarkApp') - - -def validate_args(args): - if args.number_iterations is not None and args.number_iterations < 0: - raise Exception("Number of iterations should be positive (invalid -niter option value)") - if args.number_infer_requests < 0: - raise Exception("Number of inference requests should be positive (invalid -nireq option value)") - if not fnmatch(args.path_to_model, XML_EXTENSION_PATTERN): - raise Exception('Path {} is not xml file.') - - -def parse_args(): - parser = argparse.ArgumentParser(add_help=False) - args = parser.add_argument_group('Options') - args.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help=HELP_MESSAGES["HELP"]) - args.add_argument('-i', '--path_to_images', type=str, required=True, help=HELP_MESSAGES['IMAGE_MESSAGE']) - args.add_argument('-m', '--path_to_model', type=str, required=True, help=HELP_MESSAGES['MODEL_MESSAGE']) - args.add_argument('-c', '--path_to_cldnn_config', type=str, required=False, - help=HELP_MESSAGES['CUSTOM_GPU_LIBRARY_MESSAGE']) - args.add_argument('-l', '--path_to_extension', type=str, required=False, default=None, - help=HELP_MESSAGES['CUSTOM_GPU_LIBRARY_MESSAGE']) - args.add_argument('-api', '--api_type', type=str, required=False, default='async', choices=['sync', 'async'], - help=HELP_MESSAGES['API_MESSAGE']) - args.add_argument('-d', '--target_device', type=str, required=False, default="CPU", - help=HELP_MESSAGES['TARGET_DEVICE_MESSAGE']) - args.add_argument('-niter', '--number_iterations', type=int, required=False, default=None, - help=HELP_MESSAGES['ITERATIONS_COUNT_MESSAGE']) - args.add_argument('-nireq', 
'--number_infer_requests', type=int, required=False, default=2, - help=HELP_MESSAGES['INFER_REQUESTS_COUNT_MESSAGE']) - args.add_argument('-nthreads', '--number_threads', type=int, required=False, default=None, - help=HELP_MESSAGES['INFER_NUM_THREADS_MESSAGE']) - args.add_argument('-b', '--batch_size', type=int, required=False, default=None, - help=HELP_MESSAGES['BATCH_SIZE_MESSAGE']) - args.add_argument('-pin', '--infer_threads_pinning', type=str, required=False, default='YES', - choices=['YES', 'NO'], help=HELP_MESSAGES['INFER_THREADS_PINNING_MESSAGE']) - return parser.parse_args() - - -def get_images(path_to_images, batch_size): - images = list() - if os.path.isfile(path_to_images): - while len(images) != batch_size: - images.append(path_to_images) - else: - path = os.path.join(path_to_images, '*') - files = glob(path, recursive=True) - for file in files: - file_extension = file.rsplit('.').pop().upper() - if file_extension in IMAGE_EXTENSIONS: - images.append(file) - if len(images) == 0: - raise Exception("No images found in {}".format(path_to_images)) - if len(images) < batch_size: - while len(images) != batch_size: - images.append(choice(images)) - return images - - -def get_duration_in_secs(target_device): - duration = 0 - for device in DEVICE_DURATION_IN_SECS: - if device in target_device: - duration = max(duration, DEVICE_DURATION_IN_SECS[device]) - - if duration == 0: - duration = DEVICE_DURATION_IN_SECS[UNKNOWN_DEVICE_TYPE] - logger.warn("Default duration {} seconds for unknown device {} is used".format(duration, target_device)) - - return duration - - -def fill_blob_with_image(images_path, shape): - images = np.ndarray(shape) - for item in range(shape[0]): - image = cv2.imread(images_path[item]) - - new_im_size = tuple(shape[2:]) - if image.shape[:-1] != new_im_size: - logger.warn("Image {} is resize from ({}) to ({})".format(images_path[item], image.shape[:-1], new_im_size)) - image = cv2.resize(image, new_im_size) - - image = image.transpose((2, 0, 1)) - images[item] = image - return images - - -def sync_infer_request(exe_network, times, images): - iteration_start_time = datetime.now() - exe_network.infer(images) - current_time = datetime.now() - times.append((current_time - iteration_start_time).total_seconds()) - return current_time diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/constants.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/constants.py deleted file mode 100644 index b9770a19c3e4ea..00000000000000 --- a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/constants.py +++ /dev/null @@ -1,65 +0,0 @@ -""" - Copyright (C) 2018-2019 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -""" - -HELP_MESSAGES = { - 'HELP': "Show this help message and exit.", - 'IMAGE_MESSAGE': "Required. Path to a folder with images or to image files.", - 'MULTI_INPUT_MESSAGE': "Optional. Path to multi input file containing.", - 'MODEL_MESSAGE': "Required. 
Path to an .xml file with a trained model.", - 'PLUGIN_PATH_MESSAGE': "Optional. Path to a plugin folder.", - 'API_MESSAGE': "Optional. Enable using sync/async API. Default value is sync", - 'TARGET_DEVICE_MESSAGE': "Optional. Specify a target device to infer on: CPU, GPU, FPGA, HDDL or MYRIAD. " - "Use \"-d HETERO:\" format to specify HETERO plugin. " - "The application looks for a suitable plugin for the specified device.", - 'ITERATIONS_COUNT_MESSAGE': "Optional. Number of iterations. " - "If not specified, the number of iterations is calculated depending on a device.", - 'INFER_REQUESTS_COUNT_MESSAGE': "Optional. Number of infer requests (default value is 2).", - 'INFER_NUM_THREADS_MESSAGE': "Number of threads to use for inference on the CPU " - "(including Hetero cases).", - 'CUSTOM_CPU_LIBRARY_MESSAGE': "Optional. Required for CPU custom layers. " - "Absolute path to a shared library with the kernels implementations.", - 'CUSTOM_GPU_LIBRARY_MESSAGE': "Optional. Required for GPU custom kernels. Absolute path to an .xml file with the " - "kernels description.", - 'BATCH_SIZE_MESSAGE': "Optional. Batch size value. If not specified, the batch size value is determined from IR", - 'INFER_THREADS_PINNING_MESSAGE': "Optional. Enable (\"YES\" is default value) or disable (\"NO\")" - "CPU threads pinning for CPU-involved inference." -} - -DEVICE_DURATION_IN_SECS = { - "CPU": 60, - "GPU": 60, - "VPU": 60, - "MYRIAD": 60, - "FPGA": 120, - "HDDL": 60, - "UNKNOWN": 120 -} - -IMAGE_EXTENSIONS = ['JPEG', 'JPG', 'PNG', 'BMP'] - -MYRIAD_DEVICE_NAME = "MYRIAD" -CPU_DEVICE_NAME = "CPU" -GPU_DEVICE_NAME = "GPU" -UNKNOWN_DEVICE_TYPE = "UNKNOWN" - -BATCH_SIZE_ELEM = 0 - -LAYOUT_TYPE = 'NCHW' - -XML_EXTENSION = ".xml" -BIN_EXTENSION = ".bin" - -XML_EXTENSION_PATTERN = '*' + XML_EXTENSION diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/infer_request_wrap.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/infer_request_wrap.py new file mode 100644 index 00000000000000..cf801fe18a9607 --- /dev/null +++ b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/infer_request_wrap.py @@ -0,0 +1,81 @@ +""" + Copyright (C) 2018-2019 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" + +from ctypes import * +from datetime import datetime +import threading + +class InferReqWrap: + def __init__(self, request, id, callbackQueue): + self.id = id + self.request = request + self.request.set_completion_callback(self.callback, self.id) + self.callbackQueue = callbackQueue + + def callback(self, statusCode, userdata): + if (userdata != self.id): + print("Request ID {} does not correspond to user data {}".format(self.id, userdata)) + elif statusCode != 0: + print("Request {} failed with status code {}".format(self.id, statusCode)) + self.callbackQueue(self.id, self.request.latency) + + def startAsync(self, input_data): + self.request.async_infer(input_data) + + def infer(self, input_data): + self.request.infer(input_data) + self.callbackQueue(self.id, self.request.latency); + +class InferRequestsQueue: + def __init__(self, requests): + self.idleIds = [] + self.requests = [] + self.times = [] + for id in range(0, len(requests)): + self.requests.append(InferReqWrap(requests[id], id, self.putIdleRequest)) + self.idleIds.append(id) + self.startTime = datetime.max + self.endTime = datetime.min + self.cv = threading.Condition() + + def resetTimes(self): + self.times.clear() + + def getDurationInSeconds(self): + return (self.endTime - self.startTime).total_seconds() + + def putIdleRequest(self, id, latency): + self.cv.acquire() + self.times.append(latency) + self.idleIds.append(id) + self.endTime = max(self.endTime, datetime.now()) + self.cv.notify() + self.cv.release() + + def getIdleRequest(self): + self.cv.acquire() + while len(self.idleIds) == 0: + self.cv.wait() + id = self.idleIds.pop(); + self.startTime = min(datetime.now(), self.startTime); + self.cv.release() + return self.requests[id] + + def waitAll(self): + self.cv.acquire() + while len(self.idleIds) != len(self.requests): + self.cv.wait() + self.cv.release() diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/inputs_filling.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/inputs_filling.py new file mode 100644 index 00000000000000..00a29452471605 --- /dev/null +++ b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/inputs_filling.py @@ -0,0 +1,194 @@ +""" + Copyright (C) 2018-2019 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" + +import logging +import os +import cv2 +import numpy as np +import sys + +from glob import glob +from random import choice + +from .logging import logger + +IMAGE_EXTENSIONS = ['JPEG', 'JPG', 'PNG', 'BMP'] +BINARY_EXTENSIONS = ['BIN'] + +def isImage(blob): + if (blob.layout != "NCHW"): + return False + channels = blob.shape[1] + return (channels == 3) + +def isImageInfo(blob): + if (blob.layout != "NC"): + return False + channels = blob.shape[1] + return (channels >= 2) + +def getInputs(path_to_input, batch_size, input_info, requests): + input_image_sizes = {} + for key in input_info.keys(): + if (isImage(input_info[key])): + input_image_sizes[key] = (input_info[key].shape[2], input_info[key].shape[3]) + logger.info("Network input '{}' precision {}, dimensions ({}): {}".format(key, + input_info[key].precision, + input_info[key].layout, + " ".join(str(x) for x in input_info[key].shape))) + + images_count = len(input_image_sizes.keys()) + binaries_count = len(input_info) - images_count + + image_files = list() + binary_files = list() + + if (path_to_input): + image_files = get_files_by_extensions(path_to_input, IMAGE_EXTENSIONS) + image_files.sort() + binary_files = get_files_by_extensions(path_to_input, BINARY_EXTENSIONS) + binary_files.sort() + + if (len(image_files) == 0) and (len(binary_files) == 0): + logger.warn("No input files were given: all inputs will be filled with random values!") + else: + binary_to_be_used = binaries_count*batch_size*len(requests) + if binary_to_be_used > 0 and len(binary_files) == 0: + logger.warn("No supported binary inputs found! Please check your file extensions: {}".format(",".join(BINARY_EXTENSIONS))) + elif binary_to_be_used > len(binary_files): + logger.warn("Some binary input files will be duplicated: {} files are required, but only {} were provided".format(binary_to_be_used, len(binary_files))) + elif binary_to_be_used < len(binary_files): + logger.warn("Some binary input files will be ignored: only {} files are required from {}".format(binary_to_be_used, len(binary_files))) + + images_to_be_used = images_count*batch_size*len(requests) + if images_to_be_used > 0 and len(image_files) == 0: + logger.warn("No supported image inputs found! 
Please check your file extensions: {}".format(",".join(IMAGE_EXTENSIONS))) + elif images_to_be_used > len(image_files): + logger.warn("Some image input files will be duplicated: {} files are required, but only {} were provided".format(images_to_be_used, len(image_files))) + elif images_to_be_used < len(image_files): + logger.warn("Some image input files will be ignored: only {} files are required from {}".format(images_to_be_used, len(image_files))) + + requests_input_data = [] + for request_id in range(0, len(requests)): + logger.info("Infer Request {} filling".format(request_id)) + input_data = {} + keys = list(input_info.keys()) + for key in keys: + if isImage(input_info[key]): + # input is image + if (len(image_files) > 0): + input_data[key] = fill_blob_with_image(image_files, request_id, batch_size, keys.index(key), len(keys), input_info[key].shape) + continue + + # input is binary + if (len(binary_files) > 0): + input_data[key] = fill_blob_with_binary(binary_files, request_id, batch_size, keys.index(key), len(keys), input_info[key].shape) + continue + + # most likely input is image info + if isImageInfo(input_info[key]) and len(input_image_sizes) == 1: + image_size = input_image_sizes[list(input_image_sizes.keys()).pop()] + logger.info("Fill input '" + key + "' with image size " + str(image_size[0]) + "x" + + str(image_size[1])) + input_data[key] = fill_blob_with_image_info(image_size, input_info[key].shape) + continue + + # fill with random data + logger.info("Fill input '{}' with random values ({} is expected)".format(key, "image" if isImage(input_info[key]) else "some binary data")) + input_data[key] = fill_blob_with_random(input_info[key].precision, input_info[key].shape) + + requests_input_data.append(input_data) + + return requests_input_data + +def get_files_by_extensions(path_to_input, extensions): + input_files = list() + if os.path.isfile(path_to_input): + input_files.append(path_to_input) + else: + path = os.path.join(path_to_input, '*') + files = glob(path, recursive=True) + for file in files: + file_extension = file.rsplit('.').pop().upper() + if file_extension in extensions: + input_files.append(file) + return input_files + +def fill_blob_with_image(image_paths, request_id, batch_size, input_id, input_size, shape): + images = np.ndarray(shape) + image_index = request_id*batch_size*input_size + input_id + for b in range(batch_size): + image_index %= len(image_paths) + image_filename = image_paths[image_index] + image = cv2.imread(image_filename) + + new_im_size = tuple(shape[2:]) + if image.shape[:-1] != new_im_size: + logger.warn("Image {} is resized from ({}) to ({})".format(image_filename, image.shape[:-1], new_im_size)) + image = cv2.resize(image, new_im_size) + + # HWC -> CHW to match the NCHW input layout + image = image.transpose((2, 0, 1)) + images[b] = image + + image_index += input_size + return images + +def fill_blob_with_binary(binary_paths, request_id, batch_size, input_id, input_size, shape): + binaries = np.ndarray(shape) + binary_index = request_id*batch_size*input_size + input_id + for b in range(batch_size): + binary_index %= len(binary_paths) + binary_filename = binary_paths[binary_index] + + binary_file_size = os.path.getsize(binary_filename) + blob_size = int(np.prod(shape) / batch_size) + if (blob_size != binary_file_size): + raise Exception("File " + binary_filename + " contains " + str(binary_file_size) + " bytes " + "but network expects " + str(blob_size)) + + with open(binary_filename, 'rb') as f: + binary_data = f.read() + + # one byte per element is assumed, matching the size check above + binaries[b] = np.frombuffer(binary_data, dtype=np.uint8).reshape(shape[1:]) + binary_index += input_size + + return binaries + +def fill_blob_with_image_info(image_size, shape): 
+ im_info = np.ndarray(shape) + for b in range(shape[0]): + for i in range(shape[1]): + im_info[b][i] = image_size[i] if i in [0, 1] else 1 + + return im_info + +def fill_blob_with_random(precision, shape): + if precision == "FP32": + return np.random.rand(*shape).astype(np.float32) + elif precision == "FP16": + return np.random.rand(*shape).astype(np.float16) + elif precision == "I32": + return np.random.rand(*shape).astype(np.int32) + elif precision == "U8": + return np.random.rand(*shape).astype(np.uint8) + elif precision == "I8": + return np.random.rand(*shape).astype(np.int8) + elif precision == "U16": + return np.random.rand(*shape).astype(np.uint16) + elif precision == "I16": + return np.random.rand(*shape).astype(np.int16) + else: + raise Exception("Input precision is not supported: " + precision) diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/logging.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/logging.py new file mode 100644 index 00000000000000..8adf13884303c1 --- /dev/null +++ b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/logging.py @@ -0,0 +1,21 @@ +""" + Copyright (C) 2018-2019 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +import logging +import sys + +logging.basicConfig(format="[ %(levelname)s ] %(message)s", level=logging.INFO, stream=sys.stdout) +logger = logging.getLogger('BenchmarkApp') diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/parameters.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/parameters.py new file mode 100644 index 00000000000000..3e8b59b848e3be --- /dev/null +++ b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/parameters.py @@ -0,0 +1,92 @@ +""" + Copyright (C) 2018-2019 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+""" + +import argparse +from fnmatch import fnmatch + +XML_EXTENSION = ".xml" +BIN_EXTENSION = ".bin" + +XML_EXTENSION_PATTERN = '*' + XML_EXTENSION + +def validate_args(args): + if args.number_iterations is not None and args.number_iterations < 0: + raise Exception("Number of iterations should be positive (invalid -niter option value)") + if args.number_infer_requests and args.number_infer_requests < 0: + raise Exception("Number of inference requests should be positive (invalid -nireq option value)") + if not fnmatch(args.path_to_model, XML_EXTENSION_PATTERN): + raise Exception('Path {} is not xml file.') + +def str2bool(v): + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') + +def parse_args(): + parser = argparse.ArgumentParser(add_help=False) + args = parser.add_argument_group('Options') + args.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, + help="Show this help message and exit.") + args.add_argument('-i', '--path_to_input', type=str, required=False, + help="Optional. Path to a folder with images and/or binaries or to specific image or binary file.") + args.add_argument('-m', '--path_to_model', type=str, required=True, + help="Required. Path to an .xml file with a trained model.") + args.add_argument('-d', '--target_device', type=str, required=False, default="CPU", + help="Optional. Specify a target device to infer on: CPU, GPU, FPGA, HDDL or MYRIAD. " + "Use \"-d HETERO:\" format to specify HETERO plugin. ") + args.add_argument('-l', '--path_to_extension', type=str, required=False, default=None, + help="Optional. Required for CPU custom layers. " + "Absolute path to a shared library with the kernels implementations.") + args.add_argument('-c', '--path_to_cldnn_config', type=str, required=False, + help="Optional. Required for GPU custom kernels. Absolute path to an .xml file with the " + "kernels description.") + args.add_argument('-api', '--api_type', type=str, required=False, default='async', choices=['sync', 'async'], + help="Optional. Enable using sync/async API. Default value is async.") + args.add_argument('-niter', '--number_iterations', type=int, required=False, default=None, + help="Optional. Number of iterations. " + "If not specified, the number of iterations is calculated depending on a device.") + args.add_argument('-nireq', '--number_infer_requests', type=int, required=False, default=None, + help="Optional. Number of infer requests. Default value is determined automatically for device.") + args.add_argument('-b', '--batch_size', type=int, required=False, default=None, + help="Optional. Batch size value. If not specified, the batch size value is determined from Intermediate Representation") + args.add_argument('-stream_output', type=str2bool, required=False, default=False, nargs='?', const=True, + help="Optional. Print progress as a plain text. When specified, an interactive progress bar is replaced with a " + "multiline output.") + args.add_argument('-t', '--time', type=int, required=False, default=None, + help="Optional. Time in seconds to execute topology.") + args.add_argument('-progress', type=str2bool, required=False, default=False, nargs='?', const=True, + help="Optional. Show progress bar (can affect performance measurement). Default values is \"False\".") + args.add_argument('-nstreams', '--number_streams', type=str, required=False, default=None, + help="Optional. 
Number of streams to use for inference on the CPU/GPU in throughput mode " + "(for HETERO device case use format :,: or just ).") + args.add_argument('-nthreads', '--number_threads', type=int, required=False, default=None, + help="Number of threads to use for inference on the CPU " + "(including HETERO case).") + args.add_argument('-pin', '--infer_threads_pinning', type=str, required=False, default='YES', choices=['YES', 'NO'], + help="Optional. Enable (\"YES\" is default value) or disable (\"NO\") " + "CPU threads pinning for CPU-involved inference.") + args.add_argument('--exec_graph_path', type=str, required=False, + help="Optional. Path to a file where to store the serialized executable graph information.") + args.add_argument("-pc", "--perf_counts", type=str2bool, required=False, default=False, nargs='?', const=True, + help="Optional. Report performance counters.", ) + parsed_args = parser.parse_args() + + validate_args(parsed_args) + + return parsed_args diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/progress_bar.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/progress_bar.py new file mode 100644 index 00000000000000..f281d1f0e2f226 --- /dev/null +++ b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/progress_bar.py @@ -0,0 +1,51 @@ +""" + Copyright (C) 2018-2019 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +from progress.bar import Bar + +class ProgressBar: + def __init__(self, total_num, stream_output=False, progress_enabled=False): + self.stream_output = stream_output + self.is_finished = True + self.progress_enabled = progress_enabled + self.reset(total_num) + + def add_progress(self, num): + self.is_finished = False + if self.progress_enabled: + for i in range(num): + self.bar.next() + if self.stream_output: + print() + + def finish(self, num = 0): + if (num > 0): + self.add_progress(num) + + self.is_finished = True + if self.progress_enabled: + self.bar.finish() + print() + + def reset(self, total_num): + if self.progress_enabled: + self.bar = Bar('Progress:', max = total_num, fill = '.', suffix='%(percent).2f%%') + + def new_bar(self, total_num): + if self.is_finished: + self.reset(total_num) + else: + raise Exception("Cannot create a new bar. Current bar is still in progress") diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/utils.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/utils.py new file mode 100644 index 00000000000000..c1f0afe8230c64 --- /dev/null +++ b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/utils.py @@ -0,0 +1,99 @@ +""" + Copyright (C) 2018-2019 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +import multiprocessing +from .logging import logger + +VPU_DEVICE_NAME = "VPU" +MYRIAD_DEVICE_NAME = "MYRIAD" +HDDL_DEVICE_NAME = "HDDL" +FPGA_DEVICE_NAME = "FPGA" +CPU_DEVICE_NAME = "CPU" +GPU_DEVICE_NAME = "GPU" +HETERO_DEVICE_NAME = "HETERO" +UNKNOWN_DEVICE_TYPE = "UNKNOWN" + +DEVICE_DURATION_IN_SECS = { + CPU_DEVICE_NAME: 60, + GPU_DEVICE_NAME: 60, + VPU_DEVICE_NAME: 60, + MYRIAD_DEVICE_NAME: 60, + HDDL_DEVICE_NAME: 60, + FPGA_DEVICE_NAME: 120, + UNKNOWN_DEVICE_TYPE: 120 +} + +DEVICE_NIREQ_ASYNC = { + CPU_DEVICE_NAME: 2, + GPU_DEVICE_NAME: 2, + VPU_DEVICE_NAME: 4, + MYRIAD_DEVICE_NAME: 4, + HDDL_DEVICE_NAME: 100, + FPGA_DEVICE_NAME: 3, + UNKNOWN_DEVICE_TYPE: 1 +} + +def get_duration_in_secs(target_device): + duration = 0 + for device in DEVICE_DURATION_IN_SECS: + if device in target_device: + duration = max(duration, DEVICE_DURATION_IN_SECS[device]) + + if duration == 0: + duration = DEVICE_DURATION_IN_SECS[UNKNOWN_DEVICE_TYPE] + logger.warn("Default duration {} seconds is used for unknown device {}".format(duration, target_device)) + + return duration + +def get_nireq(target_device): + nireq = 0 + for device in DEVICE_NIREQ_ASYNC: + if device in target_device: + nireq = max(nireq, DEVICE_NIREQ_ASYNC[device]) + + if nireq == 0: + nireq = DEVICE_NIREQ_ASYNC[UNKNOWN_DEVICE_TYPE] + logger.warn("Default number of requests {} is used for unknown device {}".format(nireq, target_device)) + + return nireq + +def parseDevices(device_string): + devices = device_string + if ':' in devices: + devices = devices.partition(':')[2] + return [ d[:d.index('(')] if '(' in d else d for d in devices.split(',') ] + +def parseValuePerDevice(devices, values_string): + ## Format: :,: or just + result = {} + if not values_string: + return result + device_value_strings = values_string.upper().split(',') + for device_value_string in device_value_strings: + device_value_vec = device_value_string.split(':') + if len(device_value_vec) == 2: + for device in devices: + if device == device_value_vec[0]: + value = int(device_value_vec[1]) + result[device_value_vec[0]] = value + break + elif len(device_value_vec) == 1: + value = int(device_value_vec[0]) + for device in devices: + result[device] = value + else: + raise Exception("Unknown string format: " + values_string) + return result diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark_app.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark_app.py index 4f587a84d98914..cf1139a9af2ba2 100644 --- a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark_app.py +++ b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark_app.py @@ -1,37 +1,4 @@ import benchmark -from argparse import ArgumentParser, SUPPRESS - - -def parse_args(): - parser = ArgumentParser(add_help=False) - args = parser.add_argument_group('Options') - args.add_argument('-h', '--help', action='help', default=SUPPRESS, help=benchmark.HELP_MESSAGES["HELP"]) - args.add_argument('-i', '--path_to_images', type=str, required=True, - help=benchmark.HELP_MESSAGES['IMAGE_MESSAGE']) - args.add_argument('-m', 
'--path_to_model', type=str, required=True, - help=benchmark.HELP_MESSAGES['MODEL_MESSAGE']) - args.add_argument('-c', '--path_to_cldnn_config', type=str, required=False, - help=benchmark.HELP_MESSAGES['CUSTOM_GPU_LIBRARY_MESSAGE']) - args.add_argument('-l', '--path_to_extension', type=str, required=False, default=None, - help=benchmark.HELP_MESSAGES['CUSTOM_GPU_LIBRARY_MESSAGE']) - args.add_argument('-api', '--api_type', type=str, required=False, default='async', choices=['sync', 'async'], - help=benchmark.HELP_MESSAGES['API_MESSAGE']) - args.add_argument('-d', '--target_device', type=str, required=False, default="CPU", - help=benchmark.HELP_MESSAGES['TARGET_DEVICE_MESSAGE']) - args.add_argument('-niter', '--number_iterations', type=int, required=False, default=None, - help=benchmark.HELP_MESSAGES['ITERATIONS_COUNT_MESSAGE']) - args.add_argument('-nireq', '--number_infer_requests', type=int, required=False, default=2, - help=benchmark.HELP_MESSAGES['INFER_REQUESTS_COUNT_MESSAGE']) - args.add_argument('-nthreads', '--number_threads', type=int, required=False, default=None, - help=benchmark.HELP_MESSAGES['INFER_NUM_THREADS_MESSAGE']) - args.add_argument('-b', '--batch_size', type=int, required=False, default=None, - help=benchmark.HELP_MESSAGES['BATCH_SIZE_MESSAGE']) - args.add_argument('-pin', '--infer_threads_pinning', type=str, required=False, default='YES', - choices=['YES', 'NO'], help=benchmark.HELP_MESSAGES['INFER_THREADS_PINNING_MESSAGE']) - return parser.parse_args() - - if __name__ == "__main__": - args = parse_args() - benchmark.main(args) + benchmark.main() diff --git a/inference-engine/ie_bridges/python/sample/classification_sample/README.md b/inference-engine/ie_bridges/python/sample/classification_sample/README.md index 2d0d95cf0f5166..98691c7da53312 100644 --- a/inference-engine/ie_bridges/python/sample/classification_sample/README.md +++ b/inference-engine/ie_bridges/python/sample/classification_sample/README.md @@ -22,7 +22,6 @@ The command yields the following usage message: usage: classification_sample.py [-h] -m MODEL -i INPUT [INPUT ...] [-l CPU_EXTENSION] [-pp PLUGIN_DIR] [-d DEVICE] [--labels LABELS] [-nt NUMBER_TOP] - [-ni NUMBER_ITER] [-pc] Options: -h, --help Show this help message and exit. @@ -45,9 +44,6 @@ Options: --labels LABELS Optional. Path to a labels mapping file -nt NUMBER_TOP, --number_top NUMBER_TOP Optional. Number of top results - -ni NUMBER_ITER, --number_iter NUMBER_ITER - Optional. Number of inference iterations - -pc, --perf_counts Optional. Report performance counters ``` Running the application with the empty list of options yields the usage message given above. diff --git a/inference-engine/ie_bridges/python/sample/classification_sample/classification_sample.py b/inference-engine/ie_bridges/python/sample/classification_sample/classification_sample.py index ea87429576a2f3..9336e393620f4e 100644 --- a/inference-engine/ie_bridges/python/sample/classification_sample/classification_sample.py +++ b/inference-engine/ie_bridges/python/sample/classification_sample/classification_sample.py @@ -22,7 +22,7 @@ import numpy as np import logging as log from time import time -from openvino.inference_engine import IENetwork, IEPlugin +from openvino.inference_engine import IENetwork, IECore def build_argparser(): @@ -38,7 +38,6 @@ def build_argparser(): help="Optional. Required for CPU custom layers. " "MKLDNN (CPU)-targeted custom layers. 
Absolute path to a shared library with the" " kernels implementations.", type=str, default=None) - args.add_argument("-pp", "--plugin_dir", help="Optional. Path to a plugin folder", type=str, default=None) args.add_argument("-d", "--device", help="Optional. Specify the target device to infer on; CPU, GPU, FPGA, HDDL, MYRIAD or HETERO: is " "acceptable. The sample will look for a suitable plugin for device specified. Default " @@ -46,9 +45,6 @@ def build_argparser(): default="CPU", type=str) args.add_argument("--labels", help="Optional. Path to a labels mapping file", default=None, type=str) args.add_argument("-nt", "--number_top", help="Optional. Number of top results", default=10, type=int) - args.add_argument("-ni", "--number_iter", help="Optional. Number of inference iterations", default=1, type=int) - args.add_argument("-pc", "--perf_counts", help="Optional. Report performance counters", default=False, - action="store_true") return parser @@ -60,19 +56,20 @@ def main(): model_bin = os.path.splitext(model_xml)[0] + ".bin" # Plugin initialization for specified device and load extensions library if specified - plugin = IEPlugin(device=args.device, plugin_dirs=args.plugin_dir) + log.info("Creating Inference Engine") + ie = IECore() if args.cpu_extension and 'CPU' in args.device: - plugin.add_cpu_extension(args.cpu_extension) + ie.add_extension(args.cpu_extension, "CPU") # Read IR log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin)) net = IENetwork(model=model_xml, weights=model_bin) - if plugin.device == "CPU": - supported_layers = plugin.get_supported_layers(net) + if "CPU" in args.device: + supported_layers = ie.query_network(net, "CPU") not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers] if len(not_supported_layers) != 0: log.error("Following layers are not supported by the plugin for specified device {}:\n {}". 
- format(plugin.device, ', '.join(not_supported_layers))) + format(args.device, ', '.join(not_supported_layers))) log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l " "or --cpu_extension command line argument") sys.exit(1) @@ -99,23 +96,11 @@ def main(): # Loading model to the plugin log.info("Loading model to the plugin") - exec_net = plugin.load(network=net) + exec_net = ie.load_network(network=net, device_name=args.device) # Start sync inference - log.info("Starting inference ({} iterations)".format(args.number_iter)) - infer_time = [] - for i in range(args.number_iter): - t0 = time() - res = exec_net.infer(inputs={input_blob: images}) - infer_time.append((time() - t0) * 1000) - log.info("Average running time of one iteration: {} ms".format(np.average(np.asarray(infer_time)))) - if args.perf_counts: - perf_counts = exec_net.requests[0].get_perf_counts() - log.info("Performance counters:") - print("{:<70} {:<15} {:<15} {:<15} {:<10}".format('name', 'layer_type', 'exet_type', 'status', 'real_time, us')) - for layer, stats in perf_counts.items(): - print("{:<70} {:<15} {:<15} {:<15} {:<10}".format(layer, stats['layer_type'], stats['exec_type'], - stats['status'], stats['real_time'])) + log.info("Starting inference in synchronous mode") + res = exec_net.infer(inputs={input_blob: images}) # Processing output blob log.info("Processing output blob") @@ -144,7 +129,7 @@ def main(): ' ' * space_num_after, ' ' * space_num_before_prob, probs[id])) print("\n") - + log.info("This sample is an API example; for any performance measurements, please use the dedicated benchmark_app tool\n") if __name__ == '__main__': sys.exit(main() or 0) diff --git a/inference-engine/ie_bridges/python/sample/classification_sample_async/README.md b/inference-engine/ie_bridges/python/sample/classification_sample_async/README.md index 4a9114299008c4..b409f5d1d81110 100644 --- a/inference-engine/ie_bridges/python/sample/classification_sample_async/README.md +++ b/inference-engine/ie_bridges/python/sample/classification_sample_async/README.md @@ -1,28 +1,23 @@ # Image Classification Python* Sample Async -This sample demonstrates how to build and execute inference in pipelined mode on example of classifications networks. +This sample demonstrates how to run the Image Classification sample application with inference executed in the asynchronous mode. -The pipelined mode might increase the throughput of the pictures. The latency of one inference will be the same as for synchronous execution. -
-The throughput increases due to follow reasons: -* Some plugins have heterogeneity inside themselves: data transferring, execution on remote device, pre-processing and post-processing on the host. -* Using of explicit heterogeneous plugin with execution of different parts of network on different devices, for example HETERO:CPU,GPU. +The sample demonstrates how to use the new Infer Request API of Inference Engine in applications. +Refer to [Integrate the Inference Engine New Request API with Your Application](./docs/IE_DG/Integrate_with_customer_application_new_API.md) for details. +The sample demonstrates how to build and execute an inference request 10 times in the asynchronous mode, using classification networks as an example. +The asynchronous mode might increase the throughput of image processing. -When two or more devices process one image, creating several infer requests and starting asynchronous inference allow for using devices in the most efficient way. -If two devices are involved in execution, the most optimal value for `-nireq` option is 2. -To process infer requests more efficiently, Classification Sample Async uses round-robin algorithm. It starts execution of the current infer request and switches to waiting for results of the previous one. After finishing of waiting, it switches infer requests and repeat the procedure. - -Another required aspect of good throughput is a number of iterations. Only with big number of iterations you can emulate the real application work and get good performance. - -The batch mode is an independent attribute on the pipelined mode. Pipelined mode works efficiently with any batch size. +The batch mode is independent of the asynchronous mode. Asynchronous mode works efficiently with any batch size. ## How It Works -Upon the start-up, the sample application reads command line parameters and loads a network and an image to the Inference -Engine plugin. -Then application creates several infer requests pointed in `-nireq` parameter and loads images for inference. +Upon the start-up, the sample application reads command line parameters and loads the specified network and input images (or a +folder with images) to the Inference Engine plugin. The batch size of the network is set according to the number of read images. -Then in a loop it starts inference for the current infer request and switches to waiting for the previous one. When results are ready, it swaps infer requests. +Then, the sample creates an inference request object and assigns a completion callback to it. Within the completion callback +handler, the inference request is executed again. + +After that, the application starts inference for the first infer request and waits for the 10th inference request execution to complete. When inference is done, the application outputs data to the standard output stream. @@ -39,7 +34,7 @@ The command yields the following usage message: usage: classification_sample_async.py [-h] -m MODEL -i INPUT [INPUT ...] [-l CPU_EXTENSION] [-pp PLUGIN_DIR] [-d DEVICE] [--labels LABELS] - [-nt NUMBER_TOP] [-ni NUMBER_ITER] [-pc] + [-nt NUMBER_TOP] Options: -h, --help Show this help message and exit. @@ -62,10 +57,6 @@ Options: --labels LABELS Optional. Labels mapping file -nt NUMBER_TOP, --number_top NUMBER_TOP Optional. Number of top results - -ni NUMBER_ITER, --number_iter NUMBER_ITER - Optional. Number of inference iterations - -pc, --perf_counts Optional.
Report performance counters - ``` Running the application with the empty list of options yields the usage message given above and an error message. @@ -75,7 +66,7 @@ To run the sample, you can use AlexNet and GoogLeNet or other image classificati > **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). -You can do inference on an image using a trained AlexNet network on FPGA with fallback to CPU using the following command: +You can do inference of an image using a trained AlexNet network on FPGA with fallback to CPU using the following command: ``` python3 classification_sample_async.py -i /cat.bmp -m /alexnet_fp32.xml -nt 5 -d HETERO:FPGA,CPU -nireq 2 -ni 200 ``` diff --git a/inference-engine/ie_bridges/python/sample/classification_sample_async/classification_sample_async.py b/inference-engine/ie_bridges/python/sample/classification_sample_async/classification_sample_async.py index 601be2da33c817..9c91af4eea55c4 100644 --- a/inference-engine/ie_bridges/python/sample/classification_sample_async/classification_sample_async.py +++ b/inference-engine/ie_bridges/python/sample/classification_sample_async/classification_sample_async.py @@ -22,7 +22,56 @@ import numpy as np import logging as log from time import time -from openvino.inference_engine import IENetwork, IEPlugin +from openvino.inference_engine import IENetwork, IECore +import threading + + +class InferReqWrap: + def __init__(self, request, id, num_iter): + self.id = id + self.request = request + self.num_iter = num_iter + self.cur_iter = 0 + self.cv = threading.Condition() + self.request.set_completion_callback(self.callback, self.id) + + def callback(self, statusCode, userdata): + if (userdata != self.id): + log.error("Request ID {} does not correspond to user data {}".format(self.id, userdata)) + elif statusCode != 0: + log.error("Request {} failed with status code {}".format(self.id, statusCode)) + self.cur_iter += 1 + log.info("Completed {} Async request execution".format(self.cur_iter)) + if self.cur_iter < self.num_iter: + # here a user can read output containing inference results and put new input + # to repeat async request again + self.request.async_infer(self.input) + else: + # continue sample execution after last Asynchronous inference request execution + self.cv.acquire() + self.cv.notify() + self.cv.release() + + def execute(self, mode, input_data): + if (mode == "async"): + log.info("Start inference ({} Asynchronous executions)".format(self.num_iter)) + self.input = input_data + # Start async request for the first time. Wait all repetitions of the async request + self.request.async_infer(input_data) + self.cv.acquire() + self.cv.wait() + self.cv.release() + elif (mode == "sync"): + log.info("Start inference ({} Synchronous executions)".format(self.num_iter)) + for self.cur_iter in range(self.num_iter): + # here we start inference synchronously and wait for + # last inference request execution + self.request.infer(input_data) + log.info("Completed {} Sync request execution".format(self.cur_iter + 1)) + else: + log.error("wrong inference mode is chosen. Please use \"sync\" or \"async\" mode") + sys.exit(1) + def build_argparser(): @@ -36,20 +85,15 @@ def build_argparser(): args.add_argument("-l", "--cpu_extension", help="Optional. Required for CPU custom layers. 
Absolute path to a shared library with the" " kernels implementations.", type=str, default=None) - args.add_argument("-pp", "--plugin_dir", help="Optional. Path to a plugin folder", type=str, default=None) args.add_argument("-d", "--device", help="Optional. Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is " "acceptable. The sample will look for a suitable plugin for device specified. Default value is CPU", default="CPU", type=str) args.add_argument("--labels", help="Optional. Labels mapping file", default=None, type=str) args.add_argument("-nt", "--number_top", help="Optional. Number of top results", default=10, type=int) - args.add_argument("-ni", "--number_iter", help="Optional. Number of inference iterations", default=1, type=int) - args.add_argument("-pc", "--perf_counts", help="Optional. Report performance counters", - default=False, action="store_true") return parser - def main(): log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout) args = build_argparser().parse_args() @@ -57,19 +101,20 @@ def main(): model_bin = os.path.splitext(model_xml)[0] + ".bin" # Plugin initialization for specified device and load extensions library if specified - plugin = IEPlugin(device=args.device, plugin_dirs=args.plugin_dir) + log.info("Creating Inference Engine") + ie = IECore() if args.cpu_extension and 'CPU' in args.device: - plugin.add_cpu_extension(args.cpu_extension) + ie.add_extension(args.cpu_extension, "CPU") # Read IR log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin)) net = IENetwork(model=model_xml, weights=model_bin) - if plugin.device == "CPU": - supported_layers = plugin.get_supported_layers(net) + if "CPU" in args.device: + supported_layers = ie.query_network(net, "CPU") not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers] if len(not_supported_layers) != 0: log.error("Following layers are not supported by the plugin for specified device {}:\n {}". 
- format(plugin.device, ', '.join(not_supported_layers))) + format(args.device, ', '.join(not_supported_layers))) log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l " "or --cpu_extension command line argument") sys.exit(1) @@ -95,27 +140,20 @@ def main(): # Loading model to the plugin log.info("Loading model to the plugin") - exec_net = plugin.load(network=net) - - # Start sync inference - log.info("Starting inference ({} iterations)".format(args.number_iter)) - infer_time = [] - for i in range(args.number_iter): - t0 = time() - infer_request_handle = exec_net.start_async(request_id=0, inputs={input_blob: images}) - infer_request_handle.wait() - infer_time.append((time() - t0) * 1000) - log.info("Average running time of one iteration: {} ms".format(np.average(np.asarray(infer_time)))) - if args.perf_counts: - perf_counts = infer_request_handle.get_perf_counts() - log.info("Performance counters:") - print("{:<70} {:<15} {:<15} {:<15} {:<10}".format('name', 'layer_type', 'exet_type', 'status', 'real_time, us')) - for layer, stats in perf_counts.items(): - print("{:<70} {:<15} {:<15} {:<15} {:<10}".format(layer, stats['layer_type'], stats['exec_type'], - stats['status'], stats['real_time'])) + exec_net = ie.load_network(network=net, device_name=args.device) + + # create one inference request for asynchronous execution + request_id = 0 + infer_request = exec_net.requests[request_id] + + num_iter = 10 + request_wrap = InferReqWrap(infer_request, request_id, num_iter) + # Start inference request execution. Wait for the last execution to complete + request_wrap.execute("async", {input_blob: images}) + # Processing output blob log.info("Processing output blob") - res = infer_request_handle.outputs[out_blob] + res = infer_request.outputs[out_blob] log.info("Top {} results: ".format(args.number_top)) if args.labels: with open(args.labels, 'r') as f: @@ -140,7 +178,7 @@ def main(): ' ' * space_num_after, ' ' * space_num_before_prob, probs[id])) print("\n") - + log.info("This sample is an API example; for any performance measurements, please use the dedicated benchmark_app tool\n") if __name__ == '__main__': sys.exit(main() or 0) diff --git a/inference-engine/ie_bridges/python/sample/hello_query_device/README.md b/inference-engine/ie_bridges/python/sample/hello_query_device/README.md new file mode 100644 index 00000000000000..24f10afd8fcbe9 --- /dev/null +++ b/inference-engine/ie_bridges/python/sample/hello_query_device/README.md @@ -0,0 +1,50 @@ +# Hello Query Device Python* Sample + +This topic demonstrates how to run the Hello Query Device sample application, which queries Inference Engine +devices and prints their metrics and default configuration values. The sample shows +how to use the Query Device API feature. + + +## How It Works + +The sample queries all available Inference Engine devices and prints their supported metrics and plugin configuration parameters. + + +## Running + +The sample has no command-line parameters. To see the report, run the following command: + +``` +python3 hello_query_device.py +``` + +## Sample Output + +The application prints all available devices with their supported metrics and default values for configuration parameters.
For example: + +``` +Available devices: + Device: CPU + Metrics: + AVAILABLE_DEVICES: 0 + SUPPORTED_METRICS: AVAILABLE_DEVICES, SUPPORTED_METRICS, FULL_DEVICE_NAME, OPTIMIZATION_CAPABILITIES, SUPPORTED_CONFIG_KEYS, RANGE_FOR_ASYNC_INFER_REQUESTS, RANGE_FOR_STREAMS + FULL_DEVICE_NAME: Intel(R) Core(TM) i7-8700K CPU @ 3.70GHz + OPTIMIZATION_CAPABILITIES: WINOGRAD, FP32, INT8, BIN + SUPPORTED_CONFIG_KEYS: CPU_BIND_THREAD, CPU_THREADS_NUM, CPU_THROUGHPUT_STREAMS, DUMP_EXEC_GRAPH_AS_DOT, DYN_BATCH_ENABLED, DYN_BATCH_LIMIT, EXCLUSIVE_ASYNC_REQUESTS, PERF_COUNT, RANGE_FOR_ASYNC_INFER_REQUESTS, RANGE_FOR_STREAMS + RANGE_FOR_ASYNC_INFER_REQUESTS: 0, 6, 1 + RANGE_FOR_STREAMS: 1, 12 + + Default values for device configuration keys: + CPU_BIND_THREAD: YES + CPU_THREADS_NUM: 0 + CPU_THROUGHPUT_STREAMS: 1 + DUMP_EXEC_GRAPH_AS_DOT: + DYN_BATCH_ENABLED: NO + DYN_BATCH_LIMIT: 0 + EXCLUSIVE_ASYNC_REQUESTS: NO + PERF_COUNT: NO + RANGE_FOR_ASYNC_INFER_REQUESTS: 1 + RANGE_FOR_STREAMS: 6 +``` +## See Also +* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md) diff --git a/inference-engine/ie_bridges/python/sample/hello_query_device/hello_query_device.py b/inference-engine/ie_bridges/python/sample/hello_query_device/hello_query_device.py new file mode 100644 index 00000000000000..9d78438eebd967 --- /dev/null +++ b/inference-engine/ie_bridges/python/sample/hello_query_device/hello_query_device.py @@ -0,0 +1,35 @@ +import sys + +from openvino.inference_engine import IECore + + +def param_to_string(metric): + if isinstance(metric, (list, tuple)): + return ", ".join([str(val) for val in metric]) + elif isinstance(metric, dict): + str_param_repr = "" + for k, v in metric.items(): + str_param_repr += "{}: {}\n".format(k, v) + return str_param_repr + else: + return str(metric) + + +def main(): + ie = IECore() + print("Available devices:") + for device in ie.available_devices: + print("\tDevice: {}".format(device)) + print("\tMetrics:") + for metric in ie.get_metric(device, "SUPPORTED_METRICS"): + metric_val = ie.get_metric(device, metric) + print("\t\t{}: {}".format(metric, param_to_string(metric_val))) + + print("\n\tDefault values for device configuration keys:") + for cfg in ie.get_metric(device, "SUPPORTED_CONFIG_KEYS"): + cfg_val = ie.get_config(device, cfg) + print("\t\t{}: {}".format(cfg, param_to_string(cfg_val))) + + +if __name__ == '__main__': + sys.exit(main() or 0) diff --git a/inference-engine/ie_bridges/python/sample/image_net_synset.txt b/inference-engine/ie_bridges/python/sample/image_net_synset.txt deleted file mode 100644 index a9e8c7f50d144e..00000000000000 --- a/inference-engine/ie_bridges/python/sample/image_net_synset.txt +++ /dev/null @@ -1,1000 +0,0 @@ -n01440764 tench, Tinca tinca -n01443537 goldfish, Carassius auratus -n01484850 great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias -n01491361 tiger shark, Galeocerdo cuvieri -n01494475 hammerhead, hammerhead shark -n01496331 electric ray, crampfish, numbfish, torpedo -n01498041 stingray -n01514668 cock -n01514859 hen -n01518878 ostrich, Struthio camelus -n01530575 brambling, Fringilla montifringilla -n01531178 goldfinch, Carduelis carduelis -n01532829 house finch, linnet, Carpodacus mexicanus -n01534433 junco, snowbird -n01537544 indigo bunting, indigo finch, indigo bird, Passerina cyanea -n01558993 robin, American robin, Turdus migratorius -n01560419 bulbul -n01580077 jay -n01582220 magpie -n01592084 chickadee -n01601694 water ouzel, dipper -n01608432 kite -n01614925 bald eagle, American eagle, 
Haliaeetus leucocephalus -n01616318 vulture -n01622779 great grey owl, great gray owl, Strix nebulosa -n01629819 European fire salamander, Salamandra salamandra -n01630670 common newt, Triturus vulgaris -n01631663 eft -n01632458 spotted salamander, Ambystoma maculatum -n01632777 axolotl, mud puppy, Ambystoma mexicanum -n01641577 bullfrog, Rana catesbeiana -n01644373 tree frog, tree-frog -n01644900 tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui -n01664065 loggerhead, loggerhead turtle, Caretta caretta -n01665541 leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea -n01667114 mud turtle -n01667778 terrapin -n01669191 box turtle, box tortoise -n01675722 banded gecko -n01677366 common iguana, iguana, Iguana iguana -n01682714 American chameleon, anole, Anolis carolinensis -n01685808 whiptail, whiptail lizard -n01687978 agama -n01688243 frilled lizard, Chlamydosaurus kingi -n01689811 alligator lizard -n01692333 Gila monster, Heloderma suspectum -n01693334 green lizard, Lacerta viridis -n01694178 African chameleon, Chamaeleo chamaeleon -n01695060 Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis -n01697457 African crocodile, Nile crocodile, Crocodylus niloticus -n01698640 American alligator, Alligator mississipiensis -n01704323 triceratops -n01728572 thunder snake, worm snake, Carphophis amoenus -n01728920 ringneck snake, ring-necked snake, ring snake -n01729322 hognose snake, puff adder, sand viper -n01729977 green snake, grass snake -n01734418 king snake, kingsnake -n01735189 garter snake, grass snake -n01737021 water snake -n01739381 vine snake -n01740131 night snake, Hypsiglena torquata -n01742172 boa constrictor, Constrictor constrictor -n01744401 rock python, rock snake, Python sebae -n01748264 Indian cobra, Naja naja -n01749939 green mamba -n01751748 sea snake -n01753488 horned viper, cerastes, sand viper, horned asp, Cerastes cornutus -n01755581 diamondback, diamondback rattlesnake, Crotalus adamanteus -n01756291 sidewinder, horned rattlesnake, Crotalus cerastes -n01768244 trilobite -n01770081 harvestman, daddy longlegs, Phalangium opilio -n01770393 scorpion -n01773157 black and gold garden spider, Argiope aurantia -n01773549 barn spider, Araneus cavaticus -n01773797 garden spider, Aranea diademata -n01774384 black widow, Latrodectus mactans -n01774750 tarantula -n01775062 wolf spider, hunting spider -n01776313 tick -n01784675 centipede -n01795545 black grouse -n01796340 ptarmigan -n01797886 ruffed grouse, partridge, Bonasa umbellus -n01798484 prairie chicken, prairie grouse, prairie fowl -n01806143 peacock -n01806567 quail -n01807496 partridge -n01817953 African grey, African gray, Psittacus erithacus -n01818515 macaw -n01819313 sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita -n01820546 lorikeet -n01824575 coucal -n01828970 bee eater -n01829413 hornbill -n01833805 hummingbird -n01843065 jacamar -n01843383 toucan -n01847000 drake -n01855032 red-breasted merganser, Mergus serrator -n01855672 goose -n01860187 black swan, Cygnus atratus -n01871265 tusker -n01872401 echidna, spiny anteater, anteater -n01873310 platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus -n01877812 wallaby, brush kangaroo -n01882714 koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus -n01883070 wombat -n01910747 jellyfish -n01914609 sea anemone, anemone -n01917289 brain coral -n01924916 flatworm, platyhelminth -n01930112 nematode, nematode worm, roundworm -n01943899 conch -n01944390 snail 
-n01945685 slug -n01950731 sea slug, nudibranch -n01955084 chiton, coat-of-mail shell, sea cradle, polyplacophore -n01968897 chambered nautilus, pearly nautilus, nautilus -n01978287 Dungeness crab, Cancer magister -n01978455 rock crab, Cancer irroratus -n01980166 fiddler crab -n01981276 king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica -n01983481 American lobster, Northern lobster, Maine lobster, Homarus americanus -n01984695 spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish -n01985128 crayfish, crawfish, crawdad, crawdaddy -n01986214 hermit crab -n01990800 isopod -n02002556 white stork, Ciconia ciconia -n02002724 black stork, Ciconia nigra -n02006656 spoonbill -n02007558 flamingo -n02009229 little blue heron, Egretta caerulea -n02009912 American egret, great white heron, Egretta albus -n02011460 bittern -n02012849 crane -n02013706 limpkin, Aramus pictus -n02017213 European gallinule, Porphyrio porphyrio -n02018207 American coot, marsh hen, mud hen, water hen, Fulica americana -n02018795 bustard -n02025239 ruddy turnstone, Arenaria interpres -n02027492 red-backed sandpiper, dunlin, Erolia alpina -n02028035 redshank, Tringa totanus -n02033041 dowitcher -n02037110 oystercatcher, oyster catcher -n02051845 pelican -n02056570 king penguin, Aptenodytes patagonica -n02058221 albatross, mollymawk -n02066245 grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus -n02071294 killer whale, killer, orca, grampus, sea wolf, Orcinus orca -n02074367 dugong, Dugong dugon -n02077923 sea lion -n02085620 Chihuahua -n02085782 Japanese spaniel -n02085936 Maltese dog, Maltese terrier, Maltese -n02086079 Pekinese, Pekingese, Peke -n02086240 Shih-Tzu -n02086646 Blenheim spaniel -n02086910 papillon -n02087046 toy terrier -n02087394 Rhodesian ridgeback -n02088094 Afghan hound, Afghan -n02088238 basset, basset hound -n02088364 beagle -n02088466 bloodhound, sleuthhound -n02088632 bluetick -n02089078 black-and-tan coonhound -n02089867 Walker hound, Walker foxhound -n02089973 English foxhound -n02090379 redbone -n02090622 borzoi, Russian wolfhound -n02090721 Irish wolfhound -n02091032 Italian greyhound -n02091134 whippet -n02091244 Ibizan hound, Ibizan Podenco -n02091467 Norwegian elkhound, elkhound -n02091635 otterhound, otter hound -n02091831 Saluki, gazelle hound -n02092002 Scottish deerhound, deerhound -n02092339 Weimaraner -n02093256 Staffordshire bullterrier, Staffordshire bull terrier -n02093428 American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier -n02093647 Bedlington terrier -n02093754 Border terrier -n02093859 Kerry blue terrier -n02093991 Irish terrier -n02094114 Norfolk terrier -n02094258 Norwich terrier -n02094433 Yorkshire terrier -n02095314 wire-haired fox terrier -n02095570 Lakeland terrier -n02095889 Sealyham terrier, Sealyham -n02096051 Airedale, Airedale terrier -n02096177 cairn, cairn terrier -n02096294 Australian terrier -n02096437 Dandie Dinmont, Dandie Dinmont terrier -n02096585 Boston bull, Boston terrier -n02097047 miniature schnauzer -n02097130 giant schnauzer -n02097209 standard schnauzer -n02097298 Scotch terrier, Scottish terrier, Scottie -n02097474 Tibetan terrier, chrysanthemum dog -n02097658 silky terrier, Sydney silky -n02098105 soft-coated wheaten terrier -n02098286 West Highland white terrier -n02098413 Lhasa, Lhasa apso -n02099267 flat-coated retriever -n02099429 curly-coated retriever -n02099601 golden retriever -n02099712 Labrador retriever -n02099849 
Chesapeake Bay retriever -n02100236 German short-haired pointer -n02100583 vizsla, Hungarian pointer -n02100735 English setter -n02100877 Irish setter, red setter -n02101006 Gordon setter -n02101388 Brittany spaniel -n02101556 clumber, clumber spaniel -n02102040 English springer, English springer spaniel -n02102177 Welsh springer spaniel -n02102318 cocker spaniel, English cocker spaniel, cocker -n02102480 Sussex spaniel -n02102973 Irish water spaniel -n02104029 kuvasz -n02104365 schipperke -n02105056 groenendael -n02105162 malinois -n02105251 briard -n02105412 kelpie -n02105505 komondor -n02105641 Old English sheepdog, bobtail -n02105855 Shetland sheepdog, Shetland sheep dog, Shetland -n02106030 collie -n02106166 Border collie -n02106382 Bouvier des Flandres, Bouviers des Flandres -n02106550 Rottweiler -n02106662 German shepherd, German shepherd dog, German police dog, alsatian -n02107142 Doberman, Doberman pinscher -n02107312 miniature pinscher -n02107574 Greater Swiss Mountain dog -n02107683 Bernese mountain dog -n02107908 Appenzeller -n02108000 EntleBucher -n02108089 boxer -n02108422 bull mastiff -n02108551 Tibetan mastiff -n02108915 French bulldog -n02109047 Great Dane -n02109525 Saint Bernard, St Bernard -n02109961 Eskimo dog, husky -n02110063 malamute, malemute, Alaskan malamute -n02110185 Siberian husky -n02110341 dalmatian, coach dog, carriage dog -n02110627 affenpinscher, monkey pinscher, monkey dog -n02110806 basenji -n02110958 pug, pug-dog -n02111129 Leonberg -n02111277 Newfoundland, Newfoundland dog -n02111500 Great Pyrenees -n02111889 Samoyed, Samoyede -n02112018 Pomeranian -n02112137 chow, chow chow -n02112350 keeshond -n02112706 Brabancon griffon -n02113023 Pembroke, Pembroke Welsh corgi -n02113186 Cardigan, Cardigan Welsh corgi -n02113624 toy poodle -n02113712 miniature poodle -n02113799 standard poodle -n02113978 Mexican hairless -n02114367 timber wolf, grey wolf, gray wolf, Canis lupus -n02114548 white wolf, Arctic wolf, Canis lupus tundrarum -n02114712 red wolf, maned wolf, Canis rufus, Canis niger -n02114855 coyote, prairie wolf, brush wolf, Canis latrans -n02115641 dingo, warrigal, warragal, Canis dingo -n02115913 dhole, Cuon alpinus -n02116738 African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus -n02117135 hyena, hyaena -n02119022 red fox, Vulpes vulpes -n02119789 kit fox, Vulpes macrotis -n02120079 Arctic fox, white fox, Alopex lagopus -n02120505 grey fox, gray fox, Urocyon cinereoargenteus -n02123045 tabby, tabby cat -n02123159 tiger cat -n02123394 Persian cat -n02123597 Siamese cat, Siamese -n02124075 Egyptian cat -n02125311 cougar, puma, catamount, mountain lion, painter, panther, Felis concolor -n02127052 lynx, catamount -n02128385 leopard, Panthera pardus -n02128757 snow leopard, ounce, Panthera uncia -n02128925 jaguar, panther, Panthera onca, Felis onca -n02129165 lion, king of beasts, Panthera leo -n02129604 tiger, Panthera tigris -n02130308 cheetah, chetah, Acinonyx jubatus -n02132136 brown bear, bruin, Ursus arctos -n02133161 American black bear, black bear, Ursus americanus, Euarctos americanus -n02134084 ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus -n02134418 sloth bear, Melursus ursinus, Ursus ursinus -n02137549 mongoose -n02138441 meerkat, mierkat -n02165105 tiger beetle -n02165456 ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle -n02167151 ground beetle, carabid beetle -n02168699 long-horned beetle, longicorn, longicorn beetle -n02169497 leaf beetle, chrysomelid -n02172182 dung beetle -n02174001 rhinoceros beetle 
-n02177972 weevil -n02190166 fly -n02206856 bee -n02219486 ant, emmet, pismire -n02226429 grasshopper, hopper -n02229544 cricket -n02231487 walking stick, walkingstick, stick insect -n02233338 cockroach, roach -n02236044 mantis, mantid -n02256656 cicada, cicala -n02259212 leafhopper -n02264363 lacewing, lacewing fly -n02268443 dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk -n02268853 damselfly -n02276258 admiral -n02277742 ringlet, ringlet butterfly -n02279972 monarch, monarch butterfly, milkweed butterfly, Danaus plexippus -n02280649 cabbage butterfly -n02281406 sulphur butterfly, sulfur butterfly -n02281787 lycaenid, lycaenid butterfly -n02317335 starfish, sea star -n02319095 sea urchin -n02321529 sea cucumber, holothurian -n02325366 wood rabbit, cottontail, cottontail rabbit -n02326432 hare -n02328150 Angora, Angora rabbit -n02342885 hamster -n02346627 porcupine, hedgehog -n02356798 fox squirrel, eastern fox squirrel, Sciurus niger -n02361337 marmot -n02363005 beaver -n02364673 guinea pig, Cavia cobaya -n02389026 sorrel -n02391049 zebra -n02395406 hog, pig, grunter, squealer, Sus scrofa -n02396427 wild boar, boar, Sus scrofa -n02397096 warthog -n02398521 hippopotamus, hippo, river horse, Hippopotamus amphibius -n02403003 ox -n02408429 water buffalo, water ox, Asiatic buffalo, Bubalus bubalis -n02410509 bison -n02412080 ram, tup -n02415577 bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis -n02417914 ibex, Capra ibex -n02422106 hartebeest -n02422699 impala, Aepyceros melampus -n02423022 gazelle -n02437312 Arabian camel, dromedary, Camelus dromedarius -n02437616 llama -n02441942 weasel -n02442845 mink -n02443114 polecat, fitch, foulmart, foumart, Mustela putorius -n02443484 black-footed ferret, ferret, Mustela nigripes -n02444819 otter -n02445715 skunk, polecat, wood pussy -n02447366 badger -n02454379 armadillo -n02457408 three-toed sloth, ai, Bradypus tridactylus -n02480495 orangutan, orang, orangutang, Pongo pygmaeus -n02480855 gorilla, Gorilla gorilla -n02481823 chimpanzee, chimp, Pan troglodytes -n02483362 gibbon, Hylobates lar -n02483708 siamang, Hylobates syndactylus, Symphalangus syndactylus -n02484975 guenon, guenon monkey -n02486261 patas, hussar monkey, Erythrocebus patas -n02486410 baboon -n02487347 macaque -n02488291 langur -n02488702 colobus, colobus monkey -n02489166 proboscis monkey, Nasalis larvatus -n02490219 marmoset -n02492035 capuchin, ringtail, Cebus capucinus -n02492660 howler monkey, howler -n02493509 titi, titi monkey -n02493793 spider monkey, Ateles geoffroyi -n02494079 squirrel monkey, Saimiri sciureus -n02497673 Madagascar cat, ring-tailed lemur, Lemur catta -n02500267 indri, indris, Indri indri, Indri brevicaudatus -n02504013 Indian elephant, Elephas maximus -n02504458 African elephant, Loxodonta africana -n02509815 lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens -n02510455 giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca -n02514041 barracouta, snoek -n02526121 eel -n02536864 coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch -n02606052 rock beauty, Holocanthus tricolor -n02607072 anemone fish -n02640242 sturgeon -n02641379 gar, garfish, garpike, billfish, Lepisosteus osseus -n02643566 lionfish -n02655020 puffer, pufferfish, blowfish, globefish -n02666196 abacus -n02667093 abaya -n02669723 academic gown, academic robe, judge's robe -n02672831 accordion, piano accordion, squeeze box 
-n02676566 acoustic guitar -n02687172 aircraft carrier, carrier, flattop, attack aircraft carrier -n02690373 airliner -n02692877 airship, dirigible -n02699494 altar -n02701002 ambulance -n02704792 amphibian, amphibious vehicle -n02708093 analog clock -n02727426 apiary, bee house -n02730930 apron -n02747177 ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin -n02749479 assault rifle, assault gun -n02769748 backpack, back pack, knapsack, packsack, rucksack, haversack -n02776631 bakery, bakeshop, bakehouse -n02777292 balance beam, beam -n02782093 balloon -n02783161 ballpoint, ballpoint pen, ballpen, Biro -n02786058 Band Aid -n02787622 banjo -n02788148 bannister, banister, balustrade, balusters, handrail -n02790996 barbell -n02791124 barber chair -n02791270 barbershop -n02793495 barn -n02794156 barometer -n02795169 barrel, cask -n02797295 barrow, garden cart, lawn cart, wheelbarrow -n02799071 baseball -n02802426 basketball -n02804414 bassinet -n02804610 bassoon -n02807133 bathing cap, swimming cap -n02808304 bath towel -n02808440 bathtub, bathing tub, bath, tub -n02814533 beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon -n02814860 beacon, lighthouse, beacon light, pharos -n02815834 beaker -n02817516 bearskin, busby, shako -n02823428 beer bottle -n02823750 beer glass -n02825657 bell cote, bell cot -n02834397 bib -n02835271 bicycle-built-for-two, tandem bicycle, tandem -n02837789 bikini, two-piece -n02840245 binder, ring-binder -n02841315 binoculars, field glasses, opera glasses -n02843684 birdhouse -n02859443 boathouse -n02860847 bobsled, bobsleigh, bob -n02865351 bolo tie, bolo, bola tie, bola -n02869837 bonnet, poke bonnet -n02870880 bookcase -n02871525 bookshop, bookstore, bookstall -n02877765 bottlecap -n02879718 bow -n02883205 bow tie, bow-tie, bowtie -n02892201 brass, memorial tablet, plaque -n02892767 brassiere, bra, bandeau -n02894605 breakwater, groin, groyne, mole, bulwark, seawall, jetty -n02895154 breastplate, aegis, egis -n02906734 broom -n02909870 bucket, pail -n02910353 buckle -n02916936 bulletproof vest -n02917067 bullet train, bullet -n02927161 butcher shop, meat market -n02930766 cab, hack, taxi, taxicab -n02939185 caldron, cauldron -n02948072 candle, taper, wax light -n02950826 cannon -n02951358 canoe -n02951585 can opener, tin opener -n02963159 cardigan -n02965783 car mirror -n02966193 carousel, carrousel, merry-go-round, roundabout, whirligig -n02966687 carpenter's kit, tool kit -n02971356 carton -n02974003 car wheel -n02977058 cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM -n02978881 cassette -n02979186 cassette player -n02980441 castle -n02981792 catamaran -n02988304 CD player -n02992211 cello, violoncello -n02992529 cellular telephone, cellular phone, cellphone, cell, mobile phone -n02999410 chain -n03000134 chainlink fence -n03000247 chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour -n03000684 chain saw, chainsaw -n03014705 chest -n03016953 chiffonier, commode -n03017168 chime, bell, gong -n03018349 china cabinet, china closet -n03026506 Christmas stocking -n03028079 church, church building -n03032252 cinema, movie theater, movie theatre, movie house, picture palace -n03041632 cleaver, meat cleaver, chopper -n03042490 cliff dwelling -n03045698 cloak -n03047690 clog, geta, patten, sabot -n03062245 cocktail shaker -n03063599 coffee mug -n03063689 coffeepot -n03065424 coil, spiral, 
volute, whorl, helix -n03075370 combination lock -n03085013 computer keyboard, keypad -n03089624 confectionery, confectionary, candy store -n03095699 container ship, containership, container vessel -n03100240 convertible -n03109150 corkscrew, bottle screw -n03110669 cornet, horn, trumpet, trump -n03124043 cowboy boot -n03124170 cowboy hat, ten-gallon hat -n03125729 cradle -n03126707 crane -n03127747 crash helmet -n03127925 crate -n03131574 crib, cot -n03133878 Crock Pot -n03134739 croquet ball -n03141823 crutch -n03146219 cuirass -n03160309 dam, dike, dyke -n03179701 desk -n03180011 desktop computer -n03187595 dial telephone, dial phone -n03188531 diaper, nappy, napkin -n03196217 digital clock -n03197337 digital watch -n03201208 dining table, board -n03207743 dishrag, dishcloth -n03207941 dishwasher, dish washer, dishwashing machine -n03208938 disk brake, disc brake -n03216828 dock, dockage, docking facility -n03218198 dogsled, dog sled, dog sleigh -n03220513 dome -n03223299 doormat, welcome mat -n03240683 drilling platform, offshore rig -n03249569 drum, membranophone, tympan -n03250847 drumstick -n03255030 dumbbell -n03259280 Dutch oven -n03271574 electric fan, blower -n03272010 electric guitar -n03272562 electric locomotive -n03290653 entertainment center -n03291819 envelope -n03297495 espresso maker -n03314780 face powder -n03325584 feather boa, boa -n03337140 file, file cabinet, filing cabinet -n03344393 fireboat -n03345487 fire engine, fire truck -n03347037 fire screen, fireguard -n03355925 flagpole, flagstaff -n03372029 flute, transverse flute -n03376595 folding chair -n03379051 football helmet -n03384352 forklift -n03388043 fountain -n03388183 fountain pen -n03388549 four-poster -n03393912 freight car -n03394916 French horn, horn -n03400231 frying pan, frypan, skillet -n03404251 fur coat -n03417042 garbage truck, dustcart -n03424325 gasmask, respirator, gas helmet -n03425413 gas pump, gasoline pump, petrol pump, island dispenser -n03443371 goblet -n03444034 go-kart -n03445777 golf ball -n03445924 golfcart, golf cart -n03447447 gondola -n03447721 gong, tam-tam -n03450230 gown -n03452741 grand piano, grand -n03457902 greenhouse, nursery, glasshouse -n03459775 grille, radiator grille -n03461385 grocery store, grocery, food market, market -n03467068 guillotine -n03476684 hair slide -n03476991 hair spray -n03478589 half track -n03481172 hammer -n03482405 hamper -n03483316 hand blower, blow dryer, blow drier, hair dryer, hair drier -n03485407 hand-held computer, hand-held microcomputer -n03485794 handkerchief, hankie, hanky, hankey -n03492542 hard disc, hard disk, fixed disk -n03494278 harmonica, mouth organ, harp, mouth harp -n03495258 harp -n03496892 harvester, reaper -n03498962 hatchet -n03527444 holster -n03529860 home theater, home theatre -n03530642 honeycomb -n03532672 hook, claw -n03534580 hoopskirt, crinoline -n03535780 horizontal bar, high bar -n03538406 horse cart, horse-cart -n03544143 hourglass -n03584254 iPod -n03584829 iron, smoothing iron -n03590841 jack-o'-lantern -n03594734 jean, blue jean, denim -n03594945 jeep, landrover -n03595614 jersey, T-shirt, tee shirt -n03598930 jigsaw puzzle -n03599486 jinrikisha, ricksha, rickshaw -n03602883 joystick -n03617480 kimono -n03623198 knee pad -n03627232 knot -n03630383 lab coat, laboratory coat -n03633091 ladle -n03637318 lampshade, lamp shade -n03642806 laptop, laptop computer -n03649909 lawn mower, mower -n03657121 lens cap, lens cover -n03658185 letter opener, paper knife, paperknife -n03661043 library -n03662601 lifeboat 
-n03666591 lighter, light, igniter, ignitor -n03670208 limousine, limo -n03673027 liner, ocean liner -n03676483 lipstick, lip rouge -n03680355 Loafer -n03690938 lotion -n03691459 loudspeaker, speaker, speaker unit, loudspeaker system, speaker system -n03692522 loupe, jeweler's loupe -n03697007 lumbermill, sawmill -n03706229 magnetic compass -n03709823 mailbag, postbag -n03710193 mailbox, letter box -n03710637 maillot -n03710721 maillot, tank suit -n03717622 manhole cover -n03720891 maraca -n03721384 marimba, xylophone -n03724870 mask -n03729826 matchstick -n03733131 maypole -n03733281 maze, labyrinth -n03733805 measuring cup -n03742115 medicine chest, medicine cabinet -n03743016 megalith, megalithic structure -n03759954 microphone, mike -n03761084 microwave, microwave oven -n03763968 military uniform -n03764736 milk can -n03769881 minibus -n03770439 miniskirt, mini -n03770679 minivan -n03773504 missile -n03775071 mitten -n03775546 mixing bowl -n03776460 mobile home, manufactured home -n03777568 Model T -n03777754 modem -n03781244 monastery -n03782006 monitor -n03785016 moped -n03786901 mortar -n03787032 mortarboard -n03788195 mosque -n03788365 mosquito net -n03791053 motor scooter, scooter -n03792782 mountain bike, all-terrain bike, off-roader -n03792972 mountain tent -n03793489 mouse, computer mouse -n03794056 mousetrap -n03796401 moving van -n03803284 muzzle -n03804744 nail -n03814639 neck brace -n03814906 necklace -n03825788 nipple -n03832673 notebook, notebook computer -n03837869 obelisk -n03838899 oboe, hautboy, hautbois -n03840681 ocarina, sweet potato -n03841143 odometer, hodometer, mileometer, milometer -n03843555 oil filter -n03854065 organ, pipe organ -n03857828 oscilloscope, scope, cathode-ray oscilloscope, CRO -n03866082 overskirt -n03868242 oxcart -n03868863 oxygen mask -n03871628 packet -n03873416 paddle, boat paddle -n03874293 paddlewheel, paddle wheel -n03874599 padlock -n03876231 paintbrush -n03877472 pajama, pyjama, pj's, jammies -n03877845 palace -n03884397 panpipe, pandean pipe, syrinx -n03887697 paper towel -n03888257 parachute, chute -n03888605 parallel bars, bars -n03891251 park bench -n03891332 parking meter -n03895866 passenger car, coach, carriage -n03899768 patio, terrace -n03902125 pay-phone, pay-station -n03903868 pedestal, plinth, footstall -n03908618 pencil box, pencil case -n03908714 pencil sharpener -n03916031 perfume, essence -n03920288 Petri dish -n03924679 photocopier -n03929660 pick, plectrum, plectron -n03929855 pickelhaube -n03930313 picket fence, paling -n03930630 pickup, pickup truck -n03933933 pier -n03935335 piggy bank, penny bank -n03937543 pill bottle -n03938244 pillow -n03942813 ping-pong ball -n03944341 pinwheel -n03947888 pirate, pirate ship -n03950228 pitcher, ewer -n03954731 plane, carpenter's plane, woodworking plane -n03956157 planetarium -n03958227 plastic bag -n03961711 plate rack -n03967562 plow, plough -n03970156 plunger, plumber's helper -n03976467 Polaroid camera, Polaroid Land camera -n03976657 pole -n03977966 police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria -n03980874 poncho -n03982430 pool table, billiard table, snooker table -n03983396 pop bottle, soda bottle -n03991062 pot, flowerpot -n03992509 potter's wheel -n03995372 power drill -n03998194 prayer rug, prayer mat -n04004767 printer -n04005630 prison, prison house -n04008634 projectile, missile -n04009552 projector -n04019541 puck, hockey puck -n04023962 punching bag, punch bag, punching ball, punchball -n04026417 purse -n04033901 quill, quill pen 
-n04033995 quilt, comforter, comfort, puff -n04037443 racer, race car, racing car -n04039381 racket, racquet -n04040759 radiator -n04041544 radio, wireless -n04044716 radio telescope, radio reflector -n04049303 rain barrel -n04065272 recreational vehicle, RV, R.V. -n04067472 reel -n04069434 reflex camera -n04070727 refrigerator, icebox -n04074963 remote control, remote -n04081281 restaurant, eating house, eating place, eatery -n04086273 revolver, six-gun, six-shooter -n04090263 rifle -n04099969 rocking chair, rocker -n04111531 rotisserie -n04116512 rubber eraser, rubber, pencil eraser -n04118538 rugby ball -n04118776 rule, ruler -n04120489 running shoe -n04125021 safe -n04127249 safety pin -n04131690 saltshaker, salt shaker -n04133789 sandal -n04136333 sarong -n04141076 sax, saxophone -n04141327 scabbard -n04141975 scale, weighing machine -n04146614 school bus -n04147183 schooner -n04149813 scoreboard -n04152593 screen, CRT screen -n04153751 screw -n04154565 screwdriver -n04162706 seat belt, seatbelt -n04179913 sewing machine -n04192698 shield, buckler -n04200800 shoe shop, shoe-shop, shoe store -n04201297 shoji -n04204238 shopping basket -n04204347 shopping cart -n04208210 shovel -n04209133 shower cap -n04209239 shower curtain -n04228054 ski -n04229816 ski mask -n04235860 sleeping bag -n04238763 slide rule, slipstick -n04239074 sliding door -n04243546 slot, one-armed bandit -n04251144 snorkel -n04252077 snowmobile -n04252225 snowplow, snowplough -n04254120 soap dispenser -n04254680 soccer ball -n04254777 sock -n04258138 solar dish, solar collector, solar furnace -n04259630 sombrero -n04263257 soup bowl -n04264628 space bar -n04265275 space heater -n04266014 space shuttle -n04270147 spatula -n04273569 speedboat -n04275548 spider web, spider's web -n04277352 spindle -n04285008 sports car, sport car -n04286575 spotlight, spot -n04296562 stage -n04310018 steam locomotive -n04311004 steel arch bridge -n04311174 steel drum -n04317175 stethoscope -n04325704 stole -n04326547 stone wall -n04328186 stopwatch, stop watch -n04330267 stove -n04332243 strainer -n04335435 streetcar, tram, tramcar, trolley, trolley car -n04336792 stretcher -n04344873 studio couch, day bed -n04346328 stupa, tope -n04347754 submarine, pigboat, sub, U-boat -n04350905 suit, suit of clothes -n04355338 sundial -n04355933 sunglass -n04356056 sunglasses, dark glasses, shades -n04357314 sunscreen, sunblock, sun blocker -n04366367 suspension bridge -n04367480 swab, swob, mop -n04370456 sweatshirt -n04371430 swimming trunks, bathing trunks -n04371774 swing -n04372370 switch, electric switch, electrical switch -n04376876 syringe -n04380533 table lamp -n04389033 tank, army tank, armored combat vehicle, armoured combat vehicle -n04392985 tape player -n04398044 teapot -n04399382 teddy, teddy bear -n04404412 television, television system -n04409515 tennis ball -n04417672 thatch, thatched roof -n04418357 theater curtain, theatre curtain -n04423845 thimble -n04428191 thresher, thrasher, threshing machine -n04429376 throne -n04435653 tile roof -n04442312 toaster -n04443257 tobacco shop, tobacconist shop, tobacconist -n04447861 toilet seat -n04456115 torch -n04458633 totem pole -n04461696 tow truck, tow car, wrecker -n04462240 toyshop -n04465501 tractor -n04467665 trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi -n04476259 tray -n04479046 trench coat -n04482393 tricycle, trike, velocipede -n04483307 trimaran -n04485082 tripod -n04486054 triumphal arch -n04487081 trolleybus, trolley coach, trackless trolley 
-n04487394 trombone -n04493381 tub, vat -n04501370 turnstile -n04505470 typewriter keyboard -n04507155 umbrella -n04509417 unicycle, monocycle -n04515003 upright, upright piano -n04517823 vacuum, vacuum cleaner -n04522168 vase -n04523525 vault -n04525038 velvet -n04525305 vending machine -n04532106 vestment -n04532670 viaduct -n04536866 violin, fiddle -n04540053 volleyball -n04542943 waffle iron -n04548280 wall clock -n04548362 wallet, billfold, notecase, pocketbook -n04550184 wardrobe, closet, press -n04552348 warplane, military plane -n04553703 washbasin, handbasin, washbowl, lavabo, wash-hand basin -n04554684 washer, automatic washer, washing machine -n04557648 water bottle -n04560804 water jug -n04562935 water tower -n04579145 whiskey jug -n04579432 whistle -n04584207 wig -n04589890 window screen -n04590129 window shade -n04591157 Windsor tie -n04591713 wine bottle -n04592741 wing -n04596742 wok -n04597913 wooden spoon -n04599235 wool, woolen, woollen -n04604644 worm fence, snake fence, snake-rail fence, Virginia fence -n04606251 wreck -n04612504 yawl -n04613696 yurt -n06359193 web site, website, internet site, site -n06596364 comic book -n06785654 crossword puzzle, crossword -n06794110 street sign -n06874185 traffic light, traffic signal, stoplight -n07248320 book jacket, dust cover, dust jacket, dust wrapper -n07565083 menu -n07579787 plate -n07583066 guacamole -n07584110 consomme -n07590611 hot pot, hotpot -n07613480 trifle -n07614500 ice cream, icecream -n07615774 ice lolly, lolly, lollipop, popsicle -n07684084 French loaf -n07693725 bagel, beigel -n07695742 pretzel -n07697313 cheeseburger -n07697537 hotdog, hot dog, red hot -n07711569 mashed potato -n07714571 head cabbage -n07714990 broccoli -n07715103 cauliflower -n07716358 zucchini, courgette -n07716906 spaghetti squash -n07717410 acorn squash -n07717556 butternut squash -n07718472 cucumber, cuke -n07718747 artichoke, globe artichoke -n07720875 bell pepper -n07730033 cardoon -n07734744 mushroom -n07742313 Granny Smith -n07745940 strawberry -n07747607 orange -n07749582 lemon -n07753113 fig -n07753275 pineapple, ananas -n07753592 banana -n07754684 jackfruit, jak, jack -n07760859 custard apple -n07768694 pomegranate -n07802026 hay -n07831146 carbonara -n07836838 chocolate sauce, chocolate syrup -n07860988 dough -n07871810 meat loaf, meatloaf -n07873807 pizza, pizza pie -n07875152 potpie -n07880968 burrito -n07892512 red wine -n07920052 espresso -n07930864 cup -n07932039 eggnog -n09193705 alp -n09229709 bubble -n09246464 cliff, drop, drop-off -n09256479 coral reef -n09288635 geyser -n09332890 lakeside, lakeshore -n09399592 promontory, headland, head, foreland -n09421951 sandbar, sand bar -n09428293 seashore, coast, seacoast, sea-coast -n09468604 valley, vale -n09472597 volcano -n09835506 ballplayer, baseball player -n10148035 groom, bridegroom -n10565667 scuba diver -n11879895 rapeseed -n11939491 daisy -n12057211 yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum -n12144580 corn -n12267677 acorn -n12620546 hip, rose hip, rosehip -n12768682 buckeye, horse chestnut, conker -n12985857 coral fungus -n12998815 agaric -n13037406 gyromitra -n13040303 stinkhorn, carrion fungus -n13044778 earthstar -n13052670 hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa -n13054560 bolete -n13133613 ear, spike, capitulum -n15075141 toilet tissue, toilet paper, bathroom tissue diff --git a/inference-engine/ie_bridges/python/sample/style_transfer_sample/README.md 
b/inference-engine/ie_bridges/python/sample/style_transfer_sample/README.md index cdca581178f4b7..272432b402e81f 100644 --- a/inference-engine/ie_bridges/python/sample/style_transfer_sample/README.md +++ b/inference-engine/ie_bridges/python/sample/style_transfer_sample/README.md @@ -18,11 +18,11 @@ python3 style_transfer_sample.py --help The command yields the following usage message: ``` usage: style_transfer_sample.py [-h] -m MODEL -i INPUT [INPUT ...] - [-l CPU_EXTENSION] [-pp PLUGIN_DIR] - [-d DEVICE] [-nt NUMBER_TOP] [-ni NUMBER_ITER] + [-l CPU_EXTENSION] [-d DEVICE] + [-nt NUMBER_TOP] [-ni NUMBER_ITER] [--mean_val_r MEAN_VAL_R] [--mean_val_g MEAN_VAL_G] - [--mean_val_b MEAN_VAL_B] [-pc] + [--mean_val_b MEAN_VAL_B] Options: -h, --help Show this help message and exit. @@ -34,16 +34,12 @@ Options: Optional. Required for CPU custom layers. Absolute MKLDNN (CPU)-targeted custom layers. Absolute path to a shared library with the kernels implementations - -pp PLUGIN_DIR, --plugin_dir PLUGIN_DIR - Path to a plugin folder -d DEVICE, --device DEVICE Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is acceptable. Sample will look for a suitable plugin for device specified. Default value is CPU -nt NUMBER_TOP, --number_top NUMBER_TOP Number of top results - -ni NUMBER_ITER, --number_iter NUMBER_ITER - Number of inference iterations --mean_val_r MEAN_VAL_R, -mean_val_r MEAN_VAL_R Mean value of red chanel for mean value subtraction in postprocessing @@ -53,13 +49,11 @@ Options: --mean_val_b MEAN_VAL_B, -mean_val_b MEAN_VAL_B Mean value of blue chanel for mean value subtraction in postprocessing - -pc, --perf_counts Report performance counters - ``` Running the application with the empty list of options yields the usage message given above and an error message. -To perform inference on an image using a trained model of NST network on Intel® CPUs, use the following command: +To perform inference of an image using a trained model of NST network on Intel® CPUs, use the following command: ``` python3 style_transfer_sample.py -i /cat.bmp -m /1_decoder_FP32.xml ``` diff --git a/inference-engine/ie_bridges/python/sample/style_transfer_sample/style_transfer_sample.py b/inference-engine/ie_bridges/python/sample/style_transfer_sample/style_transfer_sample.py index fc08b177912d77..725fb474a9dacb 100644 --- a/inference-engine/ie_bridges/python/sample/style_transfer_sample/style_transfer_sample.py +++ b/inference-engine/ie_bridges/python/sample/style_transfer_sample/style_transfer_sample.py @@ -22,7 +22,7 @@ import numpy as np import logging as log from time import time -from openvino.inference_engine import IENetwork, IEPlugin +from openvino.inference_engine import IENetwork, IECore def build_argparser(): @@ -36,13 +36,11 @@ def build_argparser(): help="Optional. Required for CPU custom layers. " "Absolute MKLDNN (CPU)-targeted custom layers. Absolute path to a shared library with the " "kernels implementations", type=str, default=None) - args.add_argument("-pp", "--plugin_dir", help="Path to a plugin folder", type=str, default=None) args.add_argument("-d", "--device", help="Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is acceptable. Sample " "will look for a suitable plugin for device specified. 
Default value is CPU", default="CPU", type=str) args.add_argument("-nt", "--number_top", help="Number of top results", default=10, type=int) - args.add_argument("-ni", "--number_iter", help="Number of inference iterations", default=1, type=int) args.add_argument("--mean_val_r", "-mean_val_r", help="Mean value of red chanel for mean value subtraction in postprocessing ", default=0, type=float) @@ -52,8 +50,6 @@ def build_argparser(): args.add_argument("--mean_val_b", "-mean_val_b", help="Mean value of blue chanel for mean value subtraction in postprocessing ", default=0, type=float) - args.add_argument("-pc", "--perf_counts", help="Report performance counters", default=False, action="store_true") - return parser @@ -64,19 +60,20 @@ def main(): model_bin = os.path.splitext(model_xml)[0] + ".bin" # Plugin initialization for specified device and load extensions library if specified - plugin = IEPlugin(device=args.device, plugin_dirs=args.plugin_dir) + log.info("Creating Inference Engine") + ie = IECore() if args.cpu_extension and 'CPU' in args.device: - plugin.add_cpu_extension(args.cpu_extension) + ie.add_extension(args.cpu_extension, "CPU") # Read IR log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin)) net = IENetwork(model=model_xml, weights=model_bin) - if plugin.device == "CPU": - supported_layers = plugin.get_supported_layers(net) + if "CPU" in args.device: + supported_layers = ie.query_network(net, "CPU") not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers] if len(not_supported_layers) != 0: log.error("Following layers are not supported by the plugin for specified device {}:\n {}". - format(plugin.device, ', '.join(not_supported_layers))) + format(args.device, ', '.join(not_supported_layers))) log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l " "or --cpu_extension command line argument") sys.exit(1) @@ -103,23 +100,12 @@ def main(): # Loading model to the plugin log.info("Loading model to the plugin") - exec_net = plugin.load(network=net) + exec_net = ie.load_network(network=net, device_name=args.device) # Start sync inference - log.info("Starting inference ({} iterations)".format(args.number_iter)) - infer_time = [] - for i in range(args.number_iter): - t0 = time() - res = exec_net.infer(inputs={input_blob: images}) - infer_time.append((time() - t0) * 1000) - log.info("Average running time of one iteration: {} ms".format(np.average(np.asarray(infer_time)))) - if args.perf_counts: - perf_counts = exec_net.requests[0].get_perf_counts() - log.info("Performance counters:") - print("{:<70} {:<15} {:<15} {:<15} {:<10}".format('name', 'layer_type', 'exet_type', 'status', 'real_time, us')) - for layer, stats in perf_counts.items(): - print("{:<70} {:<15} {:<15} {:<15} {:<10}".format(layer, stats['layer_type'], stats['exec_type'], - stats['status'], stats['real_time'])) + log.info("Starting inference") + res = exec_net.infer(inputs={input_blob: images}) + # Processing output blob log.info("Processing output blob") res = res[out_blob] @@ -135,6 +121,7 @@ def main(): out_img = os.path.join(os.path.dirname(__file__), "out_{}.bmp".format(batch)) cv2.imwrite(out_img, data) log.info("Result image was saved to {}".format(out_img)) + log.info("This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool\n") if __name__ == '__main__': diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt 
b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt index 8e0a91aeb29486..1f46013b4be38f 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt @@ -13,7 +13,7 @@ file(GLOB SOURCE set_source_files_properties(${SOURCE} PROPERTIES CYTHON_IS_CXX TRUE ) -## Compatibility with python 2.7 which has depricated "register" specifier +## Compatibility with python 2.7 which has deprecated "register" specifier if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") add_definitions("-Wno-register") endif() diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/__init__.py b/inference-engine/ie_bridges/python/src/openvino/inference_engine/__init__.py index ff435b3bd9f52f..fe127f4f021502 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/__init__.py +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/__init__.py @@ -1,3 +1,4 @@ from .ie_api import * +__all__ = ['IENetwork', "IEPlugin", "IECore", "get_version"] __version__ = get_version() -__all__ = ['IENetwork', "IEPlugin", "IENetReader"] \ No newline at end of file + diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pxd b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pxd index 8ee5656fb46c5b..fbcdda772dbb41 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pxd +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pxd @@ -25,8 +25,9 @@ cdef class InferRequest: cpdef async_infer(self, inputs = ?) cpdef wait(self, timeout = ?) cpdef get_perf_counts(self) + cdef void user_callback(self, int status) with gil cdef public: - _inputs_list, _outputs_list + _inputs_list, _outputs_list, _py_callback, _py_data, _py_callback_used, _py_callback_called cdef class IENetwork: cdef C.IENetwork impl @@ -34,8 +35,9 @@ cdef class IENetwork: cdef class ExecutableNetwork: cdef unique_ptr[C.IEExecNetwork] impl cdef C.IEPlugin plugin_impl + cdef C.IECore ie_core_impl cdef public: - _requests, inputs, outputs + _requests, _infer_requests, inputs, outputs cdef class IEPlugin: cdef C.IEPlugin impl @@ -56,3 +58,7 @@ cdef class OutputInfo: cdef class LayersStatsMap(dict): cdef C.IENetwork net_impl + +cdef class IECore: + cdef C.IECore impl + cpdef ExecutableNetwork load_network(self, IENetwork network, str device_name, config = ?, int num_requests = ?) 
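The new `_py_callback*` fields and the `user_callback` hook declared above back a user-facing completion-callback API (its implementation follows in ie_api.pyx). A hedged usage sketch, assuming `exec_net`, `input_blob`, and `images` were prepared as in the sample code earlier:

```
import threading

finished = threading.Event()

def completion_callback(status, userdata):
    # status is an integer from the StatusCode enum; userdata is the object
    # passed below as py_data
    print("Request {} finished with status {}".format(userdata, status))
    finished.set()

request = exec_net.requests[0]
request.set_completion_callback(completion_callback, py_data=0)
request.async_infer({input_blob: images})
request.wait()  # with a callback set, wait() returns after the callback has run
```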
diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx index 834f72c5b1504d..d79a32a4080de5 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx @@ -7,12 +7,16 @@ from libcpp.vector cimport vector from libcpp.pair cimport pair from libcpp.map cimport map from libcpp.memory cimport unique_ptr -from libc.stdint cimport int64_t +from libc.stdlib cimport malloc, free +from libc.stdint cimport int64_t, uint8_t +from libc.string cimport memcpy, strcpy import os import numpy as np from copy import deepcopy import warnings -from collections import OrderedDict +from collections import OrderedDict, namedtuple +import threading cdef extern from "<utility>" namespace "std" nogil: cdef unique_ptr[C.IEExecNetwork] move(unique_ptr[C.IEExecNetwork]) @@ -31,13 +35,103 @@ cdef dict_to_c_map(py_dict): c_map[k.encode()] = v.encode() return c_map -supported_precisions = ["FP32", "FP16", "Q78", "I32", "I16", "I8", "U32", "U16"] +cdef c_map_to_dict(map[string, string] c_map): + py_dict = {} + for v in c_map: + py_dict[v.first.decode()] = v.second.decode() + return py_dict + +supported_precisions = ["FP32", "FP16", "Q78", "I32", "I16", "I8", "U32", "U16", "U8"] + supported_layouts = ["NCHW", "NHWC", "OIHW", "C", "CHW", "HW", "NC", "CN", "BLOCKED", "NCDHW"] known_plugins = ['CPU', 'GPU', 'FPGA', 'MYRIAD', 'HETERO', 'HDDL'] +ctypedef enum StatusCode: + OK = 0 + GENERAL_ERROR = -1 + NOT_IMPLEMENTED = -2 + NETWORK_NOT_LOADED = -3 + PARAMETER_MISMATCH = -4 + NOT_FOUND = -5 + OUT_OF_BOUNDS = -6 + UNEXPECTED = -7 + REQUEST_BUSY = -8 + RESULT_NOT_READY = -9 + NOT_ALLOCATED = -10 + INFER_NOT_STARTED = -11 + NETWORK_NOT_READ = -12 + def get_version(): return C.get_version().decode() +cdef class IECore: + def __cinit__(self, xml_config_file: str = ""): + self.impl = C.IECore(xml_config_file.encode()) + + def get_versions(self, device_name: str): + cdef map[string, C.Version] versions_ + versions_ = self.impl.getVersions(device_name.encode()) + versions = {} + Versions = namedtuple("Versions", ["major", "minor", "build_number", "description"]) + for v in versions_: + device = v.first.decode() + ver = v.second + versions[device] = Versions(major=ver.apiVersion.major, minor=ver.apiVersion.minor, + build_number=ver.buildNumber.decode(), description=ver.description.decode()) + return versions + + cpdef ExecutableNetwork load_network(self, IENetwork network, str device_name, config=None, int num_requests=1): + cdef ExecutableNetwork exec_net = ExecutableNetwork() + cdef map[string, string] c_config + + if config: + c_config = dict_to_c_map(config) + exec_net.ie_core_impl = self.impl + exec_net.impl = move(self.impl.loadNetwork(network.impl, device_name.encode(), c_config, num_requests)) + exec_net.inputs = network.inputs.keys() + exec_net.outputs = list(network.outputs.keys()) + return exec_net + + def query_network(self, IENetwork network, str device_name, config=None): + cdef map[string, string] c_config + if config: + c_config = dict_to_c_map(config) + res = self.impl.queryNetwork(network.impl, device_name.encode(), c_config) + return c_map_to_dict(res) + + def set_config(self, config: dict, device_name: str): + cdef map[string, string] c_config = dict_to_c_map(config) + self.impl.setConfig(c_config, device_name.encode()) +
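A short sketch of the introspection side of IECore shown above. The config key assumes the MKL-DNN CPU plugin (CPU_THREADS_NUM) and the metric key assumes a plugin that reports FULL_DEVICE_NAME; returned values arrive as native Python objects via the parse_parameter helper added later in this patch:

```
ie = IECore()
print(ie.available_devices)            # e.g. ['CPU', 'GPU'], host-dependent

for device, ver in ie.get_versions("CPU").items():
    print(device, ver.major, ver.minor, ver.build_number, ver.description)

ie.set_config({"CPU_THREADS_NUM": "4"}, "CPU")   # config keys are plugin-defined
print(ie.get_metric("CPU", "FULL_DEVICE_NAME"))  # assumed supported metric
```

+ def register_plugin(self,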
plugin_name: str, device_name: str = ""): + self.impl.registerPlugin(plugin_name.encode(), device_name.encode()) + + def register_plugins(self, xml_config_file: str): + self.impl.registerPlugins(xml_config_file.encode()) + + def unregister_plugin(self, device_name: str): + self.impl.unregisterPlugin(device_name.encode()) + + def add_extension(self, extension_path: str, device_name: str): + self.impl.addExtension(extension_path.encode(), device_name.encode()) + + def get_metric(self, device_name: str, metric_name: str): + return self.impl.getMetric(device_name.encode(), metric_name.encode()) + + def get_config(self, device_name: str, config_name: str): + return self.impl.getConfig(device_name.encode(), config_name.encode()) + + + @property + def available_devices(self): + cdef vector[string] c_devices = self.impl.getAvailableDevices() + return [d.decode() for d in c_devices] + + # TODO: Add import network functionality + # TODO: Extend API for query config and attributes when it will be merged in C++ API + cdef class IENetLayer: @property def name(self): @@ -137,6 +231,7 @@ cdef class OutputInfo: cdef class ExecutableNetwork: def __init__(self): + self._infer_requests = [] self._requests = [] self.inputs = [] self.outputs = [] @@ -155,19 +250,53 @@ cdef class ExecutableNetwork: @property def requests(self): - requests = [] - for i in range(deref(self.impl).infer_requests.size()): - infer_request = InferRequest() - infer_request.impl = &(deref(self.impl).infer_requests[i]) - infer_request._inputs_list = self.inputs - infer_request._outputs_list = self.outputs - requests.append(infer_request) - return requests + if (len(self._infer_requests) == 0): + for i in range(deref(self.impl).infer_requests.size()): + infer_request = InferRequest() + infer_request.impl = &(deref(self.impl).infer_requests[i]) + self._infer_requests.append(infer_request) + + if (len(self._infer_requests) != deref(self.impl).infer_requests.size()): + raise Exception("Mismatch of infer requests number!") + + for i in range(len(self._infer_requests)): + self._infer_requests[i]._inputs_list = self.inputs + self._infer_requests[i]._outputs_list = self.outputs + + return self._infer_requests + + def get_exec_graph_info(self): + ie_network = IENetwork() + ie_network.impl = deref(self.impl).GetExecGraphInfo() + return ie_network + + def get_metric(self, metric_name: str): + return deref(self.impl).getMetric(metric_name.encode()) + + def get_config(self, config_name: str): + return deref(self.impl).getConfig(config_name.encode()) + +ctypedef extern void (*cb_type)(void*, int) with gil cdef class InferRequest: def __init__(self): self._inputs_list = [] self._outputs_list = [] + self._py_callback = lambda *args, **kwargs: None + self._py_callback_used = False + self._py_callback_called = threading.Event() + self._py_data = None + + cdef void user_callback(self, int status) with gil: + if self._py_callback: + self._py_callback(status, self._py_data) + self._py_callback_called.set() + + def set_completion_callback(self, py_callback, py_data = None): + self._py_callback = py_callback + self._py_data = py_data + self._py_callback_used = True + deref(self.impl).setCyCallback(self.user_callback, self) cpdef BlobBuffer _get_blob_buffer(self, const string & blob_name): cdef BlobBuffer buffer = BlobBuffer() @@ -185,13 +314,19 @@ cdef class InferRequest: cpdef async_infer(self, inputs=None): if inputs is not None: self._fill_inputs(inputs) - + self._py_callback_called.clear() deref(self.impl).infer_async() cpdef wait(self, timeout=None): - if 
timeout is None: - timeout = -1 - return deref(self.impl).wait(<int64_t> timeout) + if self._py_callback_used: + while not self._py_callback_called.is_set(): + if not self._py_callback_called.wait(timeout): + return StatusCode.REQUEST_BUSY + return StatusCode.OK + else: + if timeout is None: + timeout = -1 + return deref(self.impl).wait(<int64_t> timeout) cpdef get_perf_counts(self): cdef map[string, C.ProfileInfo] c_profile = deref(self.impl).getPerformanceCounts() @@ -258,19 +393,30 @@ cdef class LayersStatsMap(dict): self.net_impl.setStats(c_stats_map) cdef class IENetwork: - def __cinit__(self, model: str="", weights: str=""): + def __cinit__(self, model: [str, bytes] ="", weights: [str, bytes] ="", init_from_buffer: bool=False, + ngraph_compatibility: bool = False): + cdef char* xml_buffer = <char*>malloc(len(model)) + cdef uint8_t* bin_buffer = <uint8_t*>malloc(len(weights)) cdef string model_ cdef string weights_ - if model and weights: - if not os.path.isfile(model): - raise Exception("Path to the model {} doesn't exists or it's a directory".format(model)) - if not os.path.isfile(weights): - raise Exception("Path to the weights {} doesn't exists or it's a directory".format(weights)) - model_ = model.encode() - weights_ = weights.encode() - self.impl = C.IENetwork(model_, weights_) - else: + if init_from_buffer: + strcpy(xml_buffer, model) + memcpy(bin_buffer, weights, len(weights)) self.impl = C.IENetwork() + self.impl.load_from_buffer(xml_buffer, len(model), bin_buffer, len(weights)) + else: + if model and weights: + if not os.path.isfile(model): + raise Exception("Path to the model {} doesn't exist or it's a directory".format(model)) + if not os.path.isfile(weights): + raise Exception("Path to the weights {} doesn't exist or it's a directory".format(weights)) + model_ = model.encode() + weights_ = weights.encode() + self.impl = C.IENetwork(model_, weights_, ngraph_compatibility) + else: + self.impl = C.IENetwork() + free(xml_buffer) + free(bin_buffer) + @property def name(self): name = bytes(self.impl.name) @@ -302,6 +448,10 @@ cdef class IENetwork: def batch_size(self): return self.impl.batch_size + @property + def precision(self): + return self.impl.precision.decode() + @batch_size.setter def batch_size(self, batch: int): if batch <= 0: @@ -343,20 +493,26 @@ cdef class IENetwork: cdef IENetwork net = IENetwork(model, weights) return net - # TODO: Use enum with precision type instead of srting parameter when python2 support will not be required. + # TODO: Use enum with precision type instead of string parameter when python2 support will not be required. def add_outputs(self, outputs, precision="FP32"): if precision.upper() not in supported_precisions: raise AttributeError( "Unsupported precision {}! List of supported precisions: {}".format(precision, supported_precisions)) if not isinstance(outputs, list): outputs = [outputs] - cdef vector[string] _outputs - for l in outputs: - _outputs.push_back(l.encode()) - self.impl.addOutputs(_outputs, precision.upper().encode()) - - def serialize(self, path_to_xml, path_to_bin): + for i, l in enumerate(outputs): + if isinstance(l, str): + self.impl.addOutput(l.encode(), 0, precision.upper().encode()) + elif isinstance(l, tuple) and len(l) == 2: + self.impl.addOutput(l[0].encode(), l[1], precision.upper().encode()) + else: + raise TypeError("Incorrect type {type} for layer to add at index {ind}. 
" + "Expected string with layer name or tuple with two elements: layer name as " + "first element and port id as second".format(type=type(l), ind=i)) + + def serialize(self, path_to_xml, path_to_bin: str = ""): self.impl.serialize(path_to_xml.encode(), path_to_bin.encode()) + def reshape(self, input_shapes: dict): cdef map[string, vector[size_t]] c_input_shapes; cdef vector[size_t] c_shape @@ -364,7 +520,7 @@ cdef class IENetwork: for input, shape in input_shapes.items(): c_shape = [] if input not in net_inputs: - raise AttributeError("Specified {} layer not in network inputs {}! ".format(input, net_inputs)) + raise AttributeError("Specified '{}' layer not in network inputs '{}'! ".format(input, net_inputs)) for v in shape: c_shape.push_back(v) c_input_shapes[input.encode()] = c_shape @@ -393,12 +549,11 @@ cdef class IEPlugin: self.impl = C.IEPlugin(device_, dirs_) cpdef ExecutableNetwork load(self, IENetwork network, int num_requests=1, config=None): - if num_requests <= 0: - raise ValueError( - "Incorrect number of requests specified: {}. Expected positive integer number.".format(num_requests)) cdef ExecutableNetwork exec_net = ExecutableNetwork() cdef map[string, string] c_config - + if num_requests < 0: + raise ValueError("Incorrect number of requests specified: {}. Expected positive integer number " + "or zero for auto detection".format(num_requests)) if config: for k, v in config.items(): c_config[to_std_string(k)] = to_std_string(v) @@ -438,6 +593,7 @@ cdef class IEPlugin: c_config[to_std_string(k)] = to_std_string(v) self.impl.setConfig(c_config) + # TODO: Add export compiled network functionality cdef class BlobBuffer: """Copy-less accessor for Inference Engine Blob""" diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp index 1bb3e909ed5b8e..371ffcfdbba121 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp @@ -1,16 +1,6 @@ // Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
#include "ie_api_impl.hpp" #include "hetero/hetero_plugin_config.hpp" @@ -45,14 +35,151 @@ std::map layout_map = {{"ANY", Inferen } \ } \ +uint32_t getOptimalNumberOfRequests(const InferenceEngine::IExecutableNetwork::Ptr actual) { + try { + InferenceEngine::ResponseDesc response; + InferenceEngine::Parameter parameter_value; + IE_CHECK_CALL(actual->GetMetric(METRIC_KEY(SUPPORTED_METRICS), parameter_value, &response)); + auto supported_metrics = parameter_value.as>(); + std::string key = METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS); + if (std::find(supported_metrics.begin(), supported_metrics.end(), key) != supported_metrics.end()) { + IE_CHECK_CALL(actual->GetMetric(key, parameter_value, &response)); + if (parameter_value.is()) + return parameter_value.as(); + else + THROW_IE_EXCEPTION << "Unsupported format for " << key << "!" + << " Please specify number of infer requests directly!"; + } else { + THROW_IE_EXCEPTION << "Can't load network: " << key << " is not supported!" + << " Please specify number of infer requests directly!"; + } + } catch (const std::exception& ex) { + THROW_IE_EXCEPTION << "Can't load network: " << ex.what() + << " Please specify number of infer requests directly!"; + } +} -InferenceEnginePython::IENetwork::IENetwork(const std::string &model, const std::string &weights) { +PyObject* parse_parameter(const InferenceEngine::Parameter & param){ + // Check for std::string + if (param.is()){ + return PyUnicode_FromString(param.as().c_str()); + } + // Check for int + else if (param.is()) { + auto val = param.as(); + return PyLong_FromLong((long)val); + } + // Check for float + else if (param.is()) { + auto val = param.as(); + return PyFloat_FromDouble((double)val); + } + // Check for bool + else if (param.is()) { + auto val = param.as(); + return val ? 
Py_True : Py_False; + } + // Check for std::vector + else if (param.is>()) { + auto val = param.as>(); + PyObject *list = PyList_New(0); + for (const auto & it : val){ + PyObject *str_val = PyUnicode_FromString(it.c_str()); + PyList_Append(list, str_val); + } + return list; + } + // Check for std::vector + else if (param.is>()){ + auto val = param.as>(); + PyObject *list = PyList_New(0); + for (const auto & it : val){ + PyList_Append(list, PyLong_FromLong(it)); + } + return list; + } + // Check for std::vector + else if (param.is>()){ + auto val = param.as>(); + PyObject *list = PyList_New(0); + for (const auto & it : val){ + PyList_Append(list, PyFloat_FromDouble((double)it)); + } + return list; + } + // Check for std::tuple + else if (param.is>()) { + auto val = param.as>(); + PyObject *tuple = PyTuple_New(2); + PyTuple_SetItem(tuple, 0, PyLong_FromUnsignedLong((unsigned long)std::get<0>(val))); + PyTuple_SetItem(tuple, 1, PyLong_FromUnsignedLong((unsigned long)std::get<1>(val))); + return tuple; + } + // Check for std::tuple + else if (param.is>()) { + auto val = param.as>(); + PyObject *tuple = PyTuple_New(3); + PyTuple_SetItem(tuple, 0, PyLong_FromUnsignedLong((unsigned long)std::get<0>(val))); + PyTuple_SetItem(tuple, 1, PyLong_FromUnsignedLong((unsigned long)std::get<1>(val))); + PyTuple_SetItem(tuple, 2, PyLong_FromUnsignedLong((unsigned long)std::get<2>(val))); + return tuple; + } + // Check for std::map + else if (param.is>()) { + auto val = param.as>(); + PyObject *dict = PyDict_New(); + for (const auto &it : val){ + PyDict_SetItemString(dict, it.first.c_str(), PyUnicode_FromString(it.second.c_str())); + } + return dict; + } + // Check for std::map + else if (param.is>()) { + auto val = param.as>(); + PyObject *dict = PyDict_New(); + for (const auto &it : val){ + PyDict_SetItemString(dict, it.first.c_str(), PyLong_FromLong((long)it.second)); + } + return dict; + } + else { + PyErr_SetString(PyExc_TypeError, "Failed to convert parameter to Python representation!"); + return (PyObject *) NULL; + } +} +InferenceEnginePython::IENetwork::IENetwork(const std::string &model, const std::string &weights, bool ngraph_compatibility = false) { + if (ngraph_compatibility){ + InferenceEngine::IRReader ir_reader; + auto ngraph_function = ir_reader.read(model, weights); + actual = InferenceEngine::CNNNetwork(InferenceEngine::convertFunctionToICNNNetwork(ngraph_function)); + } else { + InferenceEngine::CNNNetReader net_reader; + net_reader.ReadNetwork(model); + net_reader.ReadWeights(weights); + actual = net_reader.getNetwork(); + } + name = actual.getName(); + batch_size = actual.getBatchSize(); + precision = actual.getPrecision().name(); +} + +InferenceEnginePython::IENetwork::IENetwork(const InferenceEngine::CNNNetwork& cnn_network) + : actual(cnn_network) { + name = actual.getName(); + batch_size = actual.getBatchSize(); + precision = actual.getPrecision().name(); +} + +void InferenceEnginePython::IENetwork::load_from_buffer(const char *xml, size_t xml_size, uint8_t *bin, size_t bin_size) { InferenceEngine::CNNNetReader net_reader; - net_reader.ReadNetwork(model); - net_reader.ReadWeights(weights); + net_reader.ReadNetwork(xml, xml_size); + InferenceEngine::TensorDesc tensorDesc(InferenceEngine::Precision::U8, {bin_size}, InferenceEngine::Layout::C); + auto weights_blob = InferenceEngine::make_shared_blob(tensorDesc, bin, bin_size); + net_reader.SetWeights(weights_blob); name = net_reader.getName(); actual = net_reader.getNetwork(); batch_size = actual.getBatchSize(); + precision = 
actual.getPrecision().name(); } void InferenceEnginePython::IENetwork::serialize(const std::string &path_to_xml, const std::string &path_to_bin) { @@ -87,7 +214,7 @@ InferenceEnginePython::IENetwork::getLayers() { for (auto layer_iter : inputTo) { InferenceEngine::CNNLayerPtr layer_in_data = layer_iter.second; if (!layer_in_data) { - THROW_IE_EXCEPTION << "Layer which takes data " << data->name << " is nullptr"; + THROW_IE_EXCEPTION << "Layer which takes data " << data->getName() << " is nullptr"; } children.emplace_back(layer_in_data->name); } @@ -150,22 +277,15 @@ const std::map InferenceEnginePy } void -InferenceEnginePython::IENetwork::addOutputs(const std::vector &out_layers, const std::string &precision) { - for (auto &&l : out_layers) { - InferenceEngine::OutputsDataMap outputsDataMap = actual.getOutputsInfo(); - if (outputsDataMap.find(l) != outputsDataMap.end()) { - continue; - } - InferenceEngine::CNNLayerPtr cnnLayer = actual.getLayerByName(l.c_str()); - std::vector outData = cnnLayer->outData; - if (outData.size() != 1) { - std::cout << "Layer " << l << " has " << outData.size() << " output blobs and can not be set as output." - << std::endl; - continue; - } - actual.addOutput(l); - InferenceEngine::OutputsDataMap outputsDataMapUpd = actual.getOutputsInfo(); - outputsDataMapUpd[l]->setPrecision(precision_map[precision]); +InferenceEnginePython::IENetwork::addOutput(const std::string &out_layer, size_t port_id, const std::string &precision) { + actual.addOutput(out_layer, port_id); + InferenceEngine::OutputsDataMap outputsDataMapUpd = actual.getOutputsInfo(); + if (outputsDataMapUpd.count(out_layer)) { + outputsDataMapUpd[out_layer]->setPrecision(precision_map[precision]); + } else if (outputsDataMapUpd.count(out_layer + "." + std::to_string(port_id))){ + outputsDataMapUpd[out_layer + "." 
+ std::to_string(port_id)]->setPrecision(precision_map[precision]); + } else { + THROW_IE_EXCEPTION << "Failed to set precision for layer " << out_layer; } } @@ -192,9 +312,8 @@ const std::map>> Inference return map; } -void -InferenceEnginePython::IENetwork::setStats( - const std::map>> &stats) { +void InferenceEnginePython::IENetwork::setStats(const std::map>> &stats) { InferenceEngine::ICNNNetworkStats *pstats = nullptr; InferenceEngine::ResponseDesc response; IE_CHECK_CALL(((InferenceEngine::ICNNNetwork &) actual).getStats(&pstats, &response)); @@ -222,10 +341,11 @@ void InferenceEnginePython::OutputInfo::setPrecision(std::string precision) { } InferenceEnginePython::IEPlugin::IEPlugin(const std::string &device, const std::vector &plugin_dirs) { + IE_SUPPRESS_DEPRECATED_START InferenceEngine::PluginDispatcher dispatcher{plugin_dirs}; actual = dispatcher.getPluginByDevice(device); - const InferenceEngine::Version *pluginVersion; - actual->GetVersion(pluginVersion); + IE_SUPPRESS_DEPRECATED_END + auto pluginVersion = actual.GetVersion(); version = std::to_string(pluginVersion->apiVersion.major) + "."; version += std::to_string(pluginVersion->apiVersion.minor) + "."; version += pluginVersion->buildNumber; @@ -233,17 +353,32 @@ InferenceEnginePython::IEPlugin::IEPlugin(const std::string &device, const std:: } void InferenceEnginePython::IEPlugin::setInitialAffinity(const InferenceEnginePython::IENetwork &net) { - InferenceEngine::HeteroPluginPtr hetero_plugin(actual); - InferenceEngine::ResponseDesc response; + InferenceEngine::InferenceEnginePluginPtr hetero_plugin(actual); + InferenceEngine::QueryNetworkResult queryRes; auto &network = net.actual; - IE_CHECK_CALL(hetero_plugin->SetAffinity(network, {}, &response)); + + hetero_plugin->QueryNetwork(network, {}, queryRes); + + if (queryRes.rc != InferenceEngine::StatusCode::OK) { + THROW_IE_EXCEPTION << queryRes.resp.msg; + } + + for (auto && layer : queryRes.supportedLayersMap) { + network.getLayerByName(layer.first.c_str())->affinity = layer.second; + } } std::set InferenceEnginePython::IEPlugin::queryNetwork(const InferenceEnginePython::IENetwork &net) { const InferenceEngine::CNNNetwork &network = net.actual; InferenceEngine::QueryNetworkResult queryRes; - actual->QueryNetwork(network, queryRes); - return queryRes.supportedLayers; + actual.QueryNetwork(network, {}, queryRes); + + std::set supportedLayers; + for (auto && layer : queryRes.supportedLayersMap) { + supportedLayers.insert(layer.first); + } + + return supportedLayers; } @@ -289,10 +424,9 @@ void InferenceEnginePython::IENetLayer::setPrecision(std::string precision) { } void InferenceEnginePython::IEPlugin::addCpuExtension(const std::string &extension_path) { - InferenceEngine::ResponseDesc response; auto extension_ptr = InferenceEngine::make_so_pointer(extension_path); auto extension = std::dynamic_pointer_cast(extension_ptr); - IE_CHECK_CALL(actual->AddExtension(extension, &response)) + actual.AddExtension(extension); } std::unique_ptr @@ -302,7 +436,12 @@ InferenceEnginePython::IEPlugin::load(const InferenceEnginePython::IENetwork &ne InferenceEngine::ResponseDesc response; auto exec_network = InferenceEnginePython::make_unique(net.name, num_requests); - IE_CHECK_CALL(actual->LoadNetwork(exec_network->actual, net.actual, config, &response)) + exec_network->actual = actual.LoadNetwork(net.actual, config); + + if (0 == num_requests) { + num_requests = getOptimalNumberOfRequests(exec_network->actual); + exec_network->infer_requests.resize(num_requests); + } for (size_t i = 0; i 
< num_requests; ++i) { InferRequestWrap &infer_request = exec_network->infer_requests[i]; @@ -313,8 +452,7 @@ InferenceEnginePython::IEPlugin::load(const InferenceEnginePython::IENetwork &ne } void InferenceEnginePython::IEPlugin::setConfig(const std::map<std::string, std::string> &config) { - InferenceEngine::ResponseDesc response; - IE_CHECK_CALL(actual->SetConfig(config, &response)) + actual.SetConfig(config); } InferenceEnginePython::IEExecNetwork::IEExecNetwork(const std::string &name, size_t num_requests) : @@ -326,9 +464,29 @@ void InferenceEnginePython::IEExecNetwork::infer() { request.infer(); } +InferenceEnginePython::IENetwork InferenceEnginePython::IEExecNetwork::GetExecGraphInfo() { + InferenceEngine::ResponseDesc response; + InferenceEngine::ICNNNetwork::Ptr graph; + IE_CHECK_CALL(actual->GetExecGraphInfo(graph, &response)); + return IENetwork(InferenceEngine::CNNNetwork(graph)); +} -void InferenceEnginePython::InferRequestWrap::getBlobPtr(const std::string &blob_name, InferenceEngine::Blob::Ptr &blob_ptr) -{ +PyObject* InferenceEnginePython::IEExecNetwork::getMetric(const std::string &metric_name) { + InferenceEngine::Parameter parameter; + InferenceEngine::ResponseDesc response; + IE_CHECK_CALL(actual->GetMetric(metric_name, parameter, &response)); + return parse_parameter(parameter); +} + +PyObject* InferenceEnginePython::IEExecNetwork::getConfig(const std::string &metric_name) { + InferenceEngine::Parameter parameter; + InferenceEngine::ResponseDesc response; + IE_CHECK_CALL(actual->GetConfig(metric_name, parameter, &response)); + return parse_parameter(parameter); +} + +void InferenceEnginePython::InferRequestWrap::getBlobPtr(const std::string &blob_name, + InferenceEngine::Blob::Ptr &blob_ptr) { InferenceEngine::ResponseDesc response; IE_CHECK_CALL(request_ptr->GetBlob(blob_name.c_str(), blob_ptr, &response)); } @@ -339,16 +497,24 @@ void InferenceEnginePython::InferRequestWrap::setBatch(int size) { InferenceEngine::ResponseDesc response; IE_CHECK_CALL(request_ptr->SetBatch(size, &response)); } -void latency_callback(InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode code){ +void latency_callback(InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode code) { if (code != InferenceEngine::StatusCode::OK) { THROW_IE_EXCEPTION << "Async Infer Request failed with status code " << code; } InferenceEnginePython::InferRequestWrap *requestWrap; InferenceEngine::ResponseDesc dsc; - request->GetUserData(reinterpret_cast<void **>(&requestWrap), &dsc); + request->GetUserData(reinterpret_cast<void **>(&requestWrap), &dsc); auto end_time = Time::now(); auto execTime = std::chrono::duration_cast<ns>(end_time - requestWrap->start_time); requestWrap->exec_time = static_cast<double>(execTime.count()) * 0.000001; + if (requestWrap->user_callback) { + requestWrap->user_callback(requestWrap->user_data, code); + } +} + +void InferenceEnginePython::InferRequestWrap::setCyCallback(cy_callback callback, void *data) { + user_callback = callback; + user_data = data; } void InferenceEnginePython::InferRequestWrap::infer() { @@ -413,3 +579,77 @@ std::string InferenceEnginePython::get_version() { version_str += version->buildNumber; return version_str; } + + +InferenceEnginePython::IECore::IECore(const std::string & xmlConfigFile) { + actual = InferenceEngine::Core(xmlConfigFile); +} + +std::map<std::string, InferenceEngine::Version> InferenceEnginePython::IECore::getVersions(const std::string &deviceName) { + return actual.GetVersions(deviceName); +} + +std::unique_ptr<InferenceEnginePython::IEExecNetwork> InferenceEnginePython::IECore::loadNetwork(IENetwork network, + const std::string & deviceName, const std::map<std::string, std::string> & config, int num_requests){ + + InferenceEngine::ResponseDesc response; + auto exec_network = InferenceEnginePython::make_unique<InferenceEnginePython::IEExecNetwork>(network.name, + num_requests); + exec_network->actual = actual.LoadNetwork(network.actual, deviceName, config); + + if (0 == num_requests) { + num_requests = getOptimalNumberOfRequests(exec_network->actual); + exec_network->infer_requests.resize(num_requests); + } + + for (size_t i = 0; i < num_requests; ++i) { + InferRequestWrap &infer_request = exec_network->infer_requests[i]; + IE_CHECK_CALL(exec_network->actual->CreateInferRequest(infer_request.request_ptr, &response)) + } + + return exec_network; +} + +std::map<std::string, std::string> InferenceEnginePython::IECore::queryNetwork(InferenceEnginePython::IENetwork network, + const std::string &deviceName, + const std::map<std::string, std::string> &config) { + auto res = actual.QueryNetwork(network.actual, deviceName, config); + return res.supportedLayersMap; +} + +void InferenceEnginePython::IECore::setConfig(const std::map<std::string, std::string> &config, + const std::string &deviceName) { + actual.SetConfig(config, deviceName); +} + +void InferenceEnginePython::IECore::registerPlugin(const std::string & pluginName, const std::string &deviceName) { + actual.RegisterPlugin(pluginName, deviceName); +} + +void InferenceEnginePython::IECore::unregisterPlugin(const std::string & deviceName){ + actual.UnregisterPlugin(deviceName); +} + +void InferenceEnginePython::IECore::registerPlugins(const std::string & xmlConfigFile){ + actual.RegisterPlugins(xmlConfigFile); +} + +void InferenceEnginePython::IECore::addExtension(const std::string & ext_lib_path, const std::string &deviceName) { + auto extension_ptr = InferenceEngine::make_so_pointer<InferenceEngine::IExtension>(ext_lib_path); + auto extension = std::dynamic_pointer_cast<InferenceEngine::IExtension>(extension_ptr); + actual.AddExtension(extension, deviceName); +} + +std::vector<std::string> InferenceEnginePython::IECore::getAvailableDevices() { + return actual.GetAvailableDevices(); +} + +PyObject* InferenceEnginePython::IECore::getMetric(const std::string &deviceName, const std::string &name) { + InferenceEngine::Parameter param = actual.GetMetric(deviceName, name); + return parse_parameter(param); +} + +PyObject* InferenceEnginePython::IECore::getConfig(const std::string &deviceName, const std::string &name) { + InferenceEngine::Parameter param = actual.GetConfig(deviceName, name); + return parse_parameter(param); +}
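On the Python side, passing num_requests=0 to IECore.load_network reaches this auto-detection path. A hedged sketch; the IR paths are placeholders, and both keys assume a plugin that actually reports the OPTIMAL_NUMBER_OF_INFER_REQUESTS and NETWORK_NAME metrics:

```
ie = IECore()
net = IENetwork(model="model.xml", weights="model.bin")  # hypothetical paths

# num_requests=0 asks the plugin for OPTIMAL_NUMBER_OF_INFER_REQUESTS;
# plugins that do not report that metric raise an exception instead
exec_net = ie.load_network(net, "CPU", num_requests=0)
print(len(exec_net.requests), "infer requests were created")

# executable-network metrics are converted by parse_parameter above
print(exec_net.get_metric("NETWORK_NAME"))
```

diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp index 9297de68967380..59e632080776df 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp @@ -1,35 +1,26 @@ // Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 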
#pragma once -#include -#include +#include "Python.h" +#include #include #include #include #include #include - - #include #include - #include #include + +#include #include "inference_engine.hpp" +#include "../../../../../src/inference_engine/ie_ir_reader.hpp" + typedef std::chrono::high_resolution_clock Time; typedef std::chrono::nanoseconds ns; @@ -90,10 +81,11 @@ struct IENetwork { InferenceEngine::CNNNetwork actual; std::string name; std::size_t batch_size; + std::string precision; void setBatch(const size_t size); - void addOutputs(const std::vector &out_layers, const std::string &precision); + void addOutput(const std::string &out_layer, size_t port_id, const std::string &precision); const std::vector> getLayers(); @@ -109,21 +101,33 @@ struct IENetwork { const std::map>> getStats(); - IENetwork(const std::string &model, const std::string &weights); + void load_from_buffer(const char* xml, size_t xml_size, uint8_t* bin, size_t bin_size); + + IENetwork(const std::string &model, const std::string &weights, bool ngraph_compatibility); + + IENetwork(const InferenceEngine::CNNNetwork& cnn_network); IENetwork() = default; }; struct InferRequestWrap { + using cy_callback = void (*)(void*, int); + InferenceEngine::IInferRequest::Ptr request_ptr; Time::time_point start_time; double exec_time; + cy_callback user_callback; + void *user_data; + int status; + void infer(); void infer_async(); int wait(int64_t timeout); + void setCyCallback(cy_callback callback, void *data); + void getBlobPtr(const std::string &blob_name, InferenceEngine::Blob::Ptr &blob_ptr); void setBatch(int size); @@ -139,7 +143,12 @@ struct IEExecNetwork { IEExecNetwork(const std::string &name, size_t num_requests); + IENetwork GetExecGraphInfo(); + void infer(); + + PyObject* getMetric(const std::string & metric_name); + PyObject* getConfig(const std::string & metric_name); }; @@ -163,7 +172,25 @@ struct IEPlugin { std::set queryNetwork(const InferenceEnginePython::IENetwork &net); - InferenceEngine::InferenceEnginePluginPtr actual; + InferenceEngine::InferencePlugin actual; +}; + +struct IECore { + InferenceEngine::Core actual; + explicit IECore(const std::string & xmlConfigFile = std::string()); + std::map getVersions(const std::string & deviceName); + std::unique_ptr loadNetwork(IENetwork network, const std::string & deviceName, + const std::map & config, int num_requests); + std::map queryNetwork(IENetwork network, const std::string & deviceName, + const std::map & config); + void setConfig(const std::map &config, const std::string & deviceName = std::string()); + void registerPlugin(const std::string & pluginName, const std::string & deviceName); + void unregisterPlugin(const std::string & deviceName); + void registerPlugins(const std::string & xmlConfigFile); + void addExtension(const std::string & ext_lib_path, const std::string & deviceName); + std::vector getAvailableDevices(); + PyObject* getMetric(const std::string & deviceName, const std::string & name); + PyObject* getConfig(const std::string & deviceName, const std::string & name); }; template diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd index f5729b684e2bea..95db6d98cc946f 100644 --- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd +++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd @@ -1,12 +1,12 @@ from libc.stddef cimport size_t +from libcpp 
cimport bool from libcpp.string cimport string from libcpp.vector cimport vector from libcpp.map cimport map from libcpp.set cimport set from libcpp.pair cimport pair from libcpp.memory cimport unique_ptr, shared_ptr -from libc.stdint cimport int64_t - +from libc.stdint cimport int64_t, uint8_t cdef extern from "" namespace "InferenceEngine": @@ -24,6 +24,14 @@ cdef extern from "" namespace "InferenceEngine": cdef cppclass Precision: const char*name() const + cdef struct apiVersion: + int minor + int major + + cdef cppclass Version: + const char *buildNumber + const char *description + apiVersion apiVersion cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython": cdef cppclass IENetLayer: @@ -69,17 +77,21 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython": cdef cppclass IEExecNetwork: vector[InferRequestWrap] infer_requests + IENetwork GetExecGraphInfo() except + + object getMetric(const string & metric_name) + object getConfig(const string & metric_name) cdef cppclass IENetwork: IENetwork() except + - IENetwork(const string &, const string &) except + + IENetwork(const string &, const string &, bool ngraph_compatibility) except + string name size_t batch_size + string precision map[string, vector[size_t]] inputs const vector[pair[string, IENetLayer]] getLayers() except + map[string, InputInfo] getInputs() except + map[string, OutputInfo] getOutputs() except + - void addOutputs(vector[string] &, string &) except + + void addOutput(string &, size_t, string &) except + void setAffinity(map[string, string] & types_affinity_map, map[string, string] & layers_affinity_map) except + void setBatch(size_t size) except + void setLayerParams(map[string, map[string, string]] params_map) except + @@ -87,6 +99,7 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython": void reshape(map[string, vector[size_t]] input_shapes) except + void setStats(map[string, map[string, vector[float]]] & stats) except + map[string, map[string, vector[float]]] getStats() except + + void load_from_buffer(const char*xml, size_t xml_size, uint8_t*bin, size_t bin_size) except + cdef cppclass IEPlugin: IEPlugin() except + @@ -101,12 +114,30 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython": cdef cppclass InferRequestWrap: double exec_time; - void getBlobPtr(const string &blob_name, Blob.Ptr &blob_ptr) except + + void getBlobPtr(const string & blob_name, Blob.Ptr & blob_ptr) except + map[string, ProfileInfo] getPerformanceCounts() except + void infer() except + void infer_async() except + int wait(int64_t timeout) except + void setBatch(int size) except + + void setCyCallback(void (*)(void*, int), void *) except + + + cdef cppclass IECore: + IECore() except + + IECore(const string & xml_config_file) except + + map[string, Version] getVersions(const string & deviceName) except + + unique_ptr[IEExecNetwork] loadNetwork(IENetwork network, const string deviceName, + const map[string, string] & config, int num_requests) except + + map[string, string] queryNetwork(IENetwork network, const string deviceName, + const map[string, string] & config) except + + void setConfig(const map[string, string] & config, const string & deviceName) except + + void registerPlugin(const string & pluginName, const string & deviceName) except + + void unregisterPlugin(const string & deviceName) except + + void registerPlugins(const string & xmlConfigFile) except + + void addExtension(const string & ext_lib_path, const string & deviceName) except + + vector[string] 
getAvailableDevices() except + + object getMetric(const string & deviceName, const string & name) except + + object getConfig(const string & deviceName, const string & name) except + cdef T*get_buffer[T](Blob &) diff --git a/inference-engine/ie_bridges/python/src/openvino/tools/__init__.py b/inference-engine/ie_bridges/python/src/openvino/tools/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/inference-engine/ie_bridges/python/src/openvino/tools/statistics_collector/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/tools/statistics_collector/CMakeLists.txt new file mode 100644 index 00000000000000..a649bd2655fc1d --- /dev/null +++ b/inference-engine/ie_bridges/python/src/openvino/tools/statistics_collector/CMakeLists.txt @@ -0,0 +1,33 @@ +# If the pyx file is a C++ file, we should specify that here. +set (CMAKE_INCLUDE_CURRENT_DIR ON) +set (TARGET_NAME "statistics_collector_api") + +set (CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}/tools/statistics_collector) +set (CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) + +file(GLOB SOURCE + ${CMAKE_CURRENT_SOURCE_DIR}/*.pyx + ) + +set_source_files_properties(${SOURCE} PROPERTIES CYTHON_IS_CXX TRUE +) +include_directories ( + ${CMAKE_SOURCE_DIR}/samples/common +) + +## Compatibility with python 2.7 which has deprecated "register" specifier +if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + add_definitions("-Wno-register") +endif() + +cython_add_module (${TARGET_NAME} ${SOURCE}) + +set_target_properties (${TARGET_NAME} PROPERTIES CXX_STANDARD 11 LINKER_LANGUAGE CXX) +target_link_libraries (${TARGET_NAME} PRIVATE statistics_collector_s) + +# perform copy +ADD_CUSTOM_COMMAND (TARGET ${TARGET_NAME} + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${PYTHON_BRIDGE_SRC_ROOT}/src/openvino/tools/__init__.py ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/../__init__.py + COMMAND ${CMAKE_COMMAND} -E copy ${PYTHON_BRIDGE_SRC_ROOT}/src/openvino/tools/statistics_collector/__init__.py ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/__init__.py + ) diff --git a/inference-engine/ie_bridges/python/src/openvino/tools/statistics_collector/__init__.py b/inference-engine/ie_bridges/python/src/openvino/tools/statistics_collector/__init__.py new file mode 100644 index 00000000000000..0cd0454367739c --- /dev/null +++ b/inference-engine/ie_bridges/python/src/openvino/tools/statistics_collector/__init__.py @@ -0,0 +1,2 @@ +from .statistics_collector_api import * +__all__ = ['StatisticsCollector'] \ No newline at end of file diff --git a/inference-engine/ie_bridges/python/src/openvino/tools/statistics_collector/statistics_collector_api.pxd b/inference-engine/ie_bridges/python/src/openvino/tools/statistics_collector/statistics_collector_api.pxd new file mode 100644 index 00000000000000..94cd5c0a3561a9 --- /dev/null +++ b/inference-engine/ie_bridges/python/src/openvino/tools/statistics_collector/statistics_collector_api.pxd @@ -0,0 +1,8 @@ +from .cimport statistics_collector_c as C +from libcpp.string cimport string + + +cdef class StatisticsCollector: + cdef C.StatisticsCollector* _impl + cdef C.ct_preprocessingOptions ppOptions + cpdef void collectStatisticsToIR(self, str outModelName, str output_precision) \ No newline at end of file diff --git a/inference-engine/ie_bridges/python/src/openvino/tools/statistics_collector/statistics_collector_api.pyx b/inference-engine/ie_bridges/python/src/openvino/tools/statistics_collector/statistics_collector_api.pyx new file mode 100644 index 00000000000000..7ebd6d5503a7b3 --- 
/dev/null +++ b/inference-engine/ie_bridges/python/src/openvino/tools/statistics_collector/statistics_collector_api.pyx @@ -0,0 +1,25 @@ +#distutils: language=c++ +from .cimport statistics_collector_c as C + + +cdef class StatisticsCollector: + def __cinit__(self, + deviceName: [str, bytes], + custom_cpu_library: [str, bytes], + custom_cldnn: [str, bytes], + modelFilePath: [str, bytes], + imagesPath: [str, bytes], + img_number: int, + batch: int, + progress: [str, bytes]): + self.ppOptions._pp_size = 0 + self.ppOptions._pp_width = 0 + self.ppOptions._pp_height = 0 + self._impl = new C.StatisticsCollector(deviceName.encode(), custom_cpu_library.encode(), custom_cldnn.encode(), modelFilePath.encode(), imagesPath.encode(), img_number, batch, self.ppOptions, progress.encode()) + + cpdef void collectStatisticsToIR(self, str outModelName, str output_precision): + self._impl.collectStatisticsToIR(outModelName.encode(), output_precision.encode()) + + def __dealloc__(self): + if self._impl is not NULL: + del self._impl diff --git a/inference-engine/ie_bridges/python/src/openvino/tools/statistics_collector/statistics_collector_c.pxd b/inference-engine/ie_bridges/python/src/openvino/tools/statistics_collector/statistics_collector_c.pxd new file mode 100644 index 00000000000000..523875999adb58 --- /dev/null +++ b/inference-engine/ie_bridges/python/src/openvino/tools/statistics_collector/statistics_collector_c.pxd @@ -0,0 +1,24 @@ +from libc.stddef cimport size_t +from libcpp.string cimport string + + +cdef extern from "": + + cdef struct ct_preprocessingOptions: + string _pp_type + size_t _pp_size + size_t _pp_width + size_t _pp_height + + cdef cppclass StatisticsCollector: + StatisticsCollector(const string& deviceName, + const string& custom_cpu_library, + const string& custom_cldnn, + const string& modelFilePath, + const string& imagesPath, + size_t img_number, + size_t batch, + const ct_preprocessingOptions& preprocessingOptions, + const string& progress) except + + void collectStatisticsToIR(const string& outModelName, const string& output_precision) + ct_preprocessingOptions ppOptions
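For reference, a heavily hedged usage sketch of these new statistics-collector bindings; every argument value below (extension library paths, image folder, image count, progress mode string, output precision) is an assumption for illustration, not a documented default:

```
from openvino.tools.statistics_collector import StatisticsCollector

collector = StatisticsCollector("CPU",        # deviceName
                                "",           # custom_cpu_library (none, assumed)
                                "",           # custom_cldnn (none, assumed)
                                "model.xml",  # modelFilePath, hypothetical IR
                                "images",     # imagesPath, hypothetical folder
                                100,          # img_number: images to process
                                1,            # batch size
                                "none")       # progress mode (assumed value)
# writes a calibrated IR; the output precision string is an assumption
collector.collectStatisticsToIR("model_stats", "I8")
```

diff --git a/inference-engine/include/builders/ie_deformable_convolution_layer.hpp b/inference-engine/include/builders/ie_deformable_convolution_layer.hpp new file mode 100644 index 00000000000000..7178d0ecd96371 --- /dev/null +++ b/inference-engine/include/builders/ie_deformable_convolution_layer.hpp @@ -0,0 +1,48 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +namespace InferenceEngine { +namespace Builder { + +/** + * @brief The class represents a builder for Deformable Convolution layer + */ +class INFERENCE_ENGINE_API_CLASS(DeformableConvolutionLayer): public ConvolutionLayer { +public: + /** + * @brief The constructor creates a builder with the name + * @param name Layer name + */ + explicit DeformableConvolutionLayer(const std::string& name = ""); + /** + * @brief The constructor creates a builder from generic builder + * @param layer pointer to generic builder + */ + explicit DeformableConvolutionLayer(const Layer::Ptr& layer); + /** + * @brief The constructor creates a builder from generic builder + * @param layer constant pointer to generic builder + */ + explicit DeformableConvolutionLayer(const Layer::CPtr& layer); + /** + * @brief Return deformable_group size + * @return Deformable group size + */ + size_t getDeformableGroup() const; + /** + * @brief Sets deformable group size + * @param deformableGroup Deformable group + 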
* @return reference to layer builder + */ + Builder::DeformableConvolutionLayer& setDeformableGroup(size_t deformableGroup); +}; + +} // namespace Builder +} // namespace InferenceEngine diff --git a/inference-engine/include/builders/ie_grn_layer.hpp b/inference-engine/include/builders/ie_grn_layer.hpp index e544ab6e690a8c..c6f3e4802effa7 100644 --- a/inference-engine/include/builders/ie_grn_layer.hpp +++ b/inference-engine/include/builders/ie_grn_layer.hpp @@ -12,7 +12,7 @@ namespace InferenceEngine { namespace Builder { /** - * @brief The class represents a builder for ArgMax layer + * @brief The class represents a builder for GRN layer */ class INFERENCE_ENGINE_API_CLASS(GRNLayer): public LayerDecorator { public: diff --git a/inference-engine/include/builders/ie_lrn_layer.hpp b/inference-engine/include/builders/ie_lrn_layer.hpp index fcf58affb0cca2..625de12798624b 100644 --- a/inference-engine/include/builders/ie_lrn_layer.hpp +++ b/inference-engine/include/builders/ie_lrn_layer.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2019 Intel Corporation +// Copyright (C) 2019 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // diff --git a/inference-engine/include/cldnn/cldnn_config.hpp b/inference-engine/include/cldnn/cldnn_config.hpp index 571ff510363a32..6153c0aaef3f58 100644 --- a/inference-engine/include/cldnn/cldnn_config.hpp +++ b/inference-engine/include/cldnn/cldnn_config.hpp @@ -56,5 +56,10 @@ DECLARE_CLDNN_CONFIG_KEY(GRAPH_DUMPS_DIR); */ DECLARE_CLDNN_CONFIG_KEY(SOURCES_DUMPS_DIR); +/** +* @brief This key turns on int8 optimizations and the use of quantized models. +*/ +DECLARE_CLDNN_CONFIG_KEY(INT8_ENABLED); + } // namespace CLDNNConfigParams } // namespace InferenceEngine diff --git a/inference-engine/include/cpp/ie_cnn_net_reader.h b/inference-engine/include/cpp/ie_cnn_net_reader.h index 149f86a66b6fcb..61684f0c394609 100644 --- a/inference-engine/include/cpp/ie_cnn_net_reader.h +++ b/inference-engine/include/cpp/ie_cnn_net_reader.h @@ -17,6 +17,7 @@ #include "ie_common.h" #include "ie_icnn_net_reader.h" #include "details/ie_exception_conversion.hpp" +#include "details/os/os_filesystem.hpp" namespace InferenceEngine { /** @@ -35,6 +36,16 @@ class CNNNetReader { */ CNNNetReader() : actual(shared_from_irelease(InferenceEngine::CreateCNNNetReader())) {} +#ifdef ENABLE_UNICODE_PATH_SUPPORT + /** + * @brief Resolve wstring path then call original ReadNetwork + * ICNNNetReader::ReadNetwork + */ + void ReadNetwork(const std::wstring &filepath) { + CALL_STATUS_FNC(ReadNetwork, details::wStringtoMBCSstringChar(filepath).c_str()); + } +#endif + /** * @brief Wraps original method * ICNNNetReader::ReadNetwork @@ -55,15 +66,25 @@ class CNNNetReader { * @brief Wraps original method * ICNNNetReader::SetWeights */ - void SetWeights(const TBlob<uint8_t>::Ptr &weights) const { + void SetWeights(const TBlob<uint8_t>::Ptr &weights) { CALL_STATUS_FNC(SetWeights, weights); } +#ifdef ENABLE_UNICODE_PATH_SUPPORT + /** + * @brief Resolve wstring path then call original ReadWeights + * ICNNNetReader::ReadWeights + */ + void ReadWeights(const std::wstring &filepath) { + CALL_STATUS_FNC(ReadWeights, details::wStringtoMBCSstringChar(filepath).c_str()); + } +#endif + /** * @brief Wraps original method * ICNNNetReader::ReadWeights */ - void ReadWeights(const std::string &filepath) const { + void ReadWeights(const std::string &filepath) { CALL_STATUS_FNC(ReadWeights, filepath.c_str()); } diff --git a/inference-engine/include/cpp/ie_cnn_network.h b/inference-engine/include/cpp/ie_cnn_network.h index 
4ccccd8ce3ca5c..9f4f5bb05e5a6a 100644 --- a/inference-engine/include/cpp/ie_cnn_network.h +++ b/inference-engine/include/cpp/ie_cnn_network.h @@ -34,10 +34,11 @@ class CNNNetwork { CNNNetwork() = default; /** + * @deprecated Use CNNNetwork::CNNNetwork(std::shared_ptr<ICNNNetwork>) to construct a network * @brief Initialises helper class from externally managed pointer - * @deprecated use shared_pointers based version of CNNNetworks constructor * @param actual Pointer to the network object */ + INFERENCE_ENGINE_DEPRECATED explicit CNNNetwork(ICNNNetwork* actual) : actual(actual) { if (actual == nullptr) { THROW_IE_EXCEPTION << "CNNNetwork was not initialized."; @@ -142,11 +143,17 @@ class CNNNetwork { } /** + * @deprecated No need to specify a target device for the network. Use InferenceEngine::Core with the target device directly * @brief Sets the target device * @param device Device instance to set */ + #ifndef _WIN32 + INFERENCE_ENGINE_DEPRECATED + #endif void setTargetDevice(TargetDevice device) { + IE_SUPPRESS_DEPRECATED_START actual->setTargetDevice(device); + IE_SUPPRESS_DEPRECATED_END } /** @@ -212,7 +219,7 @@ class CNNNetwork { if (info) { auto data = info->getInputData(); if (data) { - shapes[data->name] = data->getTensorDesc().getDims(); + shapes[data->getName()] = data->getTensorDesc().getDims(); } } } diff --git a/inference-engine/include/cpp/ie_executable_network.hpp b/inference-engine/include/cpp/ie_executable_network.hpp index c9225a148e9bc7..2eb235af55748e 100644 --- a/inference-engine/include/cpp/ie_executable_network.hpp +++ b/inference-engine/include/cpp/ie_executable_network.hpp @@ -14,6 +14,7 @@ #include #include #include "ie_iexecutable_network.hpp" +#include "ie_plugin_ptr.hpp" #include "cpp/ie_infer_request.hpp" #include "cpp/ie_memory_state.hpp" #include "cpp/ie_cnn_network.h" @@ -26,11 +27,16 @@ namespace InferenceEngine { */ class ExecutableNetwork { IExecutableNetwork::Ptr actual; + InferenceEnginePluginPtr plg; public: ExecutableNetwork() = default; + ~ExecutableNetwork() { + actual = nullptr; + } - explicit ExecutableNetwork(IExecutableNetwork::Ptr actual) : actual(actual) {} + explicit ExecutableNetwork(IExecutableNetwork::Ptr actual, InferenceEnginePluginPtr plg = {}) + : actual(actual), plg(plg) {} /** * @brief Wraps original method @@ -68,7 +74,7 @@ IInferRequest::Ptr req; CALL_STATUS_FNC(CreateInferRequest, req); if (req.get() == nullptr) THROW_IE_EXCEPTION << "Internal error: pointer to infer request is null"; - return InferRequest(req); + return InferRequest(req, plg); } /** @@ -79,7 +85,7 @@ InferRequest::Ptr CreateInferRequestPtr() { IInferRequest::Ptr req; CALL_STATUS_FNC(CreateInferRequest, req); - return std::make_shared<InferRequest>(req); + return std::make_shared<InferRequest>(req, plg); } /** @@ -139,6 +145,41 @@ return controller; } + /** + * @brief Sets configuration for current executable network + * @param config Map of pairs: (config parameter name, config parameter value) + */ + void SetConfig(const std::map<std::string, std::string> &config) { + CALL_STATUS_FNC(SetConfig, config); + } + + /** + * @brief Gets configuration dedicated to plugin behaviour + * @param name - config key, can be found in ie_plugin_config.hpp + * @return value of config corresponding to the config key
 + */ + Parameter GetConfig(const std::string &name) const { + Parameter configValue; + CALL_STATUS_FNC(GetConfig, name, configValue); + return configValue; + } + + /** + * @brief Gets general runtime metric for dedicated hardware + * @param name - metric name to request + * @return metric value corresponding to the metric key + */ + Parameter GetMetric(const std::string &name) const { + Parameter metricValue; + CALL_STATUS_FNC(GetMetric, name, metricValue); + return metricValue; + } using Ptr = std::shared_ptr<ExecutableNetwork>; }; diff --git a/inference-engine/include/cpp/ie_infer_request.hpp b/inference-engine/include/cpp/ie_infer_request.hpp index 1205d3e1297ed9..5d1eeb4838fff0 100644 --- a/inference-engine/include/cpp/ie_infer_request.hpp +++ b/inference-engine/include/cpp/ie_infer_request.hpp @@ -13,6 +13,7 @@ #include #include "ie_iinfer_request.hpp" #include "details/ie_exception_conversion.hpp" +#include "ie_plugin_ptr.hpp" namespace InferenceEngine { @@ -57,6 +58,7 @@ class CompletionCallbackWrapper : public ICom */ class InferRequest { IInferRequest::Ptr actual; + InferenceEnginePluginPtr plg; std::shared_ptr callback; static void callWrapper(InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode code) { @@ -69,6 +71,10 @@ class InferRequest { public: InferRequest() = default; + ~InferRequest() { + actual = nullptr; + } + /** * @brief Sets input/output data to infer * @note: Memory allocation does not happen @@ -147,7 +153,8 @@ class InferRequest { * constructs InferRequest from initialised shared_pointer * @param actual */ - explicit InferRequest(IInferRequest::Ptr request) : actual(request) {} + explicit InferRequest(IInferRequest::Ptr request, InferenceEnginePluginPtr plg = {}) + : actual(request), plg(plg) {} /** * @brief Start inference of specified input(s) in asynchronous mode diff --git a/inference-engine/include/cpp/ie_plugin_cpp.hpp b/inference-engine/include/cpp/ie_plugin_cpp.hpp index e70b7789764597..8d5744a016f9f4 100644 --- a/inference-engine/include/cpp/ie_plugin_cpp.hpp +++ b/inference-engine/include/cpp/ie_plugin_cpp.hpp @@ -51,11 +51,14 @@ class InferencePlugin { } /** - * @brief Wraps original method - * IInferencePlugin::LoadNetwork + * @deprecated Use InferencePlugin::LoadNetwork(ICNNNetwork &, const std::map<std::string, std::string> &) + * @brief Wraps original method IInferencePlugin::LoadNetwork(ICNNNetwork &, ResponseDesc *) */ + INFERENCE_ENGINE_DEPRECATED void LoadNetwork(ICNNNetwork &network) { + IE_SUPPRESS_DEPRECATED_START CALL_STATUS_FNC(LoadNetwork, network); + IE_SUPPRESS_DEPRECATED_END } /** @@ -65,7 +68,7 @@ class InferencePlugin { ExecutableNetwork LoadNetwork(ICNNNetwork &network, const std::map<std::string, std::string> &config) { IExecutableNetwork::Ptr ret; CALL_STATUS_FNC(LoadNetwork, ret, network, config); - return ExecutableNetwork(ret); + return ExecutableNetwork(ret, actual); } /** @@ -76,25 +79,30 @@ IExecutableNetwork::Ptr ret; CALL_STATUS_FNC(LoadNetwork, ret, network, config); if (ret.get() == nullptr) THROW_IE_EXCEPTION << "Internal error: pointer to executable network is null"; - return ExecutableNetwork(ret); + return ExecutableNetwork(ret, actual); } /** - * @deprecated Loads IExecutableNetwork to create IInferRequest. 
- * @brief Wraps original method - * IInferencePlugin::Infer(const BlobMap&, BlobMap&, ResponseDesc *resp) + * @deprecated Use IExecutableNetwork to create IInferRequest. + * @brief Wraps original method IInferencePlugin::Infer(const BlobMap&, BlobMap&, ResponseDesc *) */ + INFERENCE_ENGINE_DEPRECATED void Infer(const BlobMap &input, BlobMap &result) { + IE_SUPPRESS_DEPRECATED_START CALL_STATUS_FNC(Infer, input, result); + IE_SUPPRESS_DEPRECATED_END } /** - * @brief Wraps original method - * IInferencePlugin::GetPerformanceCounts + * @deprecated Use IInferRequest to get performance counters + * @brief Wraps original method IInferencePlugin::GetPerformanceCounts */ + INFERENCE_ENGINE_DEPRECATED std::map<std::string, InferenceEngineProfileInfo> GetPerformanceCounts() const { std::map<std::string, InferenceEngineProfileInfo> perfMap; + IE_SUPPRESS_DEPRECATED_START CALL_STATUS_FNC(GetPerformanceCounts, perfMap); + IE_SUPPRESS_DEPRECATED_END return perfMap; } @@ -118,25 +126,25 @@ class InferencePlugin { * @brief Wraps original method * IInferencePlugin::ImportNetwork */ - ExecutableNetwork ImportNetwork(const std::string &modelFileName, const std::map &config) { + ExecutableNetwork ImportNetwork(const std::string &modelFileName, const std::map &config) { IExecutableNetwork::Ptr ret; CALL_STATUS_FNC(ImportNetwork, ret, modelFileName, config); - return ExecutableNetwork(ret); + return ExecutableNetwork(ret, actual); } /** - * @depricated Use the version with config parameter + * @deprecated Use InferencePlugin::QueryNetwork(const ICNNNetwork &, const std::map<std::string, std::string> &, QueryNetworkResult &) const * @brief Wraps original method - * IInferencePlugin::QueryNetwork + * IInferencePlugin::QueryNetwork(const ICNNNetwork&, QueryNetworkResult&) const */ + INFERENCE_ENGINE_DEPRECATED void QueryNetwork(const ICNNNetwork &network, QueryNetworkResult &res) const { - actual->QueryNetwork(network, res); - if (res.rc != OK) THROW_IE_EXCEPTION << res.resp.msg; + QueryNetwork(network, { }, res); } /** * @brief Wraps original method - * IInferencePlugin::QueryNetwork + * IInferencePlugin::QueryNetwork(const ICNNNetwork&, const std::map<std::string, std::string> &, QueryNetworkResult&) const */ void QueryNetwork(const ICNNNetwork &network, const std::map<std::string, std::string> &config, QueryNetworkResult &res) const { actual->QueryNetwork(network, config, res); @@ -144,6 +152,7 @@ class InferencePlugin { } /** + * @brief Converts InferencePlugin to the underlying InferenceEnginePluginPtr pointer * @return Wrapped object */ operator InferenceEngine::InferenceEnginePluginPtr() { @@ -151,11 +160,15 @@ } /** - * @return wrapped Hetero object if underlined object is HeteroPlugin instance, nullptr otherwise - */ + * @deprecated Deprecated since HeteroPluginPtr is deprecated + * @brief Converts InferencePlugin to HeteroPluginPtr pointer + * @return wrapped Hetero object if underlying object is HeteroPlugin instance, nullptr otherwise + */ + IE_SUPPRESS_DEPRECATED_START operator InferenceEngine::HeteroPluginPtr() { return actual; } + IE_SUPPRESS_DEPRECATED_END /** * @brief Shared pointer on InferencePlugin object diff --git a/inference-engine/include/details/ie_exception.hpp b/inference-engine/include/details/ie_exception.hpp index 5285f0568d02a8..16ba6bb80e32ee 100644 --- a/inference-engine/include/details/ie_exception.hpp +++ b/inference-engine/include/details/ie_exception.hpp @@ -35,11 +35,11 @@ class NullStream { public : template <class T> - NullStream & operator << (const T &obj) noexcept { + NullStream & operator << (const T &) noexcept { return *this; } - NullStream & operator<< (std::ostream & (*manip)(std::ostream &)) noexcept { + NullStream 
& operator<< (std::ostream & (*)(std::ostream &)) noexcept { return *this; } }; diff --git a/inference-engine/include/details/ie_so_pointer.hpp b/inference-engine/include/details/ie_so_pointer.hpp index a6d7372eff3da3..9c803e09c2f222 100644 --- a/inference-engine/include/details/ie_so_pointer.hpp +++ b/inference-engine/include/details/ie_so_pointer.hpp @@ -74,6 +74,7 @@ class SOCreatorTrait {}; */ template <class T, class Loader = SharedObjectLoader> class SOPointer { +IE_SUPPRESS_DEPRECATED_START template <class U> friend class SOPointer; public: /** @@ -91,6 +92,18 @@ class SOPointer { SymbolLoader<Loader>(_so_loader).template instantiateSymbol<T>(SOCreatorTrait<T>::name))) { } + /** + * @brief Constructs an object with existing reference + * @param _pointedObj_ Existing reference to wrap + */ + explicit SOPointer(T * _pointedObj_) + : _so_loader() + , _pointedObj(_pointedObj_) { + if (_pointedObj == nullptr) { + THROW_IE_EXCEPTION << "Cannot create SOPointer from nullptr"; + } + } + /** * @brief The copy-like constructor, can create So Pointer that dereferenced into child type if T is derived of U * @param that copied SOPointer object @@ -99,6 +112,9 @@ class SOPointer { SOPointer(const SOPointer & that) : _so_loader(std::dynamic_pointer_cast<Loader>(that._so_loader)), _pointedObj(std::dynamic_pointer_cast<T>(that._pointedObj)) { + if (_pointedObj == nullptr) { + THROW_IE_EXCEPTION << "Cannot create object from SOPointer reference"; + } } /** @@ -149,6 +165,7 @@ * @brief Gets a smart pointer to the custom object */ std::shared_ptr<T> _pointedObj; +IE_SUPPRESS_DEPRECATED_END }; } // namespace details diff --git a/inference-engine/include/details/os/os_filesystem.hpp b/inference-engine/include/details/os/os_filesystem.hpp new file mode 100644 index 00000000000000..d94c7da0d692ab --- /dev/null +++ b/inference-engine/include/details/os/os_filesystem.hpp @@ -0,0 +1,39 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief This is a header file with functions related to filesystem operations. + * @file os_filesystem.hpp + */ +#pragma once + +#ifdef ENABLE_UNICODE_PATH_SUPPORT +#include <codecvt> +#include <locale> +#include <string> + +namespace InferenceEngine { +namespace details { + +/** +* @brief Conversion from wide character string to a single-byte string. +*/ +inline const std::string wStringtoMBCSstringChar(const std::wstring& wstr) { + std::wstring_convert<std::codecvt_utf8<wchar_t>> wstring_decoder; + return wstring_decoder.to_bytes(wstr); +} + +/** +* @brief Conversion from single-byte string to wide character string. 
+*/ +inline const std::wstring multiByteCharToWString(const char* str) { + std::wstring_convert> wstring_encoder; + std::wstring result = wstring_encoder.from_bytes(str); + return result; +} + +} // namespace details +} // namespace InferenceEngine + +#endif diff --git a/inference-engine/include/gna/gna_config.hpp b/inference-engine/include/gna/gna_config.hpp index 6b9cbe8130155c..ad8acf338e1391 100644 --- a/inference-engine/include/gna/gna_config.hpp +++ b/inference-engine/include/gna/gna_config.hpp @@ -53,6 +53,7 @@ DECLARE_GNA_CONFIG_VALUE(AUTO); DECLARE_GNA_CONFIG_VALUE(HW); DECLARE_GNA_CONFIG_VALUE(SW); DECLARE_GNA_CONFIG_VALUE(SW_EXACT); +DECLARE_GNA_CONFIG_VALUE(SW_FP32); /** * @brief if enabled produced minimum memory footprint for loaded network in GNA memory, default value is YES diff --git a/inference-engine/include/hetero/hetero_plugin_config.hpp b/inference-engine/include/hetero/hetero_plugin_config.hpp index 4f2e1669e4aee6..fb17d07d6d5657 100644 --- a/inference-engine/include/hetero/hetero_plugin_config.hpp +++ b/inference-engine/include/hetero/hetero_plugin_config.hpp @@ -12,6 +12,8 @@ #pragma once #include +#include + #include "ie_plugin_config.hpp" namespace InferenceEngine { @@ -28,6 +30,12 @@ namespace HeteroConfigParams { * This option should be used with values: CONFIG_VALUE(NO) (default) or CONFIG_VALUE(YES) */ DECLARE_HETERO_CONFIG_KEY(DUMP_GRAPH_DOT); + +/** + * @deprecated Use DLIA_CONFIG_KEY(DUMP_SUPPORTED_LAYERS_INFORMATION) FPGA configuration boolean key instead + * @brief The bool key to define whether information messages with a reason are printed in case the layer is unsupported by DLA + */ +INFERENCE_ENGINE_DEPRECATED DECLARE_HETERO_CONFIG_KEY(DUMP_DLA_MESSAGES); } // namespace HeteroConfigParams diff --git a/inference-engine/include/ie_api.h b/inference-engine/include/ie_api.h index 76bc7e2e73d8c8..2394c0c5da0356 100644 --- a/inference-engine/include/ie_api.h +++ b/inference-engine/include/ie_api.h @@ -18,6 +18,7 @@ #else #if defined(_WIN32) #define INFERENCE_ENGINE_CDECL + #ifdef IMPLEMENT_INFERENCE_ENGINE_API #define INFERENCE_ENGINE_API(type) extern "C" __declspec(dllexport) type __cdecl #define INFERENCE_ENGINE_API_CPP(type) __declspec(dllexport) type __cdecl @@ -40,3 +41,33 @@ #endif #endif #endif + +#if defined(_WIN32) + #define INFERENCE_ENGINE_DEPRECATED __declspec(deprecated) +#else + #define INFERENCE_ENGINE_DEPRECATED __attribute__((deprecated)) +#endif + +// Suppress warning "-Wdeprecated-declarations" / C4996 +#if defined(_MSC_VER) + #define IE_DO_PRAGMA(x) __pragma(x) +#elif defined(__GNUC__) + #define IE_DO_PRAGMA(x) _Pragma (#x) +#else + #define IE_DO_PRAGMA(x) +#endif + +#ifdef _MSC_VER +#define IE_SUPPRESS_DEPRECATED_START \ + IE_DO_PRAGMA(warning(push)) \ + IE_DO_PRAGMA(warning(disable: 4996)) +#define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(warning(pop)) +#elif defined(__clang__) || ((__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 405)) +#define IE_SUPPRESS_DEPRECATED_START \ + IE_DO_PRAGMA(GCC diagnostic push) \ + IE_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations") +#define IE_SUPPRESS_DEPRECATED_END IE_DO_PRAGMA(GCC diagnostic pop) +#else +#define IE_SUPPRESS_DEPRECATED_START +#define IE_SUPPRESS_DEPRECATED_END +#endif diff --git a/inference-engine/include/ie_blob.h b/inference-engine/include/ie_blob.h index 8e4cf7e16ecac4..d2628ad5cd4150 100644 --- a/inference-engine/include/ie_blob.h +++ b/inference-engine/include/ie_blob.h @@ -16,6 +16,7 @@ #include #include #include +#include #include "ie_common.h" #include 
"details/ie_exception.hpp" @@ -28,7 +29,8 @@ namespace InferenceEngine { /** - * @brief This class implements a container object that represents a tensor in memory (host and remote/accelerated) + * @brief This class represents a universal container in the Inference Engine + * @note Each Blob implementation must be derived from this Blob class directly or indirectly */ class Blob { public: @@ -43,25 +45,28 @@ class Blob { using CPtr = std::shared_ptr; /** - * @deprecated Please use TensorDesc to get the precision + * @deprecated Use Blob::getTensorDesc and InferenceEngine::TensorDesc::getPrecision to get the precision * @brief Returns the tensor precision of the current Blob object */ + INFERENCE_ENGINE_DEPRECATED Precision type() const noexcept { return tensorDesc.getPrecision(); } /** - * @deprecated Please use TensorDesc to get the precision + * @deprecated Use Blob::getTensorDesc and InferenceEngine::TensorDesc::getPrecision to get the precision * @brief Returns the tensor precision of the current Blob object */ + INFERENCE_ENGINE_DEPRECATED Precision precision() const noexcept { return tensorDesc.getPrecision(); } /** - * @deprecated Please use TensorDesc to get the current layout + * @deprecated Use Blob::getTensorDesc and InferenceEngine::TensorDesc::getLayout to get the current layout * @brief Returns the tensor layout of the current Blob object */ + INFERENCE_ENGINE_DEPRECATED Layout layout() const noexcept { return tensorDesc.getLayout(); } @@ -78,6 +83,60 @@ class Blob { */ virtual ~Blob() = default; + /** + * @brief Checks if the Blob object can be cast to the type T* + * @tparam T Type to be checked. Must represent a class derived from the Blob + * @return true if this object can be dynamically cast to the type T*. Otherwise, false + */ + template::value && !std::is_reference::value, int>::type = 0, + typename std::enable_if::value, int>::type = 0> + bool is() noexcept { + return dynamic_cast(this) != nullptr; + } + + /** + * @brief Checks if the Blob object can be cast to the type const T* + * @tparam T Type to be checked. Must represent a class derived from the Blob + * @return true if this object can be dynamically cast to the type const T*. Otherwise, false + */ + template::value && !std::is_reference::value, int>::type = 0, + typename std::enable_if::value, int>::type = 0> + bool is() const noexcept { + return dynamic_cast(this) != nullptr; + } + + /** + * @brief Casts this Blob object to the type T*. Use InferenceEngine::as() to operate with + * shared Blob objects instead of raw pointers + * @tparam T Type to cast to. Must represent a class derived from the Blob + * @return Raw pointer to the object of the type T or nullptr on error + */ + template::value && !std::is_reference::value, int>::type = 0, + typename std::enable_if::value, int>::type = 0> + T* as() noexcept { + return dynamic_cast(this); + } + + /** + * @brief Casts this Blob object to the type const T*. Use InferenceEngine::as() to operate with + * shared Blob objects instead of raw pointers + * @tparam T Type to cast to. Must represent a class derived from the Blob + * @return Raw pointer to the object of the type const T or nullptr on error + */ + template::value && !std::is_reference::value, int>::type = 0, + typename std::enable_if::value, int>::type = 0> + const T* as() const noexcept { + return dynamic_cast(this); + } + /** * @brief Constructor. Creates an empty Blob object with the specified precision. 
* @param tensorDesc Defines the layout and dims of the blob @@ -85,46 +144,51 @@ class Blob { explicit Blob(const TensorDesc &tensorDesc): tensorDesc(tensorDesc) {} /** - * @deprecated Please use TensorDesc for Blob initialization + * @deprecated Use Blob::Blob(const TensorDesc &). * @brief Constructor. Creates an empty Blob object with the specified precision. * @param p Precision type */ - explicit Blob(Precision p) : Blob(p, NCHW) {} + INFERENCE_ENGINE_DEPRECATED + explicit Blob(Precision p) : Blob(TensorDesc(p, NCHW)) {} /** - * @deprecated Please use TensorDesc for Blob initialization + * @deprecated Use Blob::Blob(const TensorDesc &). * @brief The constructor creates an empty Blob object with the specified precision and layout. * @param p Precision type * @param l Layout */ - Blob(Precision p, Layout l) : tensorDesc(p, l) {} + INFERENCE_ENGINE_DEPRECATED + Blob(Precision p, Layout l) : Blob(TensorDesc(p, l)) {} /** - * @deprecated Please use TensorDesc for Blob initialization + * @deprecated Use Blob::Blob(const TensorDesc &). * @brief The constructor creates an empty Blob object with the specified precision and dimensions. * @param p Tensor precision type * @param dims Tensor dimensions vector */ + INFERENCE_ENGINE_DEPRECATED Blob(Precision p, const SizeVector &dims) - : Blob(p, TensorDesc::getLayoutByDims(dims), dims) {} + : Blob({ p, SizeVector(dims.rbegin(), dims.rend()), TensorDesc::getLayoutByDims(dims) }) {} /** - * @deprecated Please use TensorDesc for Blob initialization + * @deprecated Use Blob::Blob(const TensorDesc &). * @brief The constructor creates an empty Blob object with the specified precision, layout and dimensions. * @param p tensor precision type * @param l tensor layout * @param dims Tensor dimensions vector with reversed order */ + INFERENCE_ENGINE_DEPRECATED Blob(Precision p, Layout l, const SizeVector &dims) - : tensorDesc(p, SizeVector(dims.rbegin(), dims.rend()), l) {} + : Blob(TensorDesc(p, SizeVector(dims.rbegin(), dims.rend()), l)) {} /** - * @deprecated It works with reversed dimensions. Please create a new blob if you want to change a size. + * @deprecated The method works with reversed dimensions. Create a new blob if you want to change a size. * @brief Changes Tensor size to the specified dimensions. If it was allocated, the previous data is deallocated and lost. * @param dims New dimensions to set * @param layout New layout to set * @return Total number of elements (a product of all the dimensions) */ + INFERENCE_ENGINE_DEPRECATED size_t Resize(const SizeVector &dims, Layout layout = Layout::ANY) noexcept { try { bool bret = deallocate(); @@ -144,12 +208,13 @@ class Blob { } /** - * @deprecated It works with reversed dimensions. Please use TensorDescriptor.reshape(). + * @deprecated The method works with reversed dimensions. Use Blob::getTensorDesc and InferenceEngine::TensorDesc::reshape. * @brief Changes tensor size to the specified dimensions without changing memory. The total size remains unchanged as well as the memory layout. * @param dims New dimensions to set * @param layout New layout to set * @return The total number of elements (a product of all the dims) */ + INFERENCE_ENGINE_DEPRECATED size_t Reshape(const SizeVector &dims, Layout layout = Layout::ANY) noexcept { try { if (product(tensorDesc.getDims()) != product(dims)) { @@ -168,9 +233,10 @@ class Blob { } /** - * @deprecated Please use TensorDesc for working with dimensions. + * @deprecated Use Blob::getTensorDesc and InferenceEngine::TensorDesc::getDims. 
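All of the deprecated constructors above funnel into the TensorDesc-based constructor, so migrating code only needs to build the descriptor first. A minimal sketch of the replacement, assuming FP32 NCHW data:

```cpp
#include <ie_blob.h>

using namespace InferenceEngine;

int main() {
    // Old, now deprecated: TBlob<float> blob(Precision::FP32, NCHW, {1, 3, 224, 224});
    // New: precision, dims and layout are described by a TensorDesc up front.
    TensorDesc desc(Precision::FP32, SizeVector{1, 3, 224, 224}, Layout::NCHW);
    TBlob<float> blob(desc);
    blob.allocate();  // the constructor itself does not allocate memory
    return 0;
}
```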
* @brief Returns the tensor dimensions vector with reversed order. */ + INFERENCE_ENGINE_DEPRECATED const SizeVector dims() const noexcept { return SizeVector(tensorDesc.getDims().rbegin(), tensorDesc.getDims().rend()); } @@ -178,22 +244,33 @@ /** * @brief Returns the tensor description */ - const TensorDesc &getTensorDesc() const noexcept { + virtual const TensorDesc &getTensorDesc() const noexcept { + return tensorDesc; + } + + /** + * @brief Returns the tensor description + */ + virtual TensorDesc &getTensorDesc() noexcept { return tensorDesc; } /** - * @brief Returns the total number of elements (a product of all the dims) + * @brief By default, returns the total number of elements (a product of all the dims or 1 for scalar) + * + * Return value and its interpretation heavily depend on the blob type */ - size_t size() const noexcept { + virtual size_t size() const noexcept { + if (tensorDesc.getLayout() == Layout::SCALAR) + return 1; return product(tensorDesc.getDims()); } /** * @brief Returns the size of the current Blob in bytes. */ - size_t byteSize() const noexcept { - return product(tensorDesc.getDims()) * element_size(); + virtual size_t byteSize() const noexcept { + return size() * element_size(); } /** @@ -245,17 +322,151 @@ return std::accumulate(std::begin(dims), std::end(dims), (size_t) 1, std::multiplies<size_t>()); } + /** + * @brief Gets an allocator for allocator-based blobs + * @return The allocator for allocator-based blobs or nullptr if there is none + */ + virtual const std::shared_ptr<IAllocator> &getAllocator() const noexcept = 0; + + /** + * @brief Gets a handle to allocated memory + * @return The handle to allocated memory for allocator-based blobs or nullptr if there is none + */ + virtual void *getHandle() const noexcept = 0; + + template<typename> friend + class TBlobProxy; +}; + +/** + * @brief Helper cast function to work with shared Blob objects + * @return shared_ptr to the type T. Returned shared_ptr shares ownership of the object with the + * input Blob::Ptr + */ +template<typename T, + typename std::enable_if<!std::is_pointer<T>::value && !std::is_reference<T>::value, int>::type = 0, + typename std::enable_if<std::is_base_of<Blob, T>::value, int>::type = 0> +std::shared_ptr<T> as(const Blob::Ptr& blob) noexcept { + return std::dynamic_pointer_cast<T>(blob); +} + +/** + * @brief Helper cast function to work with shared Blob objects + * @return shared_ptr to the type const T. Returned shared_ptr shares ownership of the object with + * the input Blob::Ptr + */ +template<typename T, + typename std::enable_if<!std::is_pointer<T>::value && !std::is_reference<T>::value, int>::type = 0, + typename std::enable_if<std::is_base_of<Blob, T>::value, int>::type = 0> +std::shared_ptr<const T> as(const Blob::CPtr& blob) noexcept { + return std::dynamic_pointer_cast<const T>(blob); +} + +/** + * @brief This class implements a container object that represents a tensor in memory (host and + * remote/accelerated) + * @note Any Blob implementation that represents a concept of a tensor in memory (for example, + * TBlob) must be a subclass of MemoryBlob instead of Blob + */ +class MemoryBlob : public Blob { +public: + /** + * @brief A smart pointer to the MemoryBlob object + */ + using Ptr = std::shared_ptr<MemoryBlob>; + + /** + * @brief A smart pointer to the const MemoryBlob object + */ + using CPtr = std::shared_ptr<const MemoryBlob>; + + /** + * @brief MemoryBlob virtual destructor + */ + virtual ~MemoryBlob() = default; + + /** + * @brief Constructor. Creates an empty MemoryBlob object with the specified precision. 
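The free as<T>() overloads above are the shared-pointer counterpart of Blob::as(); together with the new MemoryBlob base they let callers check that a blob actually owns host memory before locking it. A sketch, assuming FP32 content and the LockedMemory::as() accessor from the existing ie_locked_memory.hpp API:

```cpp
#include <ie_blob.h>

using namespace InferenceEngine;

void process(const Blob::Ptr& blob) {
    // as<MemoryBlob>() shares ownership with the input Blob::Ptr and returns
    // nullptr when the blob is not memory-backed (e.g. a compound blob).
    MemoryBlob::Ptr mem = as<MemoryBlob>(blob);
    if (mem) {
        auto locked = mem->cbuffer();                    // read-only LockedMemory
        const float* data = locked.as<const float*>();   // assumes FP32 payload
        (void)data;
    }
}
```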
+ * @param tensorDesc Defines the layout and dims of the blob + */ + explicit MemoryBlob(const TensorDesc& tensorDesc): Blob(tensorDesc) {} + + /** + * @brief Returns the tensor description + */ + const TensorDesc &getTensorDesc() const noexcept override { + return tensorDesc; + } + + /** + * @brief Returns the tensor description + */ + TensorDesc &getTensorDesc() noexcept override { + return tensorDesc; + } + + /** + * @brief Returns the total number of elements, which is a product of all the dimensions + */ + size_t size() const noexcept override { + if (tensorDesc.getLayout() == Layout::SCALAR) + return 1; + return product(tensorDesc.getDims()); + } + + /** + * @brief Returns the size of the current Blob in bytes + */ + size_t byteSize() const noexcept override { + return size() * element_size(); + } + + /** + * @brief Returns the number of bytes per element. The overall MemoryBlob capacity is size() * element_size(). + * Abstract method. + */ + size_t element_size() const noexcept override = 0; + + /** + * @brief Allocates memory to store the data. + * Abstract method. + */ + void allocate() noexcept override = 0; + + /** + * @brief Releases previously allocated data. + * Abstract method. + */ + bool deallocate() noexcept override = 0; + + /** + * @brief Gets access to the allocated memory. + * Abstract method. + * @return A LockedMemory object + */ + LockedMemory buffer() noexcept override = 0; + + /** + * @brief Gets read-only access to the allocated memory. + * Abstract method. + * @return A LockedMemory object + */ + LockedMemory cbuffer() const noexcept override = 0; + +protected: /** * @brief Gets the allocator for allocator-based blobs. * @return The allocator for allocator-based blobs or if there is none then a nullptr. */ - virtual const std::shared_ptr &getAllocator() const noexcept = 0; + const std::shared_ptr &getAllocator() const noexcept override = 0; /** * @brief Gets the handle to allocated memory. * @return The handle to allocated memory for allocator-based blobs or if there is none then a nullptr. */ - virtual void *getHandle() const noexcept = 0; + void *getHandle() const noexcept override = 0; template friend class TBlobProxy; @@ -271,7 +482,7 @@ using BlobMap = std::map; */ template::value>> -class TBlob : public Blob { +class TBlob : public MemoryBlob { template friend class TBlob; @@ -284,10 +495,10 @@ class TBlob : public Blob { /** * @brief Creates a TBlob object with the specified dimensions and layout but does not allocate the memory. - * Please use the allocate() method to allocate memory. + * Use the allocate() method to allocate memory. * @param tensorDesc Tensor description */ - explicit TBlob(const TensorDesc& tensorDesc): Blob(tensorDesc) {} + explicit TBlob(const TensorDesc& tensorDesc): MemoryBlob(tensorDesc) {} /** * @brief The constructor creates a TBlob object with the specified dimensions and layout @@ -297,7 +508,7 @@ class TBlob : public Blob { * @param data_size Length of the pre-allocated array. If not set, size is assumed equal * to the dot product of dims. 
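A sketch of the pre-allocated-memory constructor documented above: the blob wraps application-owned storage, so no allocate() call is needed and the storage must outlive the blob:

```cpp
#include <ie_blob.h>

using namespace InferenceEngine;

int main() {
    float data[2 * 3] = {};  // storage owned by the application, not the blob
    TensorDesc desc(Precision::FP32, SizeVector{2, 3}, Layout::NC);
    // The blob neither owns nor frees `data`; it must not outlive the array.
    TBlob<float> blob(desc, data, sizeof(data) / sizeof(data[0]));
    return 0;
}
```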
*/ - TBlob(const TensorDesc& tensorDesc, T* ptr, size_t data_size = 0): Blob(tensorDesc) { + TBlob(const TensorDesc& tensorDesc, T* ptr, size_t data_size = 0): MemoryBlob(tensorDesc) { if (data_size == 0) { data_size = size(); } @@ -319,27 +530,33 @@ class TBlob : public Blob { * @param alloc Allocator to be used */ TBlob(const TensorDesc& tensorDesc, const std::shared_ptr& alloc) - : Blob(tensorDesc), _allocator(alloc) { + : MemoryBlob(tensorDesc), _allocator(alloc) { } /** - * @deprecated Please use TensorDesc for Blob initialization. + * @deprecated Use TBlob::TBlob(const TensorDesc&). + * @brief Creates a TBlob object with the specified precision and type, but does not allocate the memory. + * Use the allocate() method to allocate memory. + * @param p Precision + * @param l Layout */ - explicit TBlob(Precision p, Layout l) : Blob(p, l) {} + INFERENCE_ENGINE_DEPRECATED + explicit TBlob(Precision p, Layout l) : MemoryBlob(TensorDesc(p, l)) {} /** - * @deprecated Please use TensorDesc for Blob initialization. - * @brief Creates a TBlob object with the specified dimensions but does not allocate the memory. Please use the allocate() method to allocate memory. + * @deprecated Use TBlob::TBlob(const TensorDesc&). + * @brief Creates a TBlob object with the specified dimensions but does not allocate the memory. Use the allocate() method to allocate memory. * @param p Precision * @param l Layout * @param dims Tensor dimensions */ + INFERENCE_ENGINE_DEPRECATED TBlob(Precision p, Layout l, const SizeVector& dims) - : Blob(p, l, dims) { + : MemoryBlob(TensorDesc(p, SizeVector(dims.rbegin(), dims.rend()), l)) { } /** - * @deprecated Please use TensorDesc for Blob initialization. + * @deprecated Use TBlob::TBlob(const TensorDesc&). * @brief The constructor creates a TBlob object with the specified dimensions on the pre-allocated memory. Therefore, the allocate() call is not required. * @details The TBlob object doesn't own memory that is pointed to by the ptr. Therefore, it doesn't free the memory after the TBlob object is destroyed. * Also certain operations might fail: @@ -350,7 +567,9 @@ class TBlob : public Blob { * @param ptr Pointer to the pre-allocated memory * @param data_size Length of the pre-allocated array. If not set, size is assumed equal to dot product of dims. */ - TBlob(Precision p, Layout l, const SizeVector& dims, T* ptr, size_t data_size = 0) : Blob(p, l, dims) { + INFERENCE_ENGINE_DEPRECATED + TBlob(Precision p, Layout l, const SizeVector& dims, T* ptr, size_t data_size = 0) : + MemoryBlob(TensorDesc(p, SizeVector(dims.rbegin(), dims.rend()), l)) { if (data_size == 0) { data_size = size(); } @@ -363,22 +582,23 @@ class TBlob : public Blob { } /** - * @deprecated Please use TensorDesc for Blob initialization. + * @deprecated Use TBlob::TBlob(const TensorDesc&). * @brief Constructor. Creates a TBlob object with the specified precision, layout, dimensions and custom memory allocator. * @param p Precision * @param l Layout * @param dims Tensor dimensions * @param alloc Allocator to be used */ + INFERENCE_ENGINE_DEPRECATED TBlob(Precision p, Layout l, const SizeVector &dims, std::shared_ptr alloc) - : Blob(p, l, dims), _allocator(alloc) { + : MemoryBlob(TensorDesc(p, SizeVector(dims.rbegin(), dims.rend()), l)), _allocator(alloc) { } /** * @brief The copy constructor data is reallocated and copied from the source to the target blob. 
* @param blob Source blob */ - TBlob(const TBlob &blob) : Blob(blob.getTensorDesc()) { + TBlob(const TBlob &blob) : MemoryBlob(blob.getTensorDesc()) { copyFrom(blob); } @@ -386,7 +606,7 @@ class TBlob : public Blob { * @brief A move constructor. * @param blob rvalue to make a move from */ - TBlob(TBlob &&blob) : Blob(blob.getTensorDesc()) { + TBlob(TBlob &&blob) : MemoryBlob(blob.getTensorDesc()) { moveFrom(blob); } @@ -432,10 +652,11 @@ class TBlob : public Blob { } /** - * @deprecated Deprecated to avoid memcpy() calls. + * @deprecated Deprecated to avoid memcpy() calls. Use TBlob::buffer to get raw pointer and set data * @brief Copies data from the given vector to the blob. * @param that Vector of values to copy to the blob */ + INFERENCE_ENGINE_DEPRECATED void set(const std::vector &that) { if (tensorDesc.getDims().size() != 0 && that.size() != product(tensorDesc.getDims())) THROW_IE_EXCEPTION << "Size mismatch between dims and vector"; @@ -447,7 +668,7 @@ class TBlob : public Blob { allocate(); } auto memptr = data(); - memcpy(memptr, that.data(), product(tensorDesc.getDims()) * sizeof(T)); + memcpy(memptr, that.data(), byteSize()); } /** @@ -457,7 +678,7 @@ class TBlob : public Blob { if (_handle != nullptr) { getAllocator()->free(_handle); } - _handle = getAllocator()->alloc(TBlob::product(tensorDesc.getDims()) * sizeof(T)); + _handle = getAllocator()->alloc(byteSize()); } /** @@ -539,7 +760,7 @@ class TBlob : public Blob { tensorDesc = blob.tensorDesc; this->allocate(); auto memptr = data(); - memcpy(memptr, blob.readOnly(), product(tensorDesc.getDims()) * sizeof(T)); + memcpy(memptr, blob.readOnly(), byteSize()); } /** @@ -598,7 +819,7 @@ class TBlob : public Blob { }; /** - * @deprecated Use TensorDesc to create Blob::Ptr. + * @deprecated Use InferenceEngine::make_shared_blob(const TensorDesc&) * @brief Creates a blob with given precision and dimensions. * @tparam Type Type of the shared pointer to be created * @param p Given precision @@ -606,15 +827,16 @@ class TBlob : public Blob { * @return A shared pointer to the created blob */ template +INFERENCE_ENGINE_DEPRECATED inline typename TBlob::Ptr make_shared_blob(Precision p, Layout l, const SizeVector &dims) { if (!p.hasStorageType()) THROW_IE_EXCEPTION << "Cannot make shared blob! " << "The blob type cannot be used to store objects of current precision"; - return std::make_shared>(p, l, dims); + return std::make_shared>(TensorDesc(p, SizeVector(dims.rbegin(), dims.rend()), l)); } /** - * @deprecated Use TensorDesc to create Blob::Ptr + * @deprecated Use the make_shared_blob signature which accepts TensorDesc * @brief Creates a blob with the NCHW layout, given precision, and given dimensions. * @tparam Type Type of the shared pointer to be created * @param p Given precision @@ -622,15 +844,16 @@ inline typename TBlob::Ptr make_shared_blob(Precision p, Layout l, const S * @return A shared pointer to the created blob */ template +INFERENCE_ENGINE_DEPRECATED inline typename TBlob::Ptr make_shared_blob(Precision p, const SizeVector &dims) { if (!p.hasStorageType()) THROW_IE_EXCEPTION << "Cannot make shared blob! 
" << "The blob type cannot be used to store objects of current precision"; - return make_shared_blob(p, TensorDesc::getLayoutByDims(dims), dims); + return make_shared_blob(TensorDesc(p, SizeVector(dims.rbegin(), dims.rend()), TensorDesc::getLayoutByDims(dims))); } /** - * @deprecated Use TensorDesc to create Blob::Ptr + * @deprecated Use the make_shared_blob signature which accepts TensorDesc * @brief Creates a blob with the given precision. * @tparam Type Type of the shared pointer to be created * @param p Given precision @@ -638,15 +861,16 @@ inline typename TBlob::Ptr make_shared_blob(Precision p, const SizeVector * @return A shared pointer to the blob created */ template +INFERENCE_ENGINE_DEPRECATED inline typename InferenceEngine::TBlob::Ptr make_shared_blob(Precision p, Layout l, const TArg &arg) { if (!p.hasStorageType()) THROW_IE_EXCEPTION << "Cannot make shared blob! " << "The blob type cannot be used to store objects of current precision"; - return std::make_shared>(p, l, arg); + return std::make_shared>(TensorDesc(p, SizeVector(arg.rbegin(), arg.rend()), l)); } /** - * @deprecated Use TensorDesc in order to create Blob::Ptr + * @deprecated Use the make_shared_blob signature which accepts TensorDesc * @brief Creates a blob with the NCHW layout and given tensor precision. * @tparam Type Type of the shared pointer to be created * @param p Given precision @@ -654,11 +878,12 @@ inline typename InferenceEngine::TBlob::Ptr make_shared_blob(Precision p, * @return A shared pointer to the blob created */ template +INFERENCE_ENGINE_DEPRECATED inline typename InferenceEngine::TBlob::Ptr make_shared_blob(Precision p, const TArg &arg) { if (!p.hasStorageType()) THROW_IE_EXCEPTION << "Cannot make shared blob! " << "The blob type cannot be used to store objects of current precision"; - return make_shared_blob(p, TensorDesc::getLayoutByDims(arg), arg); + return make_shared_blob(TensorDesc(p, SizeVector(arg.rbegin(), arg.rend()), TensorDesc::getLayoutByDims(arg))); } /** @@ -707,7 +932,7 @@ inline typename InferenceEngine::TBlob::Ptr make_shared_blob(const TensorD } /** - * @deprecated Use TensorDesc in order to create Blob::Ptr. + * @deprecated Use InferenceEngine::make_shared_blob(const TensorDesc&) * @brief Gets a shared pointer for the new TBlob instance. * The created instance is based on move semantics from the given TBlob instance. * @tparam TypeTo Type of the shared pointer to be created @@ -715,6 +940,7 @@ inline typename InferenceEngine::TBlob::Ptr make_shared_blob(const TensorD * @return A shared pointer to the newly created blob of the given type */ template +INFERENCE_ENGINE_DEPRECATED inline typename InferenceEngine::TBlob::Ptr make_shared_blob(TBlob &&arg) { return std::make_shared>(std::move(arg)); } @@ -731,22 +957,23 @@ inline typename InferenceEngine::TBlob::Ptr make_shared_blob(const TBlob } /** - * @deprecated Use TensorDesc in order to create Blob::Ptr. + * @deprecated Use InferenceEngine::make_shared_blob(const TensorDesc&) * @brief Creates a blob with the given precision. * @tparam TypeTo Type of the shared pointer to be created * @param p Given precision * @return A shared pointer to the blob created */ template +INFERENCE_ENGINE_DEPRECATED inline typename InferenceEngine::TBlob::Ptr make_shared_blob(Precision p, Layout l = NCHW) { if (!p.hasStorageType()) THROW_IE_EXCEPTION << "Cannot make shared blob! 
" << "The blob type cannot be used to store objects of current precision"; - return std::make_shared>(p, l); + return std::make_shared>(TensorDesc(p, l)); } /** - * @deprecated Use TensorDesc in order to create Blob::Ptr. + * @deprecated Use InferenceEngine::make_shared_blob(const TensorDesc&) * @brief Creates a blob with the given precision, layout and dimensions from the vector of values. * @tparam TypeTo Type of the shared pointer to be created * @param p Given precision @@ -756,17 +983,18 @@ inline typename InferenceEngine::TBlob::Ptr make_shared_blob(Precision p * @return A shared pointer to the blob created */ template +INFERENCE_ENGINE_DEPRECATED inline typename TBlob::Ptr make_shared_blob(Precision p, Layout l, SizeVector dims, const std::vector &arg) { if (!p.hasStorageType()) THROW_IE_EXCEPTION << "Cannot make shared blob! " << "The blob type cannot be used to store objects of current precision"; - auto blob = std::make_shared>(p, l, dims); + auto blob = std::make_shared>(TensorDesc(p, SizeVector(dims.rbegin(), dims.rend()), l)); blob->set(arg); return blob; } /** - * @deprecated Use TensorDesc in order to create Blob::Ptr. + * @deprecated Use InferenceEngine::make_shared_blob(const TensorDesc&) * @brief Creates a blob with the given precision from the vector of values. * @tparam TypeTo Type of the shared pointer to be created * @param p Given precision @@ -775,17 +1003,18 @@ inline typename TBlob::Ptr make_shared_blob(Precision p, Layout l, SizeV * @return A shared pointer to the blob created */ template +INFERENCE_ENGINE_DEPRECATED inline typename TBlob::Ptr make_shared_blob(Precision p, Layout l, const std::vector &arg) { if (!p.hasStorageType()) THROW_IE_EXCEPTION << "Cannot make shared blob! " << "The blob type cannot be used to store objects of current precision"; - auto blob = std::make_shared>(p, l); + auto blob = std::make_shared>(TensorDesc(p, l)); blob->set(arg); return blob; } /** - * @deprecated Use TensorDesc in order to create Blob::Ptr. + * @deprecated Use InferenceEngine::make_shared_blob(const TensorDesc&) * @brief Creates a blob with the NCHW layout and the given precision from the vector of values. * @tparam TypeTo Type of the shared pointer to be created * @param p Given precision @@ -793,15 +1022,16 @@ inline typename TBlob::Ptr make_shared_blob(Precision p, Layout l, const * @return A shared pointer to the blob created */ template +INFERENCE_ENGINE_DEPRECATED inline typename TBlob::Ptr make_shared_blob(Precision p, const std::vector &arg) { if (!p.hasStorageType()) THROW_IE_EXCEPTION << "Cannot make shared blob! " << "The blob type cannot be used to store objects of current precision"; - return make_shared_blob(p, TensorDesc::getLayoutByDims(arg), arg); + return make_shared_blob(TensorDesc(p, SizeVector(arg.rbegin(), arg.rend()), TensorDesc::getLayoutByDims(arg))); } /** - * @deprecated Use TensorDesc in order to create Blob::Ptr. + * @deprecated Use InferenceEngine::make_shared_blob(const TensorDesc&) * @brief Creates a blob with the given precision from the pointer to the pre-allocated memory. * @param p Given precision * @param l Layout @@ -811,16 +1041,17 @@ inline typename TBlob::Ptr make_shared_blob(Precision p, const std::vect * @return A shared pointer to the blob created */ template +INFERENCE_ENGINE_DEPRECATED inline typename TBlob::Ptr make_shared_blob(Precision p, Layout l, const SizeVector &dims, TypeTo * ptr, size_t size = 0) { if (!p.hasStorageType()) THROW_IE_EXCEPTION << "Cannot make shared blob! 
" << "The blob type cannot be used to store objects of current precision"; - auto blob = std::make_shared>(p, l, dims, ptr, size); + auto blob = std::make_shared>(TensorDesc(p, SizeVector(dims.rbegin(), dims.rend()), l), ptr, size); return blob; } /** - * @deprecated Use TensorDesc in order to create Blob::Ptr. + * @deprecated Use InferenceEngine::make_shared_blob(const TensorDesc&) * @brief Creates a blob with the NCHW layout and the given precision from the pointer to the pre-allocated memory * @param p Given precision * @param dims Given dimensions @@ -829,11 +1060,23 @@ inline typename TBlob::Ptr make_shared_blob(Precision p, Layout l, const * @return A shared pointer to the blob created */ template +INFERENCE_ENGINE_DEPRECATED inline typename TBlob::Ptr make_shared_blob(Precision p, const SizeVector &dims, TypeTo * ptr, size_t size = 0) { if (!p.hasStorageType()) THROW_IE_EXCEPTION << "Cannot make shared blob! " << "The blob type cannot be used to store objects of current precision"; - return make_shared_blob(p, TensorDesc::getLayoutByDims(dims), dims, ptr, size); + return make_shared_blob(TensorDesc(p, SizeVector(dims.rbegin(), dims.rend()), TensorDesc::getLayoutByDims(dims)), ptr, size); +} + +/** + * @brief Creates a Blob object of the specified type + * @param args Constructor arguments for the Blob object + * @return A shared pointer to the newly created Blob object + */ +template::value, int>::type = 0> +std::shared_ptr make_shared_blob(Args&& ...args) { + return std::make_shared(std::forward(args)...); } /** diff --git a/inference-engine/include/ie_common.h b/inference-engine/include/ie_common.h index 7d75eee231c337..9e7241da4321a0 100644 --- a/inference-engine/include/ie_common.h +++ b/inference-engine/include/ie_common.h @@ -124,6 +124,36 @@ inline std::ostream & operator << (std::ostream &out, const Layout & p) { return out; } +/** + * @enum Color format + * @brief Extra information about input color format for preprocessing + */ +enum ColorFormat : uint32_t { + RAW = 0u, ///< Plain blob (default), no extra color processing required + RGB, ///< RGB color format + BGR, ///< BGR color format, default in DLDT + RGBX, ///< RGBX color format with X ignored during inference + BGRX, ///< BGRX color format with X ignored during inference + NV12, ///< NV12 color format represented as compound Y+UV blob +}; +inline std::ostream & operator << (std::ostream &out, const ColorFormat & fmt) { + switch (fmt) { +#define PRINT_COLOR_FORMAT(name) \ + case name : out << #name; break; + + PRINT_COLOR_FORMAT(RAW); + PRINT_COLOR_FORMAT(RGB); + PRINT_COLOR_FORMAT(BGR); + PRINT_COLOR_FORMAT(RGBX); + PRINT_COLOR_FORMAT(BGRX); + PRINT_COLOR_FORMAT(NV12); + +#undef PRINT_COLOR_FORMAT + + default: out << static_cast(fmt); break; + } + return out; +} /** * @struct InferenceEngineProfileInfo diff --git a/inference-engine/include/ie_compound_blob.h b/inference-engine/include/ie_compound_blob.h new file mode 100644 index 00000000000000..5a62f99e834a82 --- /dev/null +++ b/inference-engine/include/ie_compound_blob.h @@ -0,0 +1,218 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief A header file for CompoundBlob + * @file ie_compound_blob.h + */ +#pragma once + +#include "ie_blob.h" + +#include +#include +#include + +namespace InferenceEngine { +/** + * @brief This class represents a blob that contains other blobs + * + * Compound blob is a wrapper blob over references to underlying blobs. 
These blobs should share + * some properties and can be grouped into a single entity. + */ +class INFERENCE_ENGINE_API_CLASS(CompoundBlob) : public Blob { +public: + /** + * @brief A smart pointer to the CompoundBlob object + */ + using Ptr = std::shared_ptr; + + /** + * @brief A smart pointer to the const CompoundBlob object + */ + using CPtr = std::shared_ptr; + + /** + * @brief A virtual destructor + */ + virtual ~CompoundBlob() = default; + + /** + * @brief A copy constructor + */ + CompoundBlob(const CompoundBlob& blob); + + /** + * @brief A copy assignment operator + */ + CompoundBlob& operator=(const CompoundBlob& blob) = default; + + /** + * @brief A move constructor + */ + CompoundBlob(CompoundBlob&& blob); + + /** + * @brief A move assignment operator + */ + CompoundBlob& operator=(CompoundBlob&& blob) = default; + + /** + * @brief Constructs a compound blob from a vector of blobs + * @param blobs A vector of blobs that is copied to this object + */ + explicit CompoundBlob(const std::vector& blobs); + + /** + * @brief Constructs a compound blob from a vector of blobs + * @param blobs A vector of blobs that is moved to this object + */ + explicit CompoundBlob(std::vector&& blobs); + + /** + * @brief Always returns 0 + */ + size_t byteSize() const noexcept override; + + /** + * @brief Always returns 0 + */ + size_t element_size() const noexcept override; + + /** + * @brief No operation is performed. Compound blob does not allocate/deallocate any data + */ + void allocate() noexcept override; + + /** + * @brief No operation is performed. Compound blob does not allocate/deallocate any data + * @return false + */ + bool deallocate() noexcept override; + + /** + * @brief Always returns an empty LockedMemory object + */ + LockedMemory buffer() noexcept override; + + /** + * @brief Always returns an empty LockedMemory object + */ + LockedMemory cbuffer() const noexcept override; + + /** + * @brief Returns the number of underlying blobs in the compound blob + */ + size_t size() const noexcept override; + + /** + * @brief Returns an underlying blob at index i + * @param i the index of the underlying Blob object + * @return A smart pointer to the underlying Blob object or nullptr in case of an error + */ + virtual Blob::Ptr getBlob(size_t i) const noexcept; + +protected: + /** + * @brief A default constructor + */ + CompoundBlob(); + + /** + * @brief Compound blob container for underlying blobs + */ + std::vector _blobs; + + /** + * @brief Returns nullptr as CompoundBlob is not allocator-based + */ + const std::shared_ptr &getAllocator() const noexcept override; + + /** + * @brief Returns nullptr as CompoundBlob is not allocator-based + */ + void *getHandle() const noexcept override; +}; + +/** + * @brief Represents a blob that contains two planes (Y and UV) in NV12 color format + */ +class INFERENCE_ENGINE_API_CLASS(NV12Blob) : public CompoundBlob { +public: + /** + * @brief A smart pointer to the NV12Blob object + */ + using Ptr = std::shared_ptr; + + /** + * @brief A smart pointer to the const NV12Blob object + */ + using CPtr = std::shared_ptr; + + /** + * @brief A deleted default constructor + */ + NV12Blob() = delete; + + /** + * @brief Constructs NV12 blob from two planes Y and UV + * @param y Blob object that represents Y plane in NV12 color format + * @param uv Blob object that represents UV plane in NV12 color format + */ + NV12Blob(const Blob::Ptr& y, const Blob::Ptr& uv); + + /** + * @brief Constructs NV12 blob from two planes Y and UV + * @param y Blob object that represents Y 
plane in NV12 color format + * @param uv Blob object that represents UV plane in NV12 color format + */ + NV12Blob(Blob::Ptr&& y, Blob::Ptr&& uv); + + /** + * @brief A virtual destructor + */ + virtual ~NV12Blob() = default; + + /** + * @brief A copy constructor + */ + NV12Blob(const NV12Blob& blob) = default; + + /** + * @brief A copy assignment operator + */ + NV12Blob& operator=(const NV12Blob& blob) = default; + + /** + * @brief A move constructor + */ + NV12Blob(NV12Blob&& blob) = default; + + /** + * @brief A move assignment operator + */ + NV12Blob& operator=(NV12Blob&& blob) = default; + + /** + * @brief Returns a shared pointer to Y plane + */ + virtual Blob::Ptr& y() noexcept; + + /** + * @brief Returns a shared pointer to Y plane + */ + virtual const Blob::Ptr& y() const noexcept; + + /** + * @brief Returns a shared pointer to UV plane + */ + virtual Blob::Ptr& uv() noexcept; + + /** + * @brief Returns a shared pointer to UV plane + */ + virtual const Blob::Ptr& uv() const noexcept; +}; + +} // namespace InferenceEngine diff --git a/inference-engine/include/ie_core.hpp b/inference-engine/include/ie_core.hpp new file mode 100644 index 00000000000000..b47357291f4948 --- /dev/null +++ b/inference-engine/include/ie_core.hpp @@ -0,0 +1,158 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief This is a header file for the Inference Engine Core class C++ API + * @file ie_core.hpp + */ +#pragma once + +#include +#include +#include +#include + +#include "cpp/ie_plugin_cpp.hpp" +#include "ie_extension.h" + +namespace InferenceEngine { + +/** + * @brief This class represents Inference Engine Core entity. + * Its methods can throw exceptions that the application is expected to handle. + */ +class INFERENCE_ENGINE_API_CLASS(Core) { + class Impl; + std::shared_ptr<Impl> _impl; +public: + /** @brief Constructs Inference Engine Core instance using XML configuration file with + * plugins description. See RegisterPlugins for more details. + * @param xmlConfigFile A path to .xml file with plugins to load from. If the XML configuration file is not specified, + * then default Inference Engine plugins are loaded from the default plugin.xml file. + */ + explicit Core(const std::string & xmlConfigFile = std::string()); + + /** + * @brief Returns plugins version information + * @param deviceName Device name to identify a plugin + * @return A map of plugin names to versions + */ + std::map<std::string, Version> GetVersions(const std::string & deviceName) const; + + /** + * @brief Sets logging callback + * Logging is used to track what is going on inside the plugins and the Inference Engine library + * @param listener Logging sink + */ + void SetLogCallback(IErrorListener &listener) const; + + /** + * @brief Creates an executable network from a network object. 
Users can create as many networks as they need and use + * them simultaneously (up to the limitation of the hardware resources) + * @param network CNNNetwork object acquired from CNNNetReader + * @param deviceName Name of device to load network to + * @param config Optional map of pairs: (config parameter name, config parameter value) relevant only for this load operation + * @return An executable network reference + */ + ExecutableNetwork LoadNetwork(CNNNetwork network, const std::string & deviceName, + const std::map<std::string, std::string> & config = std::map<std::string, std::string>()); + + /** + * @brief Registers extension for the specified plugin + * @param deviceName Device name to identify the plugin to add an extension in + * @param extension Pointer to already loaded extension + */ + void AddExtension(IExtensionPtr extension, const std::string & deviceName); + + /** + * @brief Creates an executable network from a previously exported network + * @param deviceName Name of a device to load the executable network on + * @param modelFileName Path to the location of the exported file + * @param config Optional map of pairs: (config parameter name, config parameter value) relevant only for this load operation + * @return An executable network reference + */ + ExecutableNetwork ImportNetwork(const std::string &modelFileName, const std::string & deviceName, + const std::map<std::string, std::string> &config = std::map<std::string, std::string>()); + + /** + * @brief Queries a device whether it supports the specified network with the specified configuration + * @param deviceName A name of a device to query + * @param network Network object to query + * @param config Optional map of pairs: (config parameter name, config parameter value) + * @return An object containing a map of supported layers and the status of the query + */ + QueryNetworkResult QueryNetwork(const ICNNNetwork &network, const std::string & deviceName, + const std::map<std::string, std::string> & config = std::map<std::string, std::string>()) const; + + /** + * @brief Sets configuration for device, acceptable keys can be found in ie_plugin_config.hpp + * @param deviceName An optional name of a device. If device name is not specified, the config is set for all the registered devices. + * @param config Map of pairs: (config parameter name, config parameter value) + */ + void SetConfig(const std::map<std::string, std::string> &config, const std::string & deviceName = std::string()); + + /** + * @brief Gets configuration dedicated to device behaviour. The method is targeted to extract information + * which can be set via SetConfig method. + * @param deviceName - A name of a device to get a configuration value. + * @param name - config key, can be found in ie_plugin_config.hpp. + * @return Value of config corresponding to config key. + */ + Parameter GetConfig(const std::string & deviceName, const std::string & name) const; + + /** + * @brief Gets general runtime metric for dedicated hardware. The method is needed to request common device properties + * which are executable network agnostic. It can be device name, temperature, and other device-specific values. + * @param deviceName - A name of a device to get a metric value. + * @param name - metric name to request. + * @return Metric value corresponding to metric key. + */ + Parameter GetMetric(const std::string & deviceName, const std::string & name) const; + + /** + * @brief Returns devices available for neural networks inference + * @return A vector of devices. The devices are returned as { CPU, FPGA.0, FPGA.1, MYRIAD } + If there is more than one device of a specific type, they are enumerated with the .# suffix. 
+ */ + std::vector<std::string> GetAvailableDevices() const; + + /** + * @brief Registers a new device and a plugin which implements this device inside Inference Engine. + * @param pluginName A name of a plugin. Depending on the platform, pluginName is wrapped with a shared library suffix and prefix to identify the full library name + * @param deviceName A device name to register the plugin for. If device name is not specified, then it's taken from plugin + * using InferenceEnginePluginPtr::GetName function + */ + void RegisterPlugin(const std::string & pluginName, const std::string & deviceName); + + /** + * @brief Removes a plugin with the specified name from Inference Engine + * @param deviceName Device name identifying plugin to remove from Inference Engine + */ + void UnregisterPlugin(const std::string & deviceName); + + /** @brief Registers plugin to Inference Engine Core instance using XML configuration file with + * plugins description. XML file has the following structure: + * + * <ie> + *     <plugins> + *         <plugin name="" location=""> + *             <extensions> + *                 <extension location=""/> + *             </extensions> + *             <properties> + *                 <property key="" value=""/> + *             </properties> + *         </plugin> + *     </plugins> + * </ie> + * + * - `name` identifies the name of the device enabled by the plugin + * - `location` specifies the absolute path to the dynamic library with the plugin. A path can also be relative to the Inference Engine shared library. + * It allows having a common config for different systems with different configurations. + * - Properties are set to plugin via the `SetConfig` method. + * - Extensions are set to plugin via the `AddExtension` method. + */ + void RegisterPlugins(const std::string & xmlConfigFile); +}; +} // namespace InferenceEngine diff --git a/inference-engine/include/ie_data.h b/inference-engine/include/ie_data.h index 0ae2073066f8f2..75e906f4424c2f 100644 --- a/inference-engine/include/ie_data.h +++ b/inference-engine/include/ie_data.h @@ -27,40 +27,47 @@ namespace InferenceEngine { class INFERENCE_ENGINE_API_CLASS(Data) { public: /** - * @deprecated Deprecated. Please use getPrecision() + * @deprecated Use Data::getPrecision * @brief A precision type of this Data instance */ + INFERENCE_ENGINE_DEPRECATED Precision precision; /** - * @deprecated Deprecated. Please use getFormat() + * @deprecated Use Data::getFormat * @brief A data layout of this Data instance */ + INFERENCE_ENGINE_DEPRECATED Layout layout; /** - * @deprecated Deprecated. Please use getDims() + * @deprecated Use Data::getDims * @brief A tensor dimension array (the order is opposite to the order in the IR: w,h,c,n) of this Data instance */ + INFERENCE_ENGINE_DEPRECATED SizeVector dims; /** - * @deprecated Deprecated. Please use getCreatorLayer() + * @deprecated Use Data::getCreatorLayer * @brief A pointer to the layer that creates this data element, null for input data elements */ + INFERENCE_ENGINE_DEPRECATED CNNLayerWeakPtr creatorLayer; /** - * @deprecated Deprecated. Please use getName() + * @deprecated Use Data::getName * @brief A unique name that identifies this data node */ + INFERENCE_ENGINE_DEPRECATED std::string name; /** - * @deprecated Deprecated. Please use getInputTo() + * @deprecated Use Data::getInputTo * @brief A map of layers that use this node as input. * It is useful for recursive NN graph traversal. */ + INFERENCE_ENGINE_DEPRECATED std::map<std::string, CNNLayerPtr> inputTo; /** - * @deprecated Deprecated. 
Please use getUserObject + * @deprecated Use Data::getUserObject * @brief A user utility placeholder */ + INFERENCE_ENGINE_DEPRECATED UserValue userObject; /** @@ -84,6 +91,22 @@ class INFERENCE_ENGINE_API_CLASS(Data) { */ Data(const std::string &name, const TensorDesc& desc); + /** + * @brief A copy constructor + * @param data A Data object to copy from + */ + Data(const Data & data); + + /** + * @brief A destructor + */ + ~Data(); + + /** + * @brief An assignment operator + */ + Data & operator = (const Data &); + /** * @brief Checks if the current node is resolved * @return true if resolved, false otherwise. @@ -98,11 +121,12 @@ void setDims(const SizeVector &a_dims); /** - * @deprecated + * @deprecated Use Data::setDims to set batch size. * @brief Sets the batch value in the data dimensions. * Batch is defined as the last element in the dimensions vector. * @param batch_size Batch size to set */ + INFERENCE_ENGINE_DEPRECATED void setBatchSize(size_t batch_size); /** @@ -136,8 +160,8 @@ class INFERENCE_ENGINE_API_CLASS(Data) { const Precision& getPrecision() const; /** - * @brief Gets a precision type of this Data instance - * @return Precision type + * @brief Sets a precision type of this Data instance + * @param precision Precision of the data */ void setPrecision(const Precision& precision); @@ -156,6 +180,14 @@ */ const std::string& getName() const; + + /** + * @brief Sets a name for the Data object + * @param newName Name of the data node + */ + void setName(const std::string& newName); + /** * @brief Returns child layers in the di-graph */ diff --git a/inference-engine/include/ie_device.hpp b/inference-engine/include/ie_device.hpp index 778d382891ab69..c16602113e768e 100644 --- a/inference-engine/include/ie_device.hpp +++ b/inference-engine/include/ie_device.hpp @@ -17,6 +17,7 @@ namespace InferenceEngine { /** + * @deprecated Deprecated since the enum is not scalable for 3rd party plugins / devices. 
All devices are managed by InferenceEngine::Core * @enum TargetDevice * @brief Describes known device types */ @@ -32,14 +33,16 @@ enum class TargetDevice : uint8_t { }; /** + * @deprecated Deprecated since InferenceEngine::TargetDevice is deprecated * @brief Describes the relationship between the enumerator type and the actual device's name */ -class TargetDeviceInfo { +class INFERENCE_ENGINE_DEPRECATED TargetDeviceInfo { struct Info { TargetDevice device; std::string name; Info(TargetDevice device, std::string name) : device(device), name(name){} }; + static const std::vector & getAll() { #define DECL_DEVICE(device_type) {TargetDevice::e##device_type, #device_type} @@ -51,13 +54,18 @@ class TargetDeviceInfo { DECL_DEVICE(FPGA), DECL_DEVICE(MYRIAD), DECL_DEVICE(GNA), - DECL_DEVICE(HETERO), + DECL_DEVICE(HETERO) }; #undef DECLARE return g_allDeviceInfos; } public: + /** + * @deprecated Deprecated since InferenceEngine::TargetDevice is deprecated + * @brief Converts string representation of device to InferenceEngine::TargetDevice enum value + */ + INFERENCE_ENGINE_DEPRECATED static TargetDevice fromStr(const std::string &deviceName) { static std::map deviceFromNameMap = { { "CPU", InferenceEngine::TargetDevice::eCPU }, @@ -66,63 +74,88 @@ class TargetDeviceInfo { { "MYRIAD", InferenceEngine::TargetDevice::eMYRIAD }, { "GNA", InferenceEngine::TargetDevice::eGNA }, { "BALANCED", InferenceEngine::TargetDevice::eBalanced }, - { "HETERO", InferenceEngine::TargetDevice::eHETERO }, + { "HETERO", InferenceEngine::TargetDevice::eHETERO } }; auto val = deviceFromNameMap.find(deviceName); return val != deviceFromNameMap.end() ? val->second : InferenceEngine::TargetDevice::eDefault; } + /** + * @deprecated Deprecated since InferenceEngine::TargetDevice is deprecated + * @brief Converts InferenceEngine::TargetDevice enum value to string representation + */ + INFERENCE_ENGINE_DEPRECATED static const char * name(TargetDevice device) { + IE_SUPPRESS_DEPRECATED_START auto res = std::find_if(getAll().cbegin(), getAll().cend(), [&](const Info & info){ return device == info.device; }); if (res == getAll().cend()) { return "Unknown device"; } + IE_SUPPRESS_DEPRECATED_END return res->name.c_str(); } }; /** + * @deprecated Deprecated since InferenceEngine::TargetDevice is deprecated * @brief Returns the device name - * @param device Instance of TargetDevice + * @param device Instance of InferenceEngine::TargetDevice * @return A c-string with the name */ +INFERENCE_ENGINE_DEPRECATED inline const char *getDeviceName(TargetDevice device) { + IE_SUPPRESS_DEPRECATED_START return TargetDeviceInfo::name(device); + IE_SUPPRESS_DEPRECATED_END } /** + * @deprecated Deprecated since InferenceEngine::TargetDevice is deprecated * @struct FindPluginRequest - * @brief Defines a message that contains the TargetDevice object to find a plugin for + * @brief Defines a message that contains the InferenceEngine::TargetDevice object to find a plugin for */ -struct FindPluginRequest { +struct INFERENCE_ENGINE_DEPRECATED FindPluginRequest { /** - * @brief object of TargetDevice to find a plugin for + * @brief object of InferenceEngine::TargetDevice to find a plugin for */ TargetDevice device; }; /** + * @deprecated Deprecated since InferenceEngine::TargetDevice is deprecated * @struct FindPluginResponse * @brief Defines a message that contains a list of appropriate plugin names */ -struct FindPluginResponse { +struct INFERENCE_ENGINE_DEPRECATED FindPluginResponse { /** * @brief A list of appropriate plugin names */ std::vector names; }; 
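How client code is expected to combine the pieces above: the INFERENCE_ENGINE_DEPRECATED attribute now marks the legacy TargetDevice helpers, and the IE_SUPPRESS_DEPRECATED_START/END pragmas from ie_api.h silence the warning locally where a call is still unavoidable. A minimal sketch:

```cpp
#include <ie_device.hpp>

using namespace InferenceEngine;

const char* legacyDeviceName() {
    // Suppresses C4996 / -Wdeprecated-declarations for this call site only;
    // the deprecation attribute on getDeviceName() stays in effect elsewhere.
    IE_SUPPRESS_DEPRECATED_START
    return getDeviceName(TargetDevice::eCPU);
    IE_SUPPRESS_DEPRECATED_END
}
```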
+IE_SUPPRESS_DEPRECATED_START + /** + * @deprecated Deprecated since InferenceEngine::TargetDevice is deprecated * @brief Finds an appropriate plugin for requested target device * @param req A requested target device - * @param result The results of the request - * @param resp The response message description - * @return A response message + * @return A response object */ FindPluginResponse findPlugin(const FindPluginRequest &req); +/** + * @deprecated Deprecated since InferenceEngine::TargetDevice is deprecated + * @brief Finds an appropriate plugin for requested target device + * @param req A requested target device + * @param result The results of the request + * @param resp The response message description + * @return A status code + */ INFERENCE_ENGINE_API(StatusCode) findPlugin(const FindPluginRequest &req, FindPluginResponse &result, ResponseDesc *resp) noexcept; + +IE_SUPPRESS_DEPRECATED_END + } // namespace InferenceEngine diff --git a/inference-engine/include/ie_icnn_network.hpp b/inference-engine/include/ie_icnn_network.hpp index cf6869b6ae9926..26f536d14aee0f 100644 --- a/inference-engine/include/ie_icnn_network.hpp +++ b/inference-engine/include/ie_icnn_network.hpp @@ -121,28 +121,40 @@ class ICNNNetwork : public details::IRelease { virtual StatusCode getLayerByName(const char* layerName, CNNLayerPtr& out, ResponseDesc* resp) const noexcept = 0; /** + * @deprecated Deprecated since TargetDevice is deprecated. Specify target device in InferenceEngine::Core directly. * @brief Sets a desirable device to perform all work on. * Some plug-ins might not support some target devices and may abort execution with an appropriate error message. * @param device Device to set as a target */ + #ifndef _WIN32 + INFERENCE_ENGINE_DEPRECATED + #endif virtual void setTargetDevice(TargetDevice device) noexcept = 0; /** + * @deprecated Deprecated since TargetDevice is deprecated * @brief Gets the target device. * If setTargetDevice() was not called before, returns eDefault * @return A TargetDevice instance */ + #ifndef _WIN32 + INFERENCE_ENGINE_DEPRECATED + #endif virtual TargetDevice getTargetDevice() const noexcept = 0; /** - * @deprecated use setBatchSize with ResponseDesc to get error message + * @deprecated Use ICNNNetwork::setBatchSize(size_t, ResponseDesc*) * @brief Changes the inference batch size */ - virtual StatusCode setBatchSize(const size_t size) noexcept = 0; + INFERENCE_ENGINE_DEPRECATED + virtual StatusCode setBatchSize(const size_t size) noexcept { + ResponseDesc resp; + return setBatchSize(size, &resp); + } /** * @brief Changes the inference batch size. - * @note There are several limitations and it's not recommended to use it. Set batch to the input shape and call @reshape. + * @note There are several limitations and it's not recommended to use it. Set batch to the input shape and call ICNNNetwork::reshape. * @param size Size of batch to set * @return Status code of the operation * @note: Current implementation of the function sets batch size to the first dimension of all layers in the networks. 
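Since the one-argument setBatchSize is now deprecated in favour of reshape-driven batching (per the @note above), a minimal migration sketch; assumes the 2019 R2 C++ wrapper, and the IR paths are placeholders:

    #include <inference_engine.hpp>

    int main() {
        InferenceEngine::CNNNetReader reader;
        reader.ReadNetwork("model.xml");   // placeholder model path
        reader.ReadWeights("model.bin");   // placeholder weights path
        InferenceEngine::CNNNetwork network = reader.getNetwork();

        // Instead of setBatchSize(), write the batch into the first dimension
        // of every input shape and let reshape() propagate it through the net.
        auto shapes = network.getInputShapes();
        for (auto &shape : shapes)
            shape.second[0] = 8;           // requested batch size
        network.reshape(shapes);
        return 0;
    }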
diff --git a/inference-engine/include/ie_iexecutable_network.hpp b/inference-engine/include/ie_iexecutable_network.hpp index f3f422191a78c7..7dafa4086582a9 100644 --- a/inference-engine/include/ie_iexecutable_network.hpp +++ b/inference-engine/include/ie_iexecutable_network.hpp @@ -14,6 +14,7 @@ #include "ie_icnn_network.hpp" #include "ie_imemory_state.hpp" #include "ie_input_info.hpp" +#include "ie_parameter.hpp" #include #include #include @@ -96,7 +97,36 @@ class IExecutableNetwork : public details::IRelease { * @param resp Optional: pointer to an already allocated object to contain information in case of failure * @return Status code of the operation: OK (0) for success, OUT_OF_BOUNDS (-6) no memory state for given index */ - virtual StatusCode QueryState(IMemoryState::Ptr & pState, size_t idx, ResponseDesc *resp) noexcept = 0; + virtual StatusCode QueryState(IMemoryState::Ptr & pState, size_t idx, ResponseDesc *resp) noexcept = 0; + + /** + * @brief Sets configuration for current executable network + * @param config Map of pairs: (config parameter name, config parameter value) + * @param resp Pointer to the response message that holds a description of an error if any occurred + * @return code of the operation. OK if succeeded + */ + virtual StatusCode SetConfig(const std::map<std::string, std::string> &config, ResponseDesc *resp) noexcept = 0; + + /** @brief Gets configuration for current executable network. The method is responsible for extracting information + * which affects executable network execution. The list of supported configuration values can be extracted via + * ExecutableNetwork::GetMetric with the SUPPORTED_CONFIG_KEYS key, but some of these keys cannot be changed dynamically, + * e.g. DEVICE_ID cannot be changed if an executable network has already been compiled for a particular device. + * @param name - config key, can be found in ie_plugin_config.hpp + * @param result - value of config corresponding to config key + * @param resp - Pointer to the response message that holds a description of an error if any occurred + * @return code of the operation. OK if succeeded + */ + virtual StatusCode GetConfig(const std::string &name, Parameter &result, ResponseDesc *resp) const noexcept = 0; + + /** + * @brief Gets general runtime metric for an executable network. It can be network name, actual device ID on + * which executable network is running or all other properties which cannot be changed dynamically. + * @param name - metric name to request + * @param result - metric value corresponding to metric key + * @param resp - Pointer to the response message that holds a description of an error if any occurred + * @return code of the operation. OK if succeeded + */ + virtual StatusCode GetMetric(const std::string &name, Parameter &result, ResponseDesc *resp) const noexcept = 0; }; } // namespace InferenceEngine diff --git a/inference-engine/include/ie_iextension.h b/inference-engine/include/ie_iextension.h index 7d529b4b5bc2c3..e1510d91c652be 100644 --- a/inference-engine/include/ie_iextension.h +++ b/inference-engine/include/ie_iextension.h @@ -127,13 +127,14 @@ class ILayerImplFactory { virtual ~ILayerImplFactory() = default; /** - * @deprecated + * @deprecated Implement IShapeInferImpl extension for shape inference. * @brief Sets output shapes by input shapes.
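A hedged usage sketch for the new SetConfig/GetConfig/GetMetric triple through the ExecutableNetwork wrapper (assumes the wrapper forwards to the interface above and that Parameter exposes an as<T>() accessor; paths are placeholders):

    #include <inference_engine.hpp>
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
        InferenceEngine::Core ie;
        InferenceEngine::CNNNetReader reader;
        reader.ReadNetwork("model.xml");   // placeholder paths
        reader.ReadWeights("model.bin");
        auto exec = ie.LoadNetwork(reader.getNetwork(), "CPU");

        // SUPPORTED_CONFIG_KEYS enumerates what GetConfig/SetConfig accept.
        std::vector<std::string> keys =
            exec.GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS)).as<std::vector<std::string>>();
        for (const auto &key : keys)
            std::cout << key << std::endl;

        // A single option comes back as a typed Parameter.
        if (!keys.empty()) {
            InferenceEngine::Parameter value = exec.GetConfig(keys.front());
            (void)value;  // the underlying type depends on the key
        }
        return 0;
    }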
* @param inShapes Shapes of all inputs coming in this layer * @param outShapes Generated shapes coming from this layer given the input * @param resp Response descriptor * @return Status code */ + INFERENCE_ENGINE_DEPRECATED virtual StatusCode getShapes(const std::vector& /*inShapes*/, std::vector& /*outShapes*/, ResponseDesc* /*resp*/) noexcept { return NOT_IMPLEMENTED; @@ -168,14 +169,18 @@ class IShapeInferImpl { ResponseDesc* /*resp*/) noexcept { return NOT_IMPLEMENTED; } // For backward-compatibility /** - * @deprecated + * @deprecated Use IShapeInferImpl::inferShapes(const std::vector&, const std::map&, + const std::map&, std::vector&, ResponseDesc* ) noexcept. * @brief check that reshape can be applied, that parameters and shapes are valid */ + INFERENCE_ENGINE_DEPRECATED virtual StatusCode inferShapes(const std::vector& /*inShapes*/, const std::map& /*params*/, const std::map& /*blobs*/, std::vector& /*outShapes*/, - ResponseDesc* /*resp*/) noexcept { return NOT_IMPLEMENTED; } // For backward-compatibility + ResponseDesc* /*resp*/) noexcept { + return NOT_IMPLEMENTED; + } }; /** diff --git a/inference-engine/include/ie_ihetero_plugin.hpp b/inference-engine/include/ie_ihetero_plugin.hpp index f9f1f23a8684d2..c1e3fae543cd27 100644 --- a/inference-engine/include/ie_ihetero_plugin.hpp +++ b/inference-engine/include/ie_ihetero_plugin.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -17,7 +18,8 @@ namespace InferenceEngine { /** - * This interface describes a mechanism of custom loaders to be used in heterogeneous + * @deprecated Use InferenceEngine::Core to work with HETERO device + * @brief This interface describes a mechanism of custom loaders to be used in heterogeneous * plugin during setting of affinity and loading of split sub-network to the plugins * The custom loader can define addition settings for the plugins or network loading * Examples of cases when this interface should be implemented in the application: @@ -34,13 +36,13 @@ namespace InferenceEngine { * IHeteroInferencePlugin::SetDeviceLoader("Device1", HeteroDeviceLoaderImpl1) * IHeteroInferencePlugin::SetDeviceLoader("Device2", HeteroDeviceLoaderImpl2) */ -class IHeteroDeviceLoader { +class INFERENCE_ENGINE_DEPRECATED INFERENCE_ENGINE_API_CLASS(IHeteroDeviceLoader) { public: - using Ptr = std::shared_ptr; - virtual ~IHeteroDeviceLoader() = default; + virtual ~IHeteroDeviceLoader(); /** - * Loads network to the device. The instantiation of plugin should be in the implementation + * @deprecated Use InferenceEngine::Core with HETERO device in InferenceEngine::Core::LoadNetwork. + * @brief Loads network to the device. The instantiation of plugin should be in the implementation * of the IHeteroDeviceLoader. As well setting of special config option should happen in the * implementation as well * @param device Loading of network should happen for this device @@ -50,6 +52,7 @@ class IHeteroDeviceLoader { * @param resp Pointer to the response message that holds a description of an error if any occurred * @return Status code of the operation. 
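Given the deprecation above, a sketch of a custom shape-inference extension for a hypothetical shape-preserving "CustomOp"; the parameter types are restored from the headers on a best-effort basis (the diff text has them elided), so treat the exact signature as an assumption:

    #include <ie_iextension.h>
    #include <map>
    #include <string>
    #include <vector>

    class CustomOpShapeInfer : public InferenceEngine::IShapeInferImpl {
    public:
        InferenceEngine::StatusCode inferShapes(
                const std::vector<InferenceEngine::Blob::CPtr> &inBlobs,
                const std::map<std::string, std::string> &params,
                const std::map<std::string, InferenceEngine::Blob::Ptr> &blobs,
                std::vector<InferenceEngine::SizeVector> &outShapes,
                InferenceEngine::ResponseDesc *resp) noexcept override {
            if (inBlobs.empty()) return InferenceEngine::GENERAL_ERROR;
            // Shape-preserving op: the output shape equals the first input shape.
            outShapes.clear();
            outShapes.push_back(inBlobs[0]->getTensorDesc().getDims());
            return InferenceEngine::OK;
        }
    };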
OK if succeeded */ + INFERENCE_ENGINE_DEPRECATED virtual StatusCode LoadNetwork( const std::string& device, IExecutableNetwork::Ptr &ret, @@ -58,54 +61,75 @@ class IHeteroDeviceLoader { ResponseDesc *resp) noexcept = 0; /** - * @depricated Use the version with config parameter - * This function calls plugin function QueryNetwork for the plugin being instantiated + * @deprecated Use the IHeteroDeviceLoader::QueryNetwork + * @brief This function calls plugin function QueryNetwork for the plugin being instantiated * in the implementation of IHeteroDeviceLoader * @param device QueryNetwork will be executed for this device * @param network Network object acquired from CNNNetReader - * @param res + * @param res Query network result object */ + INFERENCE_ENGINE_DEPRECATED virtual void QueryNetwork(const std::string &device, const ICNNNetwork &network, - QueryNetworkResult &res) noexcept = 0; + QueryNetworkResult &res) noexcept { + IE_SUPPRESS_DEPRECATED_START + QueryNetwork(device, network, { }, res); + IE_SUPPRESS_DEPRECATED_END + } /** - * This function calls plugin function QueryNetwork for the plugin being instantiated + * @deprecated Use InferenceEngine::Core with HETERO device in InferenceEngine::Core::QueryNetwork. + * @brief This function calls plugin function QueryNetwork for the plugin being instantiated * in the implementation of IHeteroDeviceLoader * @param device QueryNetwork will be executed for this device * @param network Network object acquired from CNNNetReader * @param config Network configuration parameters - * @param res + * @param res Query network result object */ + INFERENCE_ENGINE_DEPRECATED virtual void QueryNetwork(const std::string &device, const ICNNNetwork &network, const std::map& /*config*/, - QueryNetworkResult &res) noexcept { - QueryNetwork(device, network, res); - }; + QueryNetworkResult &res) noexcept = 0; + INFERENCE_ENGINE_DEPRECATED virtual void SetLogCallback(IErrorListener &listener) = 0; + + IE_SUPPRESS_DEPRECATED_START + using Ptr = std::shared_ptr; + IE_SUPPRESS_DEPRECATED_END }; +IE_SUPPRESS_DEPRECATED_START using MapDeviceLoaders = std::map; +IE_SUPPRESS_DEPRECATED_END /** - * This interface extends regular plugin interface for heterogeneous case. Not all plugins + * @deprecated Use InferenceEngine::Core with HETERO mode in LoadNetwork, QueryNetwork, etc + * @brief This interface extends regular plugin interface for heterogeneous case. Not all plugins * implements it. The main purpose of this interface - to register loaders and have an ability * to get default settings for affinity on certain devices. 
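The Core-based replacement for device loaders, sketched end to end: QueryNetwork on a HETERO device yields the per-layer device map, which is written into layer affinities before loading (assumes the 2019 R2 Core API; the device list and paths are placeholders):

    #include <inference_engine.hpp>

    int main() {
        InferenceEngine::Core ie;
        InferenceEngine::CNNNetReader reader;
        reader.ReadNetwork("model.xml");   // placeholder paths
        reader.ReadWeights("model.bin");
        auto network = reader.getNetwork();

        // supportedLayersMap: layer name -> device suggested by the plugins.
        auto res = ie.QueryNetwork(network, "HETERO:FPGA,CPU", {});
        for (const auto &item : res.supportedLayersMap) {
            auto layer = network.getLayerByName(item.first.c_str());
            layer->affinity = item.second;  // pin the layer to that device
        }
        auto exec = ie.LoadNetwork(network, "HETERO:FPGA,CPU");
        return 0;
    }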
*/ -class IHeteroInferencePlugin : public IInferencePlugin { +class INFERENCE_ENGINE_DEPRECATED INFERENCE_ENGINE_API_CLASS(IHeteroInferencePlugin) : public IInferencePlugin { public: + virtual ~IHeteroInferencePlugin(); + /** + * @deprecated Use InferenceEngine::Core to work with HETERO device * Registers device loader for the device * @param device - the device name being used in CNNNLayer::affinity * @param loader - helper class allowing to analyze if layers are supported and allow * to load network to the plugin being defined in the IHeteroDeviceLoader implementation */ + IE_SUPPRESS_DEPRECATED_START + INFERENCE_ENGINE_DEPRECATED virtual void SetDeviceLoader(const std::string &device, IHeteroDeviceLoader::Ptr loader) noexcept = 0; + IE_SUPPRESS_DEPRECATED_END /** - * The main goal of this function to set affinity according to the options set for the plugin\ + * @deprecated Use InferenceEngine::Core::QueryNetwork with HETERO device and QueryNetworkResult::supportedLayersMap + * to set affinities to a network + * @brief The main goal of this function is to set affinity according to the options set for the plugin * implementing IHeteroInferencePlugin. * This function works only if all affinity in the network are empty. * @param network Network object acquired from CNNNetReader @@ -113,6 +137,7 @@ class IHeteroInferencePlugin : public IInferencePlugin { * @param resp Pointer to the response message that holds a description of an error if any occurred * @return Status code of the operation. OK if succeeded */ + INFERENCE_ENGINE_DEPRECATED virtual StatusCode SetAffinity( ICNNNetwork& network, const std::map<std::string, std::string> &config, diff --git a/inference-engine/include/ie_input_info.hpp b/inference-engine/include/ie_input_info.hpp index 590b4918d9ca65..8733f7ac7ac9fb 100644 --- a/inference-engine/include/ie_input_info.hpp +++ b/inference-engine/include/ie_input_info.hpp @@ -30,7 +30,7 @@ class InputInfo { using CPtr = std::shared_ptr<const InputInfo>; /** - * @deprecated it will be removed from public API. Please use getPrecision() + * @deprecated Use InputInfo::getPrecision * @brief Gets a precision of the input data provided by user * * By default it matches the layers precision, but there are exceptions of this rule @@ -40,21 +40,23 @@ * @details By default it matches the layers precision, but there are exceptions of this rule. * For Q78 precision networks the input is expected in I16 by default. * For FP16 precision networks the input is expected in FP32 by default. - * The default input precision might be changed preferred one using setInputPrecision() + * The default input precision might be changed to the preferred one using InputInfo::setPrecision * function. * For example, for a Q78 precision network you can pass FP32 input data * @return The precision used for input blob creation */ + INFERENCE_ENGINE_DEPRECATED Precision getInputPrecision() const { return getPrecision(); } /** - * @deprecated it will be removed from public API. Please use setPrecision() + * @deprecated Use InputInfo::setPrecision * @brief Changes the precision of the input data provided by the user. * This function should be called before loading the network to the plugin * @param p A new precision of the input data to set */ + INFERENCE_ENGINE_DEPRECATED void setInputPrecision(Precision p) { setPrecision(p); } @@ -69,7 +71,7 @@ class InputInfo { * @details By default it matches the layers precision, but there are exceptions of this rule. * For Q78 precision networks the input is expected in I16 by default.
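The non-deprecated precision API in one short sketch, matching the defaults described above (I16 for Q78 networks, FP32 override so user data can stay float):

    #include <inference_engine.hpp>

    void configureInputs(InferenceEngine::CNNNetwork &network) {
        // setPrecision replaces the deprecated setInputPrecision: request FP32
        // input data even where the network default would be I16 or FP16.
        for (auto &item : network.getInputsInfo())
            item.second->setPrecision(InferenceEngine::Precision::FP32);
    }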
* For FP16 precision networks the input is expected in FP32 by default. - * The default input precision might be changed preferred one using setInputPrecision() + * The default input precision might be changed to the preferred one using InputInfo::setPrecision() * function. * For example, for a Q78 precision network you can pass FP32 input data * @return The precision used for input blob creation @@ -139,7 +141,7 @@ class InputInfo { /** * @brief Initializes the pointer to the input data that stores the main input parameters like dims, etc. * This method initializes the precision with the information from the inputPtr if it was not set - * explicitly through setInputPrecision(). If setInputPrecision() was called, this method does not overwrite the precision. + * explicitly through InputInfo::setPrecision. If InputInfo::setPrecision is called, this method does not overwrite the precision. * @param inputPtr Pointer to the input data to set */ void setInputData(DataPtr inputPtr) { @@ -147,13 +149,16 @@ } /** - * @deprecated Please use getTensorDesc for working with layouts and dimensions + * @deprecated Please use InputInfo::getTensorDesc for working with layouts and dimensions * @brief Gets dimensions/shape of the input data with reversed order * @return A SizeVector object that contains dimensions of the input data. If the data is not set, the method returns an empty SizeVector object. */ + INFERENCE_ENGINE_DEPRECATED SizeVector getDims() const { if (_inputData) { - return _inputData->dims; + auto dims = _inputData->getTensorDesc().getDims(); + std::reverse(dims.begin(), dims.end()); + return dims; } else { return SizeVector(); } diff --git a/inference-engine/include/ie_layers.h b/inference-engine/include/ie_layers.h index c3e867ee0f3417..66f43fa00c684d 100644 --- a/inference-engine/include/ie_layers.h +++ b/inference-engine/include/ie_layers.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "ie_common.h" #include "ie_data.h" @@ -118,16 +119,37 @@ class CNNLayer { */ INFERENCE_ENGINE_API_CPP(void) validateLayer(); + /** + * @brief Parse string with float in accordance with IE rules + * @param str input string with float value + * @return float value if parsing was successful + * @throws InferenceEngineException in case of parsing error + */ + static float ie_parse_float(const std::string &str) { + if (str == "-inf") { + return -std::numeric_limits<float>::infinity(); + } else if (str == "inf") { + return std::numeric_limits<float>::infinity(); + } else { + float res; + std::stringstream val_stream(str); + val_stream.imbue(std::locale("C")); + val_stream >> res; + if (!val_stream.eof()) THROW_IE_EXCEPTION; + return res; + } + } + /** * @brief Gets float value for the given parameter - * @param param - name of the parameter to find - * @param def - default value of the parameter if not found + * @param param name of the parameter to find + * @param def default value of the parameter if not found * @return float value */ float GetParamAsFloat(const char* param, float def) const { std::string val = GetParamAsString(param, std::to_string(def).c_str()); try { - return std::stof(val); + return ie_parse_float(val); } catch (...) { THROW_IE_EXCEPTION << "Cannot parse parameter " << param << " from IR for layer " << name << ". Value " << val << " cannot be casted to float."; @@ -142,7 +164,7 @@ class CNNLayer { float GetParamAsFloat(const char *param) const { std::string val = GetParamAsString(param); try { - return std::stof(val); + return ie_parse_float(val); } catch (...)
{ THROW_IE_EXCEPTION << "Cannot parse parameter " << param << " from IR for layer " << name << ". Value " << val << " cannot be casted to float."; @@ -164,7 +186,8 @@ class CNNLayer { return def; while (getline(stream, str, ',')) { try { - result.push_back(std::stof(str)); + float val = ie_parse_float(str); + result.push_back(val); } catch (...) { THROW_IE_EXCEPTION << "Cannot parse parameter " << param << " " << str << " from IR for layer " << name << ". Value " << vals << " cannot be casted to floats."; @@ -185,7 +208,8 @@ class CNNLayer { std::string str; while (getline(stream, str, ',')) { try { - result.push_back(std::stof(str)); + float val = ie_parse_float(str); + result.push_back(val); } catch (...) { THROW_IE_EXCEPTION << "Cannot parse parameter " << param << " " << str << " from IR for layer " << name << ". Value " << vals << " cannot be casted to floats."; @@ -299,7 +323,7 @@ class CNNLayer { unsigned int GetParamAsUInt(const char *param) const { std::string val = GetParamAsString(param); std::string message = "Cannot parse parameter " + std::string(param) + " from IR for layer " + name - + ". Value " + val + " cannot be casted to int."; + + ". Value " + val + " cannot be casted to unsigned int."; try { int value = std::stoi(val); if (value < 0) { @@ -324,7 +348,7 @@ class CNNLayer { std::istringstream stream(vals); std::string str; std::string message = "Cannot parse parameter " + std::string(param) + " " + str + " from IR for layer " + name - + ". Value " + vals + " cannot be casted to int."; + + ". Value " + vals + " cannot be casted to unsigned int."; if (vals.empty()) return def; while (getline(stream, str, ',')) { @@ -390,8 +414,9 @@ class CNNLayer { return result; } /** - * @deprecated Use GetParamAsBool function for that functionality + * @deprecated Use CNNLayer::GetParamAsBool */ + INFERENCE_ENGINE_DEPRECATED bool GetParamsAsBool(const char *param, bool def) const { return GetParamAsBool(param, def); } @@ -588,6 +613,20 @@ class DeconvolutionLayer : public ConvolutionLayer { using ConvolutionLayer::operator=; }; +/** + * @brief This class represents a standard deformable convolution layer + */ +class DeformableConvolutionLayer : public ConvolutionLayer { +public: + using ConvolutionLayer::ConvolutionLayer; + using ConvolutionLayer::operator=; + + /** + * @brief Number of deformable groups + */ + unsigned int _deformable_group = 1u; +}; + /** * @brief This class represents a standard pooling layer */ @@ -839,7 +878,7 @@ class NormLayer : public CNNLayer { */ unsigned int _size = 0; /** - * @deprecated + * @brief K */ unsigned int _k = 1; /** @@ -981,7 +1020,7 @@ class EltwiseLayer : public CNNLayer { enum eOperation { Sum = 0, Prod, Max, Sub, Min, Div, Squared_diff, Floor_mod, Pow, Equal, Not_equal, Less, Less_equal, Greater, Greater_equal, - Logical_AND, Logical_OR, Logical_XOR + Logical_AND, Logical_OR, Logical_XOR, Logical_NOT, Mean, Select }; /** @@ -1290,6 +1329,7 @@ using RNNCell = RNNCellBase; * [N,T,D] Xt - input data * [ND,N,S] Ht-1 - initial hidden state * [ND,N,S] Ct-1 - initial cell state // if NS==2 + * [N] SL - sequence length (optional input) * * Outputs: * [ND,N,T,S] Xt - input data @@ -1587,26 +1627,33 @@ class ReverseSequenceLayer : public CNNLayer { /** -* @brief This class represents a standard Squeeze layer -* Squeeze modifies input tensor dimensions according parameters +* @brief This class represents a OneHot layer +* Converts input into OneHot representation. 
*/ -class SqueezeLayer : public CNNLayer { +class OneHotLayer : public CNNLayer { public: /** - * @brief Creates a new Squeeze instance. + * @brief A depth of representation */ - using CNNLayer::CNNLayer; -}; + unsigned int depth = 0; + /** + * @brief The locations represented by indices in input take value on_value + */ + float on_value = 1.f; + + /** + * @brief The locations not represented by indices in input take value off_value + */ + float off_value = 0.f; -/** -* @brief This class represents a standard Unsqueeze layer -* Unsqueeze modifies input tensor dimensions according parameters -*/ -class UnsqueezeLayer : public CNNLayer { -public: /** - * @brief Creates a new Unsqueeze instance. + * @brief The axis along which the one-hot dimension is added + */ + int axis = -1; + + /** + * @brief Creates a new OneHot instance */ using CNNLayer::CNNLayer; }; @@ -1639,13 +1686,28 @@ class FillLayer : public CNNLayer { /** -* @brief This class represents a standard Expand layer -* Expand modifies input tensor dimensions according parameters +* @brief This class represents a Select layer +* The Select layer takes elements from the second (“then”) or the third (“else”) input based on condition mask (“cond”) provided in the first input. +* The “cond” tensor is broadcasted to “then” and “else” tensors. +* The output tensor shape is equal to the broadcasted shape of “cond”, “then” and “else”. */ -class ExpandLayer : public CNNLayer { +class SelectLayer : public CNNLayer { public: /** - * @brief Creates a new Expand instance. + * @brief Creates a new SelectLayer instance. + */ + using CNNLayer::CNNLayer; +}; + + +/** +* @brief This class represents a standard Broadcast layer +* Broadcast modifies input tensor dimensions according to parameters +*/ +class BroadcastLayer : public CNNLayer { +public: + /** + * @brief Creates a new Broadcast instance. */ using CNNLayer::CNNLayer; }; @@ -1667,4 +1729,62 @@ class QuantizeLayer : public CNNLayer { using CNNLayer::CNNLayer; }; + +/** +* @brief This class represents a standard Math layer +* Math modifies the input tensor according to its parameters +*/ +class MathLayer : public CNNLayer { +public: + /** + * @brief Creates a new Math instance. + */ + using CNNLayer::CNNLayer; +}; + + +/** +* @brief This class represents a standard Reduce layer +* Reduce modifies the input tensor according to its parameters +*/ +class ReduceLayer : public CNNLayer { +public: + /** + * @brief If true, reduced dimensions are kept in the output tensor with size 1 + */ + bool keep_dims = true; + + /** + * @brief Creates a new Reduce instance. + */ + using CNNLayer::CNNLayer; +}; + + +/** + * @brief This class represents a standard TopK layer + * TopK picks top K values from the input tensor according to its parameters + */ +class TopKLayer : public CNNLayer { +public: + /** + * @brief The mode could be 'max' or 'min' + */ + std::string mode; + /** + * @brief Top K values sort mode: could be 'value' or 'index' + */ + std::string sort; + /** + * @brief The axis in the tensor along which top K values are picked + */ + int axis = -1; + + /** + * @brief Creates a new TopKLayer instance.
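A standalone illustration of the OneHotLayer fields documented above (depth, on_value, off_value); only the default axis == -1, i.e. a new innermost dimension, is handled in this sketch:

    #include <iostream>
    #include <vector>

    // Position == index gets on_value, every other position gets off_value.
    std::vector<float> one_hot(const std::vector<int> &indices, unsigned int depth,
                               float on_value = 1.f, float off_value = 0.f) {
        std::vector<float> out(indices.size() * depth, off_value);
        for (size_t i = 0; i < indices.size(); ++i)
            if (indices[i] >= 0 && static_cast<unsigned int>(indices[i]) < depth)
                out[i * depth + indices[i]] = on_value;
        return out;
    }

    int main() {
        for (float v : one_hot({1, 3}, 4))
            std::cout << v << " ";   // prints: 0 1 0 0 0 0 0 1
        std::cout << std::endl;
        return 0;
    }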
+ */ + using CNNLayer::CNNLayer; +}; + + } // namespace InferenceEngine diff --git a/inference-engine/include/ie_layers_property.hpp b/inference-engine/include/ie_layers_property.hpp index eeac6b6ec9d9dc..fa3e631518a323 100644 --- a/inference-engine/include/ie_layers_property.hpp +++ b/inference-engine/include/ie_layers_property.hpp @@ -61,21 +61,21 @@ class PropertyVector { */ T &at(int index) { if (index >= N) { - THROW_IE_EXCEPTION << "Property index is out of bounds(" << index << "/" << N; + THROW_IE_EXCEPTION << "Property index is out of bounds (" << index << "/" << N; } return _axises[index]; } const T &operator[](size_t index) const { if (index >= N ||!_allocated[index]) { - THROW_IE_EXCEPTION << "Property index ("<< index <<")is out of bounds"; + THROW_IE_EXCEPTION << "Property index ("<< index <<") is out of bounds"; } return _axises[index]; } T &operator[](size_t index) { if (index >= N || !_allocated[index]) { - THROW_IE_EXCEPTION << "Property index ("<< index <<")is out of bounds"; + THROW_IE_EXCEPTION << "Property index ("<< index <<") is out of bounds"; } return _axises[index]; } diff --git a/inference-engine/include/ie_layouts.h b/inference-engine/include/ie_layouts.h index 740da27b220308..38901c07dbb66f 100644 --- a/inference-engine/include/ie_layouts.h +++ b/inference-engine/include/ie_layouts.h @@ -195,7 +195,7 @@ class INFERENCE_ENGINE_API_CLASS(TensorDesc) { * @brief Returns the constant vector of dimensions * @return dimensions */ - const SizeVector& getDims() const { + const SizeVector& getDims() const noexcept { return dims; } /** @@ -329,18 +329,34 @@ class INFERENCE_ENGINE_API_CLASS(TensorDesc) { }; /** - * @deprecated + * @deprecated Deprecated since provides dims in reverse order */ +INFERENCE_ENGINE_DEPRECATED static const size_t I_N = 3; + +/** + * @deprecated Deprecated since provides dims in reverse order + */ +INFERENCE_ENGINE_DEPRECATED static const size_t I_C = 2; + +/** + * @deprecated Deprecated since provides dims in reverse order + */ +INFERENCE_ENGINE_DEPRECATED static const size_t I_H = 1; + +/** + * @deprecated Deprecated since provides dims in reverse order + */ +INFERENCE_ENGINE_DEPRECATED static const size_t I_W = 0; /** * @deprecated Uses TensorDesc working with layouts * @brief This class helps calculating offset in different layouts */ -class INFERENCE_ENGINE_API_CLASS(LayoutOffsetCounter) { +class INFERENCE_ENGINE_DEPRECATED INFERENCE_ENGINE_API_CLASS(LayoutOffsetCounter) { private: Layout _layout; SizeVector _dims; @@ -359,6 +375,24 @@ class INFERENCE_ENGINE_API_CLASS(LayoutOffsetCounter) { */ LayoutOffsetCounter(Layout layout, SizeVector dims); + IE_SUPPRESS_DEPRECATED_START + /** + * @brief A copy constructor + */ + LayoutOffsetCounter(const LayoutOffsetCounter & l); + + /** + * @brief A copy assignment operator + * @param l A value to copy from + */ + LayoutOffsetCounter & operator = (const LayoutOffsetCounter & l); + IE_SUPPRESS_DEPRECATED_END + + /** + * @brief A destructor + */ + ~LayoutOffsetCounter(); + /** * @brief Calculates an offset for the specified layout * @param pos Tensor position array (reverse NCHW order as in the IR: w,h,c,n) @@ -367,9 +401,12 @@ class INFERENCE_ENGINE_API_CLASS(LayoutOffsetCounter) { }; /** - * @deprecated Please use TensorDescriptors for conversion + * @deprecated Please use TensorDesc for conversion */ -template void ConvertLayout(Layout sourceLayout, Layout destLayout, const T* sourceBuffer, T* destBuffer, SizeVector dims) { +template +INFERENCE_ENGINE_DEPRECATED +void ConvertLayout(Layout 
sourceLayout, Layout destLayout, const T* sourceBuffer, T* destBuffer, SizeVector dims) { + IE_SUPPRESS_DEPRECATED_START if (dims.size() == 0) return; SizeVector pos(dims.size(), 0); @@ -396,6 +433,7 @@ template void ConvertLayout(Layout sourceLayout, Layout destLayout, pos[caret]++; } } + IE_SUPPRESS_DEPRECATED_END } } // namespace InferenceEngine diff --git a/inference-engine/include/ie_parallel.hpp b/inference-engine/include/ie_parallel.hpp index 01c0c0e360edab..a214b10b01a7cd 100644 --- a/inference-engine/include/ie_parallel.hpp +++ b/inference-engine/include/ie_parallel.hpp @@ -5,7 +5,7 @@ /** * @brief Contains declarations and definitions for sequential and multi-threading implementations. * Multi-threading support is implemented in two variants: using the Threading Building Blocks library and OpenMP* product. - * To build a particular implementation, use the corresponding identifier: IE_THREAD_TBB, IE_THREAD_OMP or IE_THREAD_SEQ. + * To build a particular implementation, use the corresponding identifier: IE_THREAD_TBB, IE_THREAD_TBB_AUTO, IE_THREAD_OMP or IE_THREAD_SEQ. * @file ie_parallel.hpp */ @@ -16,8 +16,9 @@ #define IE_THREAD_TBB 0 #define IE_THREAD_OMP 1 #define IE_THREAD_SEQ 2 +#define IE_THREAD_TBB_AUTO 3 -#if IE_THREAD == IE_THREAD_TBB +#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) #define TBB_PREVIEW_LOCAL_OBSERVER 1 #include "tbb/task_scheduler_observer.h" #include "tbb/parallel_for.h" @@ -33,7 +34,11 @@ inline int parallel_get_num_threads() { return parallel_get_max_threads(); } inline int parallel_get_thread_num() { return tbb::this_task_arena::current_thread_index(); } inline void parallel_set_num_threads(int n) { return; } inline int parallel_get_env_threads() { return 0; } - +#if IE_THREAD == IE_THREAD_TBB + #define PARTITIONING , tbb::static_partitioner() +#else + #define PARTITIONING +#endif #elif IE_THREAD == IE_THREAD_OMP #include #include @@ -73,7 +78,7 @@ namespace InferenceEngine { template void parallel_nt(int nthr, const F &func) { -#if IE_THREAD == IE_THREAD_TBB +#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) if (nthr == 0) nthr = parallel_get_max_threads(); if (nthr == 1) { func(0, 1); @@ -110,7 +115,7 @@ void parallel_nt_static(int nthr, const F &func) { } if (nthr == 0) nthr = parallel_get_max_threads(); -#if IE_THREAD == IE_THREAD_TBB +#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) tbb::parallel_for(0, nthr, [&](int ithr) { func(ithr, nthr); } @@ -127,7 +132,7 @@ void parallel_nt_static(int nthr, const F &func) { template R parallel_sum(const T0 &D0, const R &input, const F &func) { -#if IE_THREAD == IE_THREAD_TBB +#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) return tbb::parallel_reduce( tbb::blocked_range(0, D0), input, [&](const tbb::blocked_range& r, R init)->R { @@ -138,7 +143,7 @@ R parallel_sum(const T0 &D0, const R &input, const F &func) { }, [](R x, R y)->R { return x + y; - }); + } PARTITIONING); #else R sum = input; @@ -160,7 +165,7 @@ R parallel_sum(const T0 &D0, const R &input, const F &func) { template R parallel_sum2d(const T0 &D0, const T1 &D1, const R &input, const F &func) { -#if IE_THREAD == IE_THREAD_TBB +#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) return tbb::parallel_reduce( tbb::blocked_range2d(0, D0, 0, D1), input, [&](const tbb::blocked_range2d& r, R init)->R { @@ -174,7 +179,7 @@ R parallel_sum2d(const T0 &D0, const T1 &D1, const R &input, const F &func) { }, [](R x, R y)->R { return x + y; - }); + } PARTITIONING); 
#else R sum = input; @@ -199,7 +204,7 @@ R parallel_sum2d(const T0 &D0, const T1 &D1, const R &input, const F &func) { } template R parallel_sum3d(const T0 &D0, const T1 &D1, const T2 &D2, const R &input, const F &func) { -#if IE_THREAD == IE_THREAD_TBB +#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO) return tbb::parallel_reduce( tbb::blocked_range3d(0, D0, 0, D1, 0, D2), input, [&](const tbb::blocked_range3d& r, R init)->R { @@ -215,7 +220,7 @@ R parallel_sum3d(const T0 &D0, const T1 &D1, const T2 &D2, const R &input, const }, [](R x, R y)->R { return x + y; - }); + } PARTITIONING); #else R sum = input; @@ -289,12 +294,24 @@ void for_1d(const int &ithr, const int &nthr, const T0 &D0, const F &func) { template void parallel_for(const T0 &D0, const F &func) { #if IE_THREAD == IE_THREAD_TBB + auto work_amount = static_cast(D0); + int nthr = parallel_get_max_threads(); + if (static_cast(nthr) > work_amount) + nthr = static_cast(work_amount); + if (nthr == 1) { + for_1d(0, 1, D0, func); + } else { + tbb::parallel_for(0, nthr, [&](int ithr) { + for_1d(ithr, nthr, D0, func); + }, tbb::static_partitioner()); + } +#elif IE_THREAD == IE_THREAD_TBB_AUTO const int nthr = parallel_get_max_threads(); tbb::parallel_for(0, nthr, [&](int ithr) { for_1d(ithr, nthr, D0, func); }); #elif IE_THREAD == IE_THREAD_OMP - # pragma omp parallel +# pragma omp parallel for_1d(parallel_get_thread_num(), parallel_get_num_threads(), D0, func); #elif IE_THREAD == IE_THREAD_SEQ for_1d(0, 1, D0, func); @@ -320,12 +337,24 @@ void for_2d(const int &ithr, const int &nthr, const T0 &D0, const T1 &D1, const template void parallel_for2d(const T0 &D0, const T1 &D1, const F &func) { #if IE_THREAD == IE_THREAD_TBB + auto work_amount = static_cast(D0 * D1); + int nthr = parallel_get_max_threads(); + if (static_cast(nthr) > work_amount) + nthr = static_cast(work_amount); + if (nthr == 1) { + for_2d(0, 1, D0, D1, func); + } else { + tbb::parallel_for(0, nthr, [&](int ithr) { + for_2d(ithr, nthr, D0, D1, func); + }, tbb::static_partitioner()); + } +#elif IE_THREAD == IE_THREAD_TBB_AUTO const int nthr = parallel_get_max_threads(); tbb::parallel_for(0, nthr, [&](int ithr) { for_2d(ithr, nthr, D0, D1, func); }); #elif IE_THREAD == IE_THREAD_OMP - # pragma omp parallel +# pragma omp parallel for_2d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, func); #elif IE_THREAD == IE_THREAD_SEQ for_2d(0, 1, D0, D1, func); @@ -352,12 +381,24 @@ void for_3d(const int &ithr, const int &nthr, const T0 &D0, const T1 &D1, template void parallel_for3d(const T0 &D0, const T1 &D1, const T2 &D2, const F &func) { #if IE_THREAD == IE_THREAD_TBB + auto work_amount = static_cast(D0 * D1 * D2); + int nthr = parallel_get_max_threads(); + if (static_cast(nthr) > work_amount) + nthr = static_cast(work_amount); + if (nthr == 1) { + for_3d(0, 1, D0, D1, D2, func); + } else { + tbb::parallel_for(0, nthr, [&](int ithr) { + for_3d(ithr, nthr, D0, D1, D2, func); + }, tbb::static_partitioner()); + } +#elif IE_THREAD == IE_THREAD_TBB_AUTO const int nthr = parallel_get_max_threads(); tbb::parallel_for(0, nthr, [&](int ithr) { for_3d(ithr, nthr, D0, D1, D2, func); }); #elif IE_THREAD == IE_THREAD_OMP - # pragma omp parallel +# pragma omp parallel for_3d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, func); #elif IE_THREAD == IE_THREAD_SEQ for_3d(0, 1, D0, D1, D2, func); @@ -383,12 +424,24 @@ void for_4d(const int &ithr, const int &nthr, const T0 &D0, const T1 &D1, template void parallel_for4d(const T0 &D0, const T1 &D1, 
const T2 &D2, const T3 &D3, const F &func) { #if IE_THREAD == IE_THREAD_TBB + auto work_amount = static_cast(D0 * D1 * D2 * D3); + int nthr = parallel_get_max_threads(); + if (static_cast(nthr) > work_amount) + nthr = static_cast(work_amount); + if (nthr == 1) { + for_4d(0, 1, D0, D1, D2, D3, func); + } else { + tbb::parallel_for(0, nthr, [&](int ithr) { + for_4d(ithr, nthr, D0, D1, D2, D3, func); + }, tbb::static_partitioner()); + } +#elif IE_THREAD == IE_THREAD_TBB_AUTO const int nthr = parallel_get_max_threads(); tbb::parallel_for(0, nthr, [&](int ithr) { for_4d(ithr, nthr, D0, D1, D2, D3, func); }); #elif IE_THREAD == IE_THREAD_OMP - # pragma omp parallel +# pragma omp parallel for_4d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, D3, func); #elif IE_THREAD == IE_THREAD_SEQ for_4d(0, 1, D0, D1, D2, D3, func); @@ -415,51 +468,29 @@ template (D0 * D1 * D2 * D3 * D4); + int nthr = parallel_get_max_threads(); + if (static_cast(nthr) > work_amount) + nthr = static_cast(work_amount); + if (nthr == 1) { + for_5d(0, 1, D0, D1, D2, D3, D4, func); + } else { + tbb::parallel_for(0, nthr, [&](int ithr) { + for_5d(ithr, nthr, D0, D1, D2, D3, D4, func); + }, tbb::static_partitioner()); + } +#elif IE_THREAD == IE_THREAD_TBB_AUTO const int nthr = parallel_get_max_threads(); tbb::parallel_for(0, nthr, [&](int ithr) { for_5d(ithr, nthr, D0, D1, D2, D3, D4, func); }); #elif IE_THREAD == IE_THREAD_OMP - # pragma omp parallel +# pragma omp parallel for_5d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, D3, D4, func); #elif IE_THREAD == IE_THREAD_SEQ for_5d(0, 1, D0, D1, D2, D3, D4, func); #endif } - -template -void for_6d(const int &ithr, const int &nthr, const T0 &D0, const T1 &D1, - const T2 &D2, const T3 &D3, const T4 &D4, const T5 &D5, F func) { - const size_t work_amount = (size_t)D0 * D1 * D2 * D3 * D4 * D5; - if (work_amount == 0) return; - size_t start{ 0 }, end{ 0 }; - splitter(work_amount, nthr, ithr, start, end); - - T0 d0{ 0 }; T1 d1{ 0 }; T2 d2{ 0 }; T3 d3{ 0 }; T4 d4{ 0 }; T5 d5{ 0 }; - parallel_it_init(start, d0, D0, d1, D1, d2, D2, d3, D3, d4, D4, - d5, D5); - for (size_t iwork = start; iwork < end; ++iwork) { - func(d0, d1, d2, d3, d4, d5); - parallel_it_step(d0, D0, d1, D1, d2, D2, d3, D3, d4, D4, d5, D5); - } -} - -template -void parallel_for6d(const T0 &D0, const T1 &D1, const T2 &D2, const T3 &D3, - const T4 &D4, const T5 &D5, F func) { -#if IE_THREAD == IE_THREAD_TBB - const int nthr = parallel_get_max_threads(); - tbb::parallel_for(0, nthr, [&](int ithr) { - for_6d(ithr, nthr, D0, D1, D2, D3, D4, D5, func); - }); -#elif IE_THREAD == IE_THREAD_OMP -# pragma omp parallel - for_6d(parallel_get_thread_num(), parallel_get_num_threads(), D0, D1, D2, D3, D4, D5, func); -#elif IE_THREAD == IE_THREAD_SEQ - for_6d(0, 1, D0, D1, D2, D3, D4, D5, func); -#endif -} - } // namespace InferenceEngine diff --git a/inference-engine/include/ie_parameter.hpp b/inference-engine/include/ie_parameter.hpp index e30d83dcae260c..9114118d86acea 100644 --- a/inference-engine/include/ie_parameter.hpp +++ b/inference-engine/include/ie_parameter.hpp @@ -51,8 +51,9 @@ class Parameter { * @tparam U Identity type-transformation * @param parameter object */ - template - Parameter(T&& parameter) { // NOLINT + template::type, Parameter>::value>::type > + Parameter(T&& parameter) { // NOLINT + static_assert(!std::is_same::type, Parameter>::value, "To prevent recursion"); ptr = new RealData::type>(std::forward(parameter)); } diff --git a/inference-engine/include/ie_plugin.hpp 
b/inference-engine/include/ie_plugin.hpp index 2712f1fbaa45d5..9229a74f5b040e 100644 --- a/inference-engine/include/ie_plugin.hpp +++ b/inference-engine/include/ie_plugin.hpp @@ -43,10 +43,58 @@ namespace InferenceEngine { /** * @brief Responce structure encapsulating information about supported layer */ -struct QueryNetworkResult { +struct INFERENCE_ENGINE_API_CLASS(QueryNetworkResult) { + /** + * @deprecated Use QueryNetworkResult::supportedLayersMap which provides layer -> device mapping + * @brief Set of layers supported by a specific device + */ + INFERENCE_ENGINE_DEPRECATED std::set<std::string> supportedLayers; + + /** + * @brief A map of supported layers: + * - key - a layer name + * - value - a device name on which layer is assigned + */ + std::map<std::string, std::string> supportedLayersMap; + + /** + * @brief A status code + */ StatusCode rc; + + /** + * @brief Response message + */ ResponseDesc resp; + + /** + * @brief A default constructor + */ + QueryNetworkResult(); + + /** + * @brief A copy constructor + * @param q Value to copy from + */ + QueryNetworkResult(const QueryNetworkResult & q); + + /** + * @brief A copy assignment operator + * @param q A value to copy from + */ + const QueryNetworkResult & operator= (const QueryNetworkResult & q); + + /** + * @brief A move assignment operator + * @param q A value to move from + */ + QueryNetworkResult & operator= (QueryNetworkResult && q); + + /** + * @brief A destructor + */ + ~QueryNetworkResult(); }; /** @@ -68,13 +116,14 @@ class IInferencePlugin : public details::IRelease { virtual void SetLogCallback(IErrorListener &listener) noexcept = 0; /** - * @deprecated use LoadNetwork with four parameters (executable network, cnn network, config, response) + * @deprecated Use IInferencePlugin::LoadNetwork(IExecutableNetwork::Ptr &, ICNNNetwork &, const std::map<std::string, std::string> &, ResponseDesc *) * @brief Loads a pre-built network with weights to the engine. In case of success the plugin will * be ready to infer * @param network Network object acquired from CNNNetReader * @param resp Pointer to the response message that holds a description of an error if any occurred * @return Status code of the operation. OK if succeeded */ + INFERENCE_ENGINE_DEPRECATED virtual StatusCode LoadNetwork(ICNNNetwork &network, ResponseDesc *resp) noexcept = 0; /** @@ -103,7 +152,7 @@ class IInferencePlugin : public details::IRelease { const std::map<std::string, std::string> &config, ResponseDesc *resp) noexcept = 0; /** - * @deprecated Uses Infer() working with multiple inputs and outputs + * @deprecated Load IExecutableNetwork to create IInferRequest * @brief Infers an image(s). * Input and output dimensions depend on the topology. * As an example for classification topologies use a 4D Blob as input (batch, channels, width, * height) and get a 1D blob as output (scoring probability vector).
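The enable_if guard added to the Parameter constructor in ie_parameter.hpp a few hunks above is the standard fix for a forwarding constructor that would otherwise outrank the copy constructor. A self-contained demonstration of the same guard (names are illustrative):

    #include <iostream>
    #include <type_traits>

    class Holder {
    public:
        Holder(const Holder &) { std::cout << "copy ctor" << std::endl; }

        // Removed from overload resolution when T decays to Holder itself, so
        // copying a Holder picks the real copy constructor instead of wrapping
        // a Holder inside a Holder (the recursion the static_assert guards).
        template <typename T,
                  typename = typename std::enable_if<
                      !std::is_same<typename std::decay<T>::type, Holder>::value>::type>
        Holder(T &&) {   // NOLINT
            std::cout << "forwarding ctor" << std::endl;
        }
    };

    int main() {
        Holder a(42);   // forwarding ctor
        Holder b(a);    // copy ctor; without the guard this would forward
        return 0;
    }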
To Infer a batch, @@ -130,16 +180,18 @@ class IInferencePlugin : public details::IRelease { * @param resp Pointer to the response message that holds a description of an error if any occurred * @return Status code of the operation. OK if succeeded */ + INFERENCE_ENGINE_DEPRECATED virtual StatusCode Infer(const BlobMap &input, BlobMap &result, ResponseDesc *resp) noexcept = 0; /** - * @deprecated Uses IInferRequest to get performance measures + * @deprecated Use IInferRequest to get performance measures * @brief Queries performance measures per layer to get feedback of what is the most time consuming layer * Note: not all plugins provide meaningful data * @param perfMap Map of layer names to profiling information for that layer * @param resp Pointer to the response message that holds a description of an error if any occurred * @return Status code of the operation. OK if succeeded */ + INFERENCE_ENGINE_DEPRECATED virtual StatusCode GetPerformanceCounts(std::map &perfMap, ResponseDesc *resp) const noexcept = 0; @@ -156,16 +208,18 @@ class IInferencePlugin : public details::IRelease { * @brief Sets configuration for plugin, acceptable keys can be found in ie_plugin_config.hpp * @param config Map of pairs: (config parameter name, config parameter value) * @param resp Pointer to the response message that holds a description of an error if any occurred + * @return Status code of the operation. OK if succeeded */ virtual StatusCode SetConfig(const std::map &config, ResponseDesc *resp) noexcept = 0; /** - * @depricated Use the version with config parameter + * @deprecated Use IInferencePlugin::QueryNetwork(const ICNNNetwork&, const std::map &, QueryNetworkResult&) const * @brief Query plugin if it supports specified network * @param network Network object to query - * @param resp Pointer to the response message that holds a description of an error if any occurred + * @param res Reference to query network result */ + INFERENCE_ENGINE_DEPRECATED virtual void QueryNetwork(const ICNNNetwork& /*network*/, QueryNetworkResult& res) const noexcept { res.rc = InferenceEngine::NOT_IMPLEMENTED; } @@ -174,7 +228,7 @@ class IInferencePlugin : public details::IRelease { * @brief Query plugin if it supports specified network with specified configuration * @param network Network object to query * @param config Map of pairs: (config parameter name, config parameter value) - * @param resp Pointer to the response message that holds a description of an error if any occurred + * @param res Reference to query network result */ virtual void QueryNetwork(const ICNNNetwork& /*network*/, const std::map &/*config*/, QueryNetworkResult& res) const noexcept { diff --git a/inference-engine/include/ie_plugin_config.hpp b/inference-engine/include/ie_plugin_config.hpp index 028b40491c2dbe..a3764e8216f393 100644 --- a/inference-engine/include/ie_plugin_config.hpp +++ b/inference-engine/include/ie_plugin_config.hpp @@ -12,9 +12,132 @@ #pragma once #include +#include +#include namespace InferenceEngine { +namespace Metrics { + +#ifndef DECLARE_METRIC_KEY_IMPL +# define DECLARE_METRIC_KEY_IMPL(...) +#endif + +/** +* @brief shortcut for defining common Inference Engine metrics +*/ + +#define METRIC_KEY(name) InferenceEngine::Metrics::METRIC_##name +#define EXEC_NETWORK_METRIC_KEY(name) METRIC_KEY(name) + +#define DECLARE_METRIC_KEY(name, ...) \ + static constexpr auto METRIC_##name = #name; \ + DECLARE_METRIC_KEY_IMPL(name, __VA_ARGS__) + +#define DECLARE_EXEC_NETWORK_METRIC_KEY(name, ...) 
DECLARE_METRIC_KEY(name, __VA_ARGS__) + +/** +* @brief shortcut for defining metric values +*/ +#define METRIC_VALUE(name) InferenceEngine::Metrics::name +#define DECLARE_METRIC_VALUE(name) static constexpr auto name = #name + +/** +* @brief Metric to get a std::vector of available device IDs. String value is "AVAILABLE_DEVICES" +*/ +DECLARE_METRIC_KEY(AVAILABLE_DEVICES, std::vector<std::string>); + +/** +* @brief Metric to get a std::vector of supported metrics. String value is "SUPPORTED_METRICS" +* This can be used as an executable network metric as well. +* +* Each of the returned device metrics can be passed to Core::GetMetric, executable network metrics +* can be passed to ExecutableNetwork::GetMetric. +* +*/ +DECLARE_METRIC_KEY(SUPPORTED_METRICS, std::vector<std::string>); + +/** +* @brief Metric to get a std::vector of supported config keys. String value is "SUPPORTED_CONFIG_KEYS" +* This can be used as an executable network metric as well. +* +* Each of the returned device configuration keys can be passed to Core::SetConfig, Core::GetConfig, and Core::LoadNetwork, +* configuration keys for executable networks can be passed to ExecutableNetwork::SetConfig and ExecutableNetwork::GetConfig. +* +*/ +DECLARE_METRIC_KEY(SUPPORTED_CONFIG_KEYS, std::vector<std::string>); + +/** +* @brief Metric to get a std::string value representing a full device name. String value is "FULL_DEVICE_NAME" +*/ +DECLARE_METRIC_KEY(FULL_DEVICE_NAME, std::string); + +/** +* @brief Metric to get a std::vector of optimization options per device. String value is "OPTIMIZATION_CAPABILITIES" +* The possible values: +* - "FP32" - device can support FP32 models +* - "FP16" - device can support FP16 models +* - "INT8" - device can support models with INT8 layers +* - "BIN" - device can support models with BIN layers +* - "WINOGRAD" - device can support models where convolution is implemented via Winograd transformations +*/ +DECLARE_METRIC_KEY(OPTIMIZATION_CAPABILITIES, std::vector<std::string>); + +DECLARE_METRIC_VALUE(FP32); +DECLARE_METRIC_VALUE(FP16); +DECLARE_METRIC_VALUE(INT8); +DECLARE_METRIC_VALUE(BIN); +DECLARE_METRIC_VALUE(WINOGRAD); + +/** +* @brief Metric to provide information about a range for streams on platforms where streams are supported. +* Metric returns a value of std::tuple<unsigned int, unsigned int> type, where: +* - First value is bottom bound. +* - Second value is upper bound. +* String value for metric name is "RANGE_FOR_STREAMS". +*/ +DECLARE_METRIC_KEY(RANGE_FOR_STREAMS, std::tuple<unsigned int, unsigned int>); + +/** +* @brief Metric to provide a hint for a range for number of async infer requests. If device supports streams, +* the metric provides range for number of IRs per stream. +* Metric returns a value of std::tuple<unsigned int, unsigned int, unsigned int> type, where: +* - First value is bottom bound. +* - Second value is upper bound. +* - Third value is step inside this range. +* String value for metric name is "RANGE_FOR_ASYNC_INFER_REQUESTS". +*/ +DECLARE_METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS, std::tuple<unsigned int, unsigned int, unsigned int>); + +/** +* @brief Metric to get an unsigned int value of number of waiting infer requests. +* String value is "NUMBER_OF_WAITING_INFER_REQUESTS". This can be used as an executable network metric as well +*/ +DECLARE_METRIC_KEY(NUMBER_OF_WAITING_INFER_REQUESTS, unsigned int); + +/** +* @brief Metric to get an unsigned int value of number of infer requests in the execution stage. +* String value is "NUMBER_OF_EXEC_INFER_REQUESTS". This can be used as an executable network metric as well +*/ +DECLARE_METRIC_KEY(NUMBER_OF_EXEC_INFER_REQUESTS, unsigned int); + +/** +* @brief Metric to get a name of network.
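A hedged sketch of consuming these metric keys from application code (assumes Core::GetMetric and a Parameter::as<T>() accessor in this release):

    #include <inference_engine.hpp>
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
        InferenceEngine::Core ie;
        // METRIC_KEY(SUPPORTED_METRICS) expands to the string constant declared
        // above, so one identifier serves compile-time use and runtime lookup.
        std::vector<std::string> metrics =
            ie.GetMetric("CPU", METRIC_KEY(SUPPORTED_METRICS)).as<std::vector<std::string>>();
        for (const auto &m : metrics)
            std::cout << m << std::endl;
        return 0;
    }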
String value is "NETWORK_NAME". +*/ +DECLARE_EXEC_NETWORK_METRIC_KEY(NETWORK_NAME, std::string); + +/** + * @brief Metric to get a float of device thermal. String value is "DEVICE_THERMAL" + */ +DECLARE_METRIC_KEY(DEVICE_THERMAL, float); + +/** +* @brief Metric to get an unsigned integer value of optimal number of executable network infer requests. +*/ +DECLARE_EXEC_NETWORK_METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS, unsigned int); + +} // namespace Metrics + namespace PluginConfigParams { /** @@ -24,7 +147,6 @@ namespace PluginConfigParams { #define _CONFIG_KEY(name) KEY_##name #define DECLARE_CONFIG_KEY(name) static constexpr auto _CONFIG_KEY(name) = #name - /** * @brief shortcut for defining configuration values */ @@ -64,6 +186,16 @@ DECLARE_CONFIG_VALUE(CPU_THROUGHPUT_NUMA); DECLARE_CONFIG_VALUE(CPU_THROUGHPUT_AUTO); DECLARE_CONFIG_KEY(CPU_THROUGHPUT_STREAMS); +/** +* @brief Optimize GPU plugin execution to maximize throughput. +* It is passed to IInferencePlugin::SetConfig(), this option should be used with values: +* - KEY_GPU_THROUGHPUT_AUTO creates bare minimum of streams that might improve performance in some cases, +* this option allows to enable throttle hint for opencl queue thus reduce CPU load without significant performance drop +* - a positive integer value creates the requested number of streams +*/ +DECLARE_CONFIG_VALUE(GPU_THROUGHPUT_AUTO); +DECLARE_CONFIG_KEY(GPU_THROUGHPUT_STREAMS); + /** * @brief The name for setting performance counters option. diff --git a/inference-engine/include/ie_plugin_dispatcher.hpp b/inference-engine/include/ie_plugin_dispatcher.hpp index b041d077728cae..41b4e41cbb76ba 100644 --- a/inference-engine/include/ie_plugin_dispatcher.hpp +++ b/inference-engine/include/ie_plugin_dispatcher.hpp @@ -17,108 +17,45 @@ namespace InferenceEngine { /** * @brief This is a class to load a suitable plugin */ -class PluginDispatcher { +class INFERENCE_ENGINE_API_CLASS(PluginDispatcher) { public: /** * @brief A constructor * @param pp Vector of paths to plugin directories */ - explicit PluginDispatcher(const std::vector &pp = {file_name_t()}) : pluginDirs(pp) {} + explicit PluginDispatcher(const std::vector &pp = {file_name_t()}); /** - * @brief Loads a plugin from plugin directories - * @param name Plugin name - * @return A pointer to the loaded plugin - */ - virtual InferencePlugin getPluginByName(const file_name_t& name) const { - std::stringstream err; - for (auto &pluginPath : pluginDirs) { - try { - return InferencePlugin(InferenceEnginePluginPtr(make_plugin_name(pluginPath, name))); - } - catch (const std::exception &ex) { - err << "cannot load plugin: " << fileNameToString(name) << " from " << fileNameToString(pluginPath) << ": " << ex.what() << ", skipping\n"; - } - } - THROW_IE_EXCEPTION << "Plugin " << fileNameToString(name) << " cannot be loaded: " << err.str() << "\n"; - } + * @brief Loads a plugin from plugin directories + * @param name Plugin name + * @return A pointer to the loaded plugin + */ + virtual InferencePlugin getPluginByName(const file_name_t& name) const; /** - * @brief Loads a plugin from directories that is suitable for the device string - * @return A pointer to the plugin - */ - InferencePlugin getPluginByDevice(const std::string& deviceName) const { - InferenceEnginePluginPtr ptr; - // looking for HETERO: if can find, add everything after ':' to the options of hetero plugin - if (deviceName.find("HETERO:") == 0) { - ptr = getSuitablePlugin(InferenceEngine::TargetDeviceInfo::fromStr("HETERO")); - if (ptr) { - 
InferenceEngine::ResponseDesc response; - ptr->SetConfig({ { "TARGET_FALLBACK", deviceName.substr(7, deviceName.length() - 7) } }, &response); - } - } else { - ptr = getSuitablePlugin(InferenceEngine::TargetDeviceInfo::fromStr(deviceName)); - } - return InferencePlugin(ptr); - } + * @deprecated Use InferenceEngine::Core to work with devices by name + * @brief Loads a plugin from directories that is suitable for the device string + * @return A pointer to the plugin + */ + INFERENCE_ENGINE_DEPRECATED + InferencePlugin getPluginByDevice(const std::string& deviceName) const; /** - * @brief Loads a plugin from directories that is suitable for the device - * @return A pointer to the plugin - */ - InferenceEnginePluginPtr getSuitablePlugin(TargetDevice device) const { - FindPluginResponse result; - ResponseDesc desc; - if (InferenceEngine::OK != findPlugin({ device }, result, &desc)) { - THROW_IE_EXCEPTION << desc.msg; - } - - std::stringstream err; - for (std::string& name : result.names) { - try { - return getPluginByName(stringToFileName(name)); - } - catch (const std::exception &ex) { - err << "Tried load plugin : " << name << ", error: " << ex.what() << "\n"; - } - } - THROW_IE_EXCEPTION << "Cannot find plugin to use :" << err.str() << "\n"; - } + * @deprecated Use InferenceEngine::Core to work with devices by name + * @brief Loads a plugin from directories that is suitable for the device + * @return A pointer to the plugin + */ + INFERENCE_ENGINE_DEPRECATED + InferenceEnginePluginPtr getSuitablePlugin(TargetDevice device) const; protected: /** - * @brief Creates path to the plugin - * @param path Path to the plugin - * @param input Plugin name - * @return The path to the plugin - */ - file_name_t make_plugin_name(const file_name_t &path, const file_name_t &input) const { - file_name_t separator = -#if defined _WIN32 || defined __CYGWIN__ -# if defined UNICODE - L"\\"; -# else - "\\"; -# endif -#else - "/"; -#endif - if (path.empty()) - separator = file_name_t(); -#ifdef _WIN32 - return path + separator + input + -# if defined UNICODE - L".dll"; -# else - ".dll"; -# endif -#elif __APPLE__ - return path + separator + "lib" + input + ".dylib"; -#else - return path + separator + "lib" + input + ".so"; -#endif - } - + * @brief Creates path to the plugin + * @param path Path to the plugin + * @param input Plugin name + * @return The path to the plugin + */ + file_name_t make_plugin_name(const file_name_t &path, const file_name_t &input) const; private: std::vector pluginDirs; diff --git a/inference-engine/include/ie_plugin_ptr.hpp b/inference-engine/include/ie_plugin_ptr.hpp index 84f2a20aed430f..7a460ec686bd12 100644 --- a/inference-engine/include/ie_plugin_ptr.hpp +++ b/inference-engine/include/ie_plugin_ptr.hpp @@ -32,6 +32,7 @@ class SOCreatorTrait { /** * @brief This class defines the name of the fabric for creating an IHeteroInferencePlugin object in DLL */ +IE_SUPPRESS_DEPRECATED_START template<> class SOCreatorTrait { public: @@ -40,6 +41,7 @@ class SOCreatorTrait { */ static constexpr auto name = "CreatePluginEngine"; }; +IE_SUPPRESS_DEPRECATED_END } // namespace details @@ -50,6 +52,8 @@ class SOCreatorTrait { using InferenceEnginePluginPtr = InferenceEngine::details::SOPointer; /** @copybrief InferenceEnginePluginPtr */ +IE_SUPPRESS_DEPRECATED_START using HeteroPluginPtr = InferenceEngine::details::SOPointer; +IE_SUPPRESS_DEPRECATED_END } // namespace InferenceEngine diff --git a/inference-engine/include/ie_precision.hpp b/inference-engine/include/ie_precision.hpp index 
32f4e986390265..c4d63a59563a58 100644
--- a/inference-engine/include/ie_precision.hpp
+++ b/inference-engine/include/ie_precision.hpp
@@ -30,6 +30,7 @@ class Precision {
         I8 = 50, /**< 8bit signed integer value */
         U16 = 60, /**< 16bit unsigned integer value */
         I32 = 70, /**< 32bit signed integer value */
+        I64 = 72, /**< 64bit signed integer value */
         BIN = 71, /**< 1bit integer value */
         CUSTOM = 80 /**< custom precision has its own name and size of elements */
     };
@@ -97,6 +98,7 @@ class Precision {
             CASE2(FP16, int16_t, uint16_t);
             CASE(I16, int16_t);
             CASE(I32, int32_t);
+            CASE(I64, int64_t);
             CASE(U16, uint16_t);
             CASE(U8, uint8_t);
             CASE(I8, int8_t);
@@ -164,6 +166,7 @@ class Precision {
             PRECISION_NAME(I8),
             PRECISION_NAME(I16),
             PRECISION_NAME(I32),
+            PRECISION_NAME(I64),
             PRECISION_NAME(U16),
             PRECISION_NAME(FP32),
             PRECISION_NAME(FP16),
@@ -215,6 +218,7 @@ class Precision {
         CASE(FP16);
         CASE(I16);
         CASE(I32);
+        CASE(I64);
         CASE(U16);
         CASE(U8);
         CASE(I8);
@@ -269,6 +273,10 @@ struct PrecisionTrait<Precision::I32> {
     using value_type = int32_t;
 };
 template<>
+struct PrecisionTrait<Precision::I64> {
+    using value_type = int64_t;
+};
+template<>
 struct PrecisionTrait<Precision::I8> {
     using value_type = int8_t;
 };
diff --git a/inference-engine/include/ie_preprocess.hpp b/inference-engine/include/ie_preprocess.hpp
index 0a969eebd4d3d8..ee839de47105ef 100644
--- a/inference-engine/include/ie_preprocess.hpp
+++ b/inference-engine/include/ie_preprocess.hpp
@@ -61,10 +61,13 @@ class PreProcessInfo {
     // Resize Algorithm to be applied for input before inference if needed.
     ResizeAlgorithm _resizeAlg = NO_RESIZE;
 
+    // Color format to be used in on-demand color conversions applied to input before inference
+    ColorFormat _colorFormat = ColorFormat::RAW;
+
 public:
     /**
-     * @brief Overloaded [] operator to safely get the channel by an index.
-     * Throws an exception if channels are empty.
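As a quick illustration of the I64 precision added to `ie_precision.hpp` above, the trait resolves it to a 64-bit storage type at compile time. A minimal sketch (the `isI64` helper is a hypothetical example, not part of the header):

```cpp
#include <cstdint>
#include <type_traits>

#include <ie_precision.hpp>

// The PrecisionTrait specialization added above maps Precision::I64 to its C++ storage type.
using I64Type = InferenceEngine::PrecisionTrait<InferenceEngine::Precision::I64>::value_type;
static_assert(std::is_same<I64Type, int64_t>::value, "I64 is stored as int64_t");

// e.g. check the precision reported by blob->getTensorDesc().getPrecision()
bool isI64(const InferenceEngine::Precision& precision) {
    return precision == InferenceEngine::Precision::I64;
}
```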
+ * @brief Overloaded [] operator to safely get the channel by an index + * Throws an exception if channels are empty * @param index Index of the channel to get * @return The pre-process channel instance */ @@ -122,13 +125,13 @@ class PreProcessInfo { void setMeanImage(const Blob::Ptr &meanImage) { if (meanImage.get() == nullptr) { THROW_IE_EXCEPTION << "Failed to set invalid mean image: nullptr"; - } else if (meanImage.get()->dims().size() != 3) { + } else if (meanImage.get()->getTensorDesc().getLayout() != Layout::CHW) { + THROW_IE_EXCEPTION << "Mean image layout should be CHW"; + } else if (meanImage.get()->getTensorDesc().getDims().size() != 3) { THROW_IE_EXCEPTION << "Failed to set invalid mean image: number of dimensions != 3"; - } else if (meanImage.get()->dims()[2] != getNumberOfChannels()) { + } else if (meanImage.get()->getTensorDesc().getDims()[0] != getNumberOfChannels()) { THROW_IE_EXCEPTION << "Failed to set invalid mean image: number of channels != " << getNumberOfChannels(); - } else if (meanImage.get()->layout() != Layout::CHW) { - THROW_IE_EXCEPTION << "Mean image layout should be CHW"; } _variant = MEAN_IMAGE; } @@ -142,7 +145,7 @@ class PreProcessInfo { void setMeanImageForChannel(const Blob::Ptr &meanImage, const size_t channel) { if (meanImage.get() == nullptr) { THROW_IE_EXCEPTION << "Failed to set invalid mean image for channel: nullptr"; - } else if (meanImage.get()->dims().size() != 2) { + } else if (meanImage.get()->getTensorDesc().getDims().size() != 2) { THROW_IE_EXCEPTION << "Failed to set invalid mean image for channel: number of dimensions != 2"; } else if (channel >= _channelsInfo.size()) { THROW_IE_EXCEPTION << "Channel " << channel << " exceed number of PreProcess channels: " @@ -183,5 +186,26 @@ class PreProcessInfo { ResizeAlgorithm getResizeAlgorithm() const { return _resizeAlg; } + + /** + * @brief Changes the color format of the input data provided by the user + * This function should be called before loading the network to the plugin + * Setting color format different from ColorFormat::RAW enables automatic color conversion + * (as a part of built-in preprocessing routine) + * @param fmt A new color format associated with the input + */ + void setColorFormat(ColorFormat fmt) { + _colorFormat = fmt; + } + + /** + * @brief Gets a color format associated with the input + * @details By default, the color format is ColorFormat::RAW meaning + * there is no particular color format assigned to the input + * @return Color format. 
+ */ + ColorFormat getColorFormat() const { + return _colorFormat; + } }; } // namespace InferenceEngine diff --git a/inference-engine/include/inference_engine.hpp b/inference-engine/include/inference_engine.hpp index 2df7fda60931c3..48ced7de905bbf 100644 --- a/inference-engine/include/inference_engine.hpp +++ b/inference-engine/include/inference_engine.hpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -36,10 +37,11 @@ namespace InferenceEngine { */ template inline void TopResults(unsigned int n, TBlob &input, std::vector &output) { - size_t input_rank = input.dims().size(); - if (!input_rank || !input.dims().at(input_rank - 1)) + SizeVector dims = input.getTensorDesc().getDims(); + size_t input_rank = dims.size(); + if (!input_rank || !dims[0]) THROW_IE_EXCEPTION << "Input blob has incorrect dimensions!"; - size_t batchSize = input.dims().at(input_rank - 1); + size_t batchSize = dims[0]; std::vector indexes(input.size() / batchSize); n = static_cast(std::min((size_t) n, input.size())); @@ -77,7 +79,7 @@ inline void TopResults(unsigned int n, TBlob &input, std::vector &o * @param output Vector of indexes for the top n places */ inline void TopResults(unsigned int n, Blob &input, std::vector &output) { - switch (input.precision()) { + switch (input.getTensorDesc().getPrecision()) { TBLOB_TOP_RESULT(FP32); TBLOB_TOP_RESULT(FP16); TBLOB_TOP_RESULT(Q78); @@ -87,7 +89,7 @@ inline void TopResults(unsigned int n, Blob &input, std::vector &outpu TBLOB_TOP_RESULT(U16); TBLOB_TOP_RESULT(I32); default: - THROW_IE_EXCEPTION << "cannot locate blob for precision: " << input.precision(); + THROW_IE_EXCEPTION << "cannot locate blob for precision: " << input.getTensorDesc().getPrecision(); } } @@ -103,13 +105,14 @@ inline void TopResults(unsigned int n, Blob &input, std::vector &outpu */ template void copyFromRGB8(uint8_t *RGB8, size_t RGB8_size, InferenceEngine::TBlob *blob) { - if (4 != blob->dims().size()) + SizeVector dims = blob->getTensorDesc().getDims(); + if (4 != dims.size()) THROW_IE_EXCEPTION << "Cannot write data to input blob! Blob has incorrect dimensions size " - << blob->dims().size(); - size_t num_channels = blob->dims()[2]; // because RGB - size_t num_images = blob->dims()[3]; - size_t w = blob->dims()[0]; - size_t h = blob->dims()[1]; + << dims.size(); + size_t num_channels = dims[1]; // because RGB + size_t num_images = dims[0]; + size_t w = dims[3]; + size_t h = dims[2]; size_t nPixels = w * h; if (RGB8_size != w * h * num_channels * num_images) @@ -168,7 +171,7 @@ void copyToFloat(float *dst, const InferenceEngine::Blob *src) { } const InferenceEngine::TBlob *t_blob = dynamic_cast *>(src); if (t_blob == nullptr) { - THROW_IE_EXCEPTION << "input type is " << src->precision() << " but input is not " << typeid(T).name(); + THROW_IE_EXCEPTION << "input type is " << src->getTensorDesc().getPrecision() << " but input is not " << typeid(T).name(); } const T *srcPtr = t_blob->readOnly(); diff --git a/inference-engine/include/vpu/myriad_plugin_config.hpp b/inference-engine/include/vpu/myriad_plugin_config.hpp new file mode 100644 index 00000000000000..7d820c26c82591 --- /dev/null +++ b/inference-engine/include/vpu/myriad_plugin_config.hpp @@ -0,0 +1,49 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief A header that defines advanced related properties for VPU plugins. 
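A plausible use of the `PreProcessInfo` color-format API introduced above; the input name "data" is a hypothetical example:

```cpp
#include <inference_engine.hpp>

// Declare that the user feeds RGB data, so built-in preprocessing converts it
// to the network's expected format; RAW (the default) disables the conversion.
void enableColorConversion(InferenceEngine::CNNNetwork& network) {
    auto inputInfo = network.getInputsInfo().at("data");  // hypothetical input name
    inputInfo->getPreProcess().setColorFormat(InferenceEngine::ColorFormat::RGB);
    inputInfo->getPreProcess().setResizeAlgorithm(InferenceEngine::RESIZE_BILINEAR);
}
```

As the setter's documentation notes, this has to happen before the network is loaded to the plugin.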
* These properties should be used in SetConfig() and LoadNetwork() methods of plugins
+ *
+ * @file vpu_plugin_config.hpp
+ */
+
+#pragma once
+
+#include
+
+#include "ie_plugin_config.hpp"
+#include "ie_api.h"
+
+#define VPU_MYRIAD_CONFIG_KEY(name) InferenceEngine::VPUConfigParams::_CONFIG_KEY(VPU_MYRIAD_##name)
+#define VPU_MYRIAD_CONFIG_VALUE(name) InferenceEngine::VPUConfigParams::VPU_MYRIAD_##name
+
+#define DECLARE_VPU_MYRIAD_CONFIG_KEY(name) DECLARE_CONFIG_KEY(VPU_MYRIAD_##name)
+#define DECLARE_VPU_MYRIAD_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(VPU_MYRIAD_##name)
+
+namespace InferenceEngine {
+
+namespace VPUConfigParams {
+
+/**
+ * @brief The flag to reset stalled devices: CONFIG_VALUE(YES) or CONFIG_VALUE(NO) (default)
+ * This is a plugin scope option and must be used with the plugin's SetConfig method
+ */
+DECLARE_VPU_MYRIAD_CONFIG_KEY(FORCE_RESET);
+
+/**
+ * @brief This option allows you to specify the device.
+ * If the specified device is not available, creating an infer request will throw an exception.
+ */
+DECLARE_VPU_MYRIAD_CONFIG_KEY(PLATFORM);
+
+/**
+ * @brief Supported keys definition for VPU_MYRIAD_CONFIG_KEY(PLATFORM) option.
+ */
+DECLARE_VPU_MYRIAD_CONFIG_VALUE(2450);
+DECLARE_VPU_MYRIAD_CONFIG_VALUE(2480);
+
+}  // namespace VPUConfigParams
+
+}  // namespace InferenceEngine
diff --git a/inference-engine/include/vpu/vpu_plugin_config.hpp b/inference-engine/include/vpu/vpu_plugin_config.hpp
index e0ef1575929849..69d04f1ed76977 100644
--- a/inference-engine/include/vpu/vpu_plugin_config.hpp
+++ b/inference-engine/include/vpu/vpu_plugin_config.hpp
@@ -12,7 +12,14 @@
 #pragma once
 
 #include
+
 #include "ie_plugin_config.hpp"
+#include "myriad_plugin_config.hpp"
+#include "ie_api.h"
+
+//
+// Common options
+//
 
 #define VPU_CONFIG_KEY(name) InferenceEngine::VPUConfigParams::_CONFIG_KEY(VPU_##name)
 #define VPU_CONFIG_VALUE(name) InferenceEngine::VPUConfigParams::VPU_##name
@@ -20,9 +27,21 @@
 #define DECLARE_VPU_CONFIG_KEY(name) DECLARE_CONFIG_KEY(VPU_##name)
 #define DECLARE_VPU_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(VPU_##name)
 
+//
+// Common metrics
+//
+
+#define VPU_METRIC(name) METRIC_KEY(VPU_##name)
+#define DECLARE_VPU_METRIC(name, ...) DECLARE_METRIC_KEY(VPU_##name, __VA_ARGS__)
+
 namespace InferenceEngine {
+
 namespace VPUConfigParams {
 
+//
+// Common options
+//
+
 /**
 * @brief Turn on HW stages usage (applicable for MyriadX devices only).
 * This option should be used with values: CONFIG_VALUE(YES) or CONFIG_VALUE(NO) (default)
@@ -41,6 +60,7 @@ DECLARE_VPU_CONFIG_KEY(LOG_LEVEL);
 * @brief The key to define normalization coefficient for the network input.
 * This option should be used with a real number. Example "255.f"
 */
+INFERENCE_ENGINE_DEPRECATED
 DECLARE_VPU_CONFIG_KEY(INPUT_NORM);
 
 /**
@@ -48,6 +68,7 @@ DECLARE_VPU_CONFIG_KEY(INPUT_NORM);
 * @brief The flag to specify Bias value that is added to each element of the network input.
 * This option should be used with a real number. Example "0.1f"
 */
+INFERENCE_ENGINE_DEPRECATED
 DECLARE_VPU_CONFIG_KEY(INPUT_BIAS);
 
 /**
@@ -56,12 +77,6 @@ DECLARE_VPU_CONFIG_KEY(INPUT_BIAS);
 */
 DECLARE_VPU_CONFIG_KEY(PRINT_RECEIVE_TENSOR_TIME);
 
-/**
- * @brief The flag to reset stalled devices: CONFIG_VALUE(YES) or CONFIG_VALUE(NO) (default)
- * This is a plugin scope option and must be used with the plugin's SetConfig method
- */
-DECLARE_VPU_CONFIG_KEY(FORCE_RESET);
-
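Taken together, the MYRIAD-scoped keys above and the metrics/throughput keys from `ie_plugin_config.hpp` are consumed through the 2019 R2 `Core` API that this change introduces. A minimal sketch (the IR paths are hypothetical, and an attached MYRIAD device is assumed):

```cpp
#include <inference_engine.hpp>
#include <vpu/myriad_plugin_config.hpp>

int main() {
    InferenceEngine::Core ie;

    // Throughput-oriented GPU execution via KEY_GPU_THROUGHPUT_STREAMS.
    ie.SetConfig({{CONFIG_KEY(GPU_THROUGHPUT_STREAMS), CONFIG_VALUE(GPU_THROUGHPUT_AUTO)}}, "GPU");

    // Plugin-scope MYRIAD option: reset stalled devices before use.
    ie.SetConfig({{VPU_MYRIAD_CONFIG_KEY(FORCE_RESET), CONFIG_VALUE(YES)}}, "MYRIAD");

    // Device metric: thermal reading of a MYRIAD stick.
    float temperature = ie.GetMetric("MYRIAD", METRIC_KEY(DEVICE_THERMAL)).as<float>();

    // Executable-network metric: recommended number of parallel infer requests.
    InferenceEngine::CNNNetReader reader;
    reader.ReadNetwork("model.xml");  // hypothetical IR paths
    reader.ReadWeights("model.bin");
    auto exeNet = ie.LoadNetwork(reader.getNetwork(), "GPU");
    auto nireq = exeNet.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();

    (void)temperature; (void)nireq;
    return 0;
}
```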
 /**
  * @brief This option allows you to pass extra configuration for the executable network.
  * By default, it is an empty string, which means no configuration.
@@ -84,12 +99,6 @@ DECLARE_VPU_CONFIG_KEY(NETWORK_CONFIG);
 */
 DECLARE_VPU_CONFIG_KEY(COMPUTE_LAYOUT);
 
-/**
- * @brief This option allows to pass custom layers binding xml.
- * If layer is present in such an xml, it would be used during inference even if the layer is natively supported
- */
-DECLARE_VPU_CONFIG_KEY(CUSTOM_LAYERS);
-
 /**
  * @brief Supported keys definition for VPU_CONFIG_KEY(COMPUTE_LAYOUT) option.
 */
@@ -98,16 +107,10 @@ DECLARE_VPU_CONFIG_VALUE(NCHW);
 DECLARE_VPU_CONFIG_VALUE(NHWC);
 
 /**
- * @brief This option allows to specify device.
- * If specified device is not available then creating infer request will throw an exception.
- */
-DECLARE_VPU_CONFIG_KEY(PLATFORM);
-
-/**
- * @brief Supported keys definition for VPU_CONFIG_KEY(PLATFORM) option.
+ * @brief This option allows you to pass a custom layer binding XML.
+ * If a layer is present in such an XML, it is used during inference even if the layer is natively supported
 */
-DECLARE_VPU_CONFIG_VALUE(2450);
-DECLARE_VPU_CONFIG_VALUE(2480);
+DECLARE_VPU_CONFIG_KEY(CUSTOM_LAYERS);
 
 /**
  * @brief Ignore statistic in IR by plugin.
@@ -117,5 +120,37 @@ DECLARE_VPU_CONFIG_VALUE(2480);
 */
 DECLARE_VPU_CONFIG_KEY(IGNORE_IR_STATISTIC);
 
+/**
+ * @brief This option allows you to specify the protocol.
+ */
+DECLARE_VPU_MYRIAD_CONFIG_KEY(PROTOCOL);
+
+/**
+ * @brief Supported keys definition for VPU_MYRIAD_CONFIG_KEY(PROTOCOL) option.
+ */
+DECLARE_VPU_MYRIAD_CONFIG_VALUE(PCIE);
+DECLARE_VPU_MYRIAD_CONFIG_VALUE(USB);
+
+/**
+ * @deprecated Use VPU_MYRIAD_CONFIG_KEY(FORCE_RESET) instead.
+ */
+INFERENCE_ENGINE_DEPRECATED
+DECLARE_VPU_CONFIG_KEY(FORCE_RESET);
+
+/**
+ * @deprecated Use VPU_MYRIAD_CONFIG_KEY(PLATFORM) instead.
+ */
+INFERENCE_ENGINE_DEPRECATED
+DECLARE_VPU_CONFIG_KEY(PLATFORM);
+
+/**
+ * @brief Supported keys definition for VPU_CONFIG_KEY(PLATFORM) option.
+ */
+INFERENCE_ENGINE_DEPRECATED
+DECLARE_VPU_CONFIG_VALUE(2450);
+INFERENCE_ENGINE_DEPRECATED
+DECLARE_VPU_CONFIG_VALUE(2480);
+
 }  // namespace VPUConfigParams
+
 }  // namespace InferenceEngine
diff --git a/inference-engine/install_dependencies.sh b/inference-engine/install_dependencies.sh
index 4f1849fbd3da2b..a04c1812a5de10 100755
--- a/inference-engine/install_dependencies.sh
+++ b/inference-engine/install_dependencies.sh
@@ -5,7 +5,7 @@
 params=$@
 
-function yes_or_no {
+yes_or_no() {
     if [ "$params" == "-y" ]; then
         return 0
     fi
@@ -20,7 +20,7 @@ function yes_or_no {
 }
 
 # install dependencies
-if [[ -f /etc/lsb-release ]]; then
+if [ -f /etc/lsb-release ]; then
     # Ubuntu
     sudo -E apt update
     sudo -E apt-get install -y \
@@ -56,7 +56,7 @@ if [[ -f /etc/lsb-release ]]; then
     else
         sudo -E apt-get install -y libpng-dev
     fi
-elif [[ -f /etc/redhat-release ]]; then
+elif [ -f /etc/redhat-release ]; then
     # CentOS 7.x
     sudo -E yum install -y centos-release-scl epel-release
     sudo -E yum install -y \
@@ -124,6 +124,40 @@ elif [[ -f /etc/redhat-release ]]; then
         echo "FFmpeg installation skipped.
You may build FFmpeg from sources as described here: https://trac.ffmpeg.org/wiki/CompilationGuide/Centos"
        echo
    fi
+elif [ -f /etc/os-release ] && grep -q "raspbian" /etc/os-release; then
+    # Raspbian
+    sudo -E apt update
+    sudo -E apt-get install -y \
+            build-essential \
+            cmake \
+            curl \
+            wget \
+            libssl-dev \
+            ca-certificates \
+            git \
+            libboost-regex-dev \
+            libgtk2.0-dev \
+            pkg-config \
+            unzip \
+            automake \
+            libtool \
+            autoconf \
+            libcairo2-dev \
+            libpango1.0-dev \
+            libglib2.0-dev \
+            libswscale-dev \
+            libavcodec-dev \
+            libavformat-dev \
+            libgstreamer1.0-0 \
+            gstreamer1.0-plugins-base \
+            libusb-1.0-0-dev \
+            libopenblas-dev
+    if apt-cache search --names-only '^libpng12'| grep -q libpng12; then
+        sudo -E apt-get install -y libpng12-dev
+    else
+        sudo -E apt-get install -y libpng-dev
+    fi
 else
     echo "Unknown OS, please install build dependencies manually"
 fi
\ No newline at end of file
diff --git a/inference-engine/samples/CMakeLists.txt b/inference-engine/samples/CMakeLists.txt
index 4e13c11e418d49..d354f647cecb44 100644
--- a/inference-engine/samples/CMakeLists.txt
+++ b/inference-engine/samples/CMakeLists.txt
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required (VERSION 2.8)
+cmake_minimum_required (VERSION 2.8.11)
 
 project(Samples)
 
@@ -12,12 +12,11 @@ if (CMAKE_BUILD_TYPE STREQUAL "")
 endif()
 
 if (NOT(BIN_FOLDER))
-    if(CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l")
-        set (ARCH armv7l)
-    elseif("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
-        set (ARCH intel64)
-    else()
-        set (ARCH ia32)
+    string(TOLOWER ${CMAKE_SYSTEM_PROCESSOR} ARCH)
+    if(ARCH STREQUAL "x86_64" OR ARCH STREQUAL "amd64")  # Windows detects Intel's 64-bit CPU as AMD64
+        set(ARCH intel64)
+    elseif(ARCH STREQUAL "i386")
+        set(ARCH ia32)
     endif()
 
     set (BIN_FOLDER ${ARCH})
@@ -79,7 +78,6 @@ else()
    endif()
 endif()
 
-
 ####################################
 ## to use C++11
 set (CMAKE_CXX_STANDARD 11)
@@ -99,10 +97,6 @@ if (${CMAKE_CXX_COMPILER_ID} STREQUAL GNU)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
 endif()
 
-include_directories (
-    ${CMAKE_CURRENT_SOURCE_DIR}/common
-    ${CMAKE_CURRENT_SOURCE_DIR}/common/format_reader
-)
 add_subdirectory(common/format_reader)
 
 # samples build can be switched off during whole IE build
@@ -132,22 +126,85 @@ function(add_samples_to_build)
     endforeach()
 endfunction(add_samples_to_build)
 
+include(CMakeParseArguments)
+
+#
+# ie_add_sample(NAME
+#               SOURCES
+#               [HEADERS ]
+#               [INCLUDE_DIRECTORIES ]
+#               [DEPENDENCIES ]
+#               [OPENCV_DEPENDENCIES ]
+#               [EXCLUDE_CPPLINT]
+#
+macro(ie_add_sample)
+    set(options EXCLUDE_CPPLINT)
+    set(oneValueArgs NAME)
+    set(multiValueArgs SOURCES HEADERS DEPENDENCIES OPENCV_DEPENDENCIES INCLUDE_DIRECTORIES)
+    cmake_parse_arguments(IE_SAMPLE "${options}" "${oneValueArgs}"
+                          "${multiValueArgs}" ${ARGN} )
+
+    # Find OpenCV components if they exist
+    if(IE_SAMPLE_OPENCV_DEPENDENCIES)
+        find_package(OpenCV COMPONENTS ${IE_SAMPLE_OPENCV_DEPENDENCIES} QUIET)
+        if(NOT OpenCV_FOUND)
+            message(WARNING "OPENCV is disabled or not found, " ${IE_SAMPLE_NAME} " skipped")
+            return()
+        else()
+            add_definitions(-DUSE_OPENCV)
+        endif()
+    endif()
+
+    # Create named folders for the sources within the .vcproj
+    # Empty name lists them directly under the .vcproj
+    source_group("src" FILES ${IE_SAMPLE_SOURCES})
+    if(IE_SAMPLE_HEADERS)
+        source_group("include" FILES ${IE_SAMPLE_HEADERS})
+    endif()
+
+    # Create executable file from sources
+    add_executable(${IE_SAMPLE_NAME} ${IE_SAMPLE_SOURCES} ${IE_SAMPLE_HEADERS})
+
+    if(WIN32)
+        set_target_properties(${IE_SAMPLE_NAME} PROPERTIES COMPILE_PDB_NAME ${IE_SAMPLE_NAME})
+    endif()
+
+    if(IE_SAMPLE_INCLUDE_DIRECTORIES)
+        target_include_directories(${IE_SAMPLE_NAME} PRIVATE ${IE_SAMPLE_INCLUDE_DIRECTORIES})
+    endif()
+    target_include_directories(${IE_SAMPLE_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../common")
+
+    target_link_libraries(${IE_SAMPLE_NAME} PRIVATE ${OpenCV_LIBRARIES} ${InferenceEngine_LIBRARIES}
+                                                    ${IE_SAMPLE_DEPENDENCIES} IE::ie_cpu_extension gflags)
+
+    if(UNIX)
+        target_link_libraries(${IE_SAMPLE_NAME} PRIVATE pthread)
+    endif()
+
+    # create global target with all samples / demo apps
+    if(NOT TARGET ie_samples)
+        add_custom_target(ie_samples ALL)
+    endif()
+    add_dependencies(ie_samples ${IE_SAMPLE_NAME})
+
+    if(COMMAND add_cpplint_target AND NOT IE_SAMPLE_EXCLUDE_CPPLINT)
+        add_cpplint_target(${IE_SAMPLE_NAME}_cpplint FOR_TARGETS ${IE_SAMPLE_NAME})
+    endif()
+endmacro()
+
+
 # use this flag if you need to throw a custom message in case the IE package is not found.
 if (IE_NOT_FOUND_MESSAGE)
-    find_package(InferenceEngine 1.6 QUIET)
+    find_package(InferenceEngine 2.0 QUIET)
     if (NOT(InferenceEngine_FOUND))
         message(FATAL_ERROR ${IE_NOT_FOUND_MESSAGE})
     endif()
 else()
-    find_package(InferenceEngine 1.6 REQUIRED)
-endif()
-
-if (UNIX)
-    set (LIB_DL dl)
+    find_package(InferenceEngine 2.0 REQUIRED)
 endif()
 
 # collect all samples subdirectories
 file(GLOB samples_dirs RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *)
 # skip building of unnecessary subdirectories
-list(REMOVE_ITEM samples_dirs archived common thirdparty)
+list(REMOVE_ITEM samples_dirs common thirdparty)
 add_samples_to_build(${samples_dirs})
diff --git a/inference-engine/samples/benchmark_app/CMakeLists.txt b/inference-engine/samples/benchmark_app/CMakeLists.txt
index c142ea607b2fd0..bd2d6672d9eb96 100644
--- a/inference-engine/samples/benchmark_app/CMakeLists.txt
+++ b/inference-engine/samples/benchmark_app/CMakeLists.txt
@@ -2,26 +2,11 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-set (TARGET_NAME "benchmark_app")
-
-file (GLOB SRC
-        ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
-        )
-
-# Create named folders for the sources within the .vcproj
-# Empty name lists them directly under the .vcproj
-source_group("src" FILES ${SRC})
-
-link_directories(${LIB_FOLDER})
-
-# Create library file from sources.
-add_executable(${TARGET_NAME} ${SRC})
-
-set_target_properties(${TARGET_NAME} PROPERTIES "CMAKE_CXX_FLAGS" "${CMAKE_CXX_FLAGS} -fPIE"
-COMPILE_PDB_NAME ${TARGET_NAME})
-
-target_link_libraries(${TARGET_NAME} ${InferenceEngine_LIBRARIES} IE::ie_cpu_extension format_reader gflags)
-
-if(UNIX)
-    target_link_libraries(${TARGET_NAME} ${LIB_DL} pthread)
-endif()
+file (GLOB SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
+file (GLOB HDR ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp)
+
+ie_add_sample(NAME benchmark_app
+              SOURCES ${SRC}
+              HEADERS ${HDR}
+              DEPENDENCIES format_reader
+              OPENCV_DEPENDENCIES imgcodecs)
diff --git a/inference-engine/samples/benchmark_app/README.md b/inference-engine/samples/benchmark_app/README.md
index 8cba50dfa681d5..b1bde478a55725 100644
--- a/inference-engine/samples/benchmark_app/README.md
+++ b/inference-engine/samples/benchmark_app/README.md
@@ -1,29 +1,27 @@
-# Benchmark Application C++ Demo
+# Benchmark C++ Application
 
 This topic demonstrates how to use the Benchmark Application to estimate deep learning inference performance on
-supported devices. Performance can be measured for two inference modes: synchronous and asynchronous.
+supported devices. Performance can be measured for two inference modes: synchronous (latency-oriented) and asynchronous (throughput-oriented).
 
 > **NOTE:** This topic describes usage of the C++ implementation of the Benchmark Application. For the Python* implementation, refer to [Benchmark Application (Python*)](./inference-engine/ie_bridges/python/sample/benchmark_app/README.md).
 
 ## How It Works
 
-> **NOTE:** To achieve benchmark results similar to the official published results, set CPU frequency to 2.9 GHz and GPU frequency to 1 GHz.
-
-Upon start-up, the application reads command-line parameters and loads a network and images to the Inference Engine
+Upon start-up, the application reads command-line parameters and loads a network and images/binary files to the Inference Engine
 plugin, which is chosen depending on a specified device. The number of infer requests and execution approach depend
 on the mode defined with the `-api` command-line parameter.
 
 > **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
 
 If you run the application in the synchronous mode, it creates one infer request and executes the `Infer` method.
-If you run the application in the asynchronous mode, it creates as many infer requests as specified in the `-nireq`
-command-line parameter and executes the `StartAsync` method for each of them.
+If you run the application in the asynchronous mode, it creates as many infer requests as specified in the `-nireq` command-line parameter and executes the `StartAsync` method for each of them. If `-nireq` is not set, the demo will use the default value for the specified device.
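In API terms, the asynchronous mode described here reduces to issuing all requests and then waiting for them, as in this simplified sketch (`exeNet` and `nireq` are assumed to come from the surrounding application code):

```cpp
#include <vector>
#include <inference_engine.hpp>

void runAsyncIteration(InferenceEngine::ExecutableNetwork& exeNet, size_t nireq) {
    std::vector<InferenceEngine::InferRequest> requests;
    for (size_t i = 0; i < nireq; i++)
        requests.push_back(exeNet.CreateInferRequest());

    for (auto& request : requests)   // issue all requests in flight
        request.StartAsync();
    for (auto& request : requests)   // then wait for every result
        request.Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
}
```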
-The `Wait` method is used to wait for a previous execution of an infer request to complete. A number of execution steps
-is defined by one of the two values:
+A number of execution steps is defined by one of the following parameters:
 * Number of iterations specified with the `-niter` command-line argument
-* Predefined duration if `-niter` is not specified. Predefined duration value depends on device.
+* Time duration specified with the `-t` command-line argument
+* Both of them (execution will continue until both conditions are met)
+* Predefined duration if `-niter` and `-t` are not specified. Predefined duration value depends on the device.
 
 During the execution, the application collects latency for each executed infer request.
 
@@ -37,10 +35,10 @@ Throughput value also depends on batch size.
 
 The application also collects per-layer Performance Measurement (PM) counters for each executed infer request if you
 enable statistics dumping by setting the `-report_type` parameter to one of the possible values:
 * `no_counters` report includes configuration options specified, resulting FPS and latency.
-* `median_counters` report extends the `no_counters` report and additionally includes median PM counters values for each layer from the network.
-* `detailed_counters` report extends the `median_counters` report and additionally includes per-layer PM counters and latency for each executed infer request.
+* `average_counters` report extends the `no_counters` report and additionally includes average PM counters values for each layer from the network.
+* `detailed_counters` report extends the `average_counters` report and additionally includes per-layer PM counters and latency for each executed infer request.
 
-Depending on the type, the report is stored to `benchmark_no_counters_report.csv`, `benchmark_median_counters_report.csv`,
+Depending on the type, the report is stored to `benchmark_no_counters_report.csv`, `benchmark_average_counters_report.csv`,
 or `benchmark_detailed_counters_report.csv` file located in the path specified in `-report_folder`.
 
 The application also saves executable graph information serialized to an XML file if you specify a path to it with the
@@ -48,9 +46,16 @@ The application also saves executable graph information serialized to an XML file
 
 ## Running
 
+Notice that the benchmark_app usually produces optimal performance for any device out of the box.
+
+**So in most cases you don't need to set the app options explicitly, and the plain device name is enough**, e.g.:
+```
+$benchmark_app -m <model> -i <input> -d CPU
+```
+As explained in the [Introduction to Performance Topics](./docs/IE_DG/Intro_to_Performance.md) section, it is preferable to use the FP16 IR for the model.
 
 Running the application with the `-h` option yields the following usage message:
-```sh
+```
./benchmark_app -h InferenceEngine: API version ............ benchmark_app [OPTION] Options: - -h Print a usage message - -i "" Required. Path to a folder with images or to image files. -m "" Required. Path to an .xml file with a trained model. - -pp "" Optional. Path to a plugin folder. - -d "" Optional. Specify a target device to infer on: CPU, GPU, FPGA, HDDL or MYRIAD. Default value is CPU. Use "-d HETERO:" format to specify HETERO plugin. The application looks for a suitable plugin for the specified device. + -h, --help Print a usage message + -i "" Optional. Path to a folder with images and/or binaries or to specific image or binary file. -m "" Required. Path to an .xml file with a trained model. - -pp "" Optional. Path to a plugin folder. - -d "" Optional. Specify a target device to infer on (the list of available devices is shown below). Default value is CPU.
+ Use "-d HETERO:" format to specify HETERO plugin. + The application looks for a suitable plugin for the specified device. -l "" Required for CPU custom layers. Absolute path to a shared library with the kernels implementations. Or -c "" Required for GPU custom kernels. Absolute path to an .xml file with the kernels description. -api "" Optional. Enable Sync/Async API. Default value is "async". -niter "" Optional. Number of iterations. If not specified, the number of iterations is calculated depending on a device. - -nireq "" Optional. Number of infer requests. Default value is 2. + -nireq "" Optional. Number of infer requests. Default value is determined automatically for a device. -b "" Optional. Batch size value. If not specified, the batch size value is determined from Intermediate Representation. -stream_output Optional. Print progress as a plain text. When specified, an interactive progress bar is replaced with a multiline output. + -t Optional. Time in seconds to execute topology. + -progress Optional. Show progress bar (can affect performance measurement). Default values is "false". CPU-specific performance options: - -nthreads "" Optional. Number of threads to use for inference on the CPU (including HETERO cases). + -nstreams "" Optional. Number of streams to use for inference on the CPU or/and GPU in throughput mode + (for HETERO device case use format :,: or just ). + -nthreads "" Optional. Number of threads to use for inference on the CPU (including HETERO case). -pin "YES"/"NO" Optional. Enable ("YES" is default value) or disable ("NO") CPU threads pinning for CPU-involved inference. Statistics dumping options: - -report_type "" Optional. Enable collecting statistics report. "no_counters" report contains configuration options specified, resulting FPS and latency. "median_counters" report extends "no_counters" report and additionally includes median PM counters values for each layer from the network. "detailed_counters" report extends "median_counters" report and additionally includes per-layer PM counters and latency for each executed infer request. + -report_type "" Optional. Enable collecting statistics report. "no_counters" report contains configuration options specified, resulting FPS and latency. "average_counters" report extends "no_counters" report and additionally includes average PM counters values for each layer from the network. "detailed_counters" report extends "average_counters" report and additionally includes per-layer PM counters and latency for each executed infer request. -report_folder Optional. Path to a folder where statistics report is stored. -exec_graph_path Optional. Path to a file where to store executable graph information serialized. + -pc Optional. Report performance counters. ``` Running the application with the empty list of options yields the usage message given above and an error message. -You can run the application for one input layer four-dimensional models that support images as input, for example, public -AlexNet and GoogLeNet models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/). +Application supports topologies with one or more inputs. If a topology is not data sensitive, you can skip the input parameter. In this case, inputs are filled with random values. +If a model has only image input(s), please a provide folder with images or a path to an image as input. 
+If a model has some specific input(s) (not images), please prepare binary file(s) filled with data of the appropriate precision, and provide a path to them as input. +If a model has mixed input types, the input folder should contain all required files. Image inputs are filled with image files one by one. Binary inputs are filled with binary files one by one. + +To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/). > **NOTE**: Before running the demo with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). @@ -103,23 +118,26 @@ For the asynchronous mode: ./benchmark_app -i /inputImage.bmp -m /alexnet_fp32.xml -d CPU -api async ``` - ## Demo Output -The application outputs latency and throughput. Additionally, if you set the `-report_type` parameter, the application -outputs statistics report. If you set `-exec_graph_path`, the application reports executable graph information serialized. -Progress bar shows the progress of each execution step: +The application outputs the number of executed iterations, total duration of execution, latency and throughput. +Additionally, if you set the `-report_type` parameter, the application outputs a statistics report. +If you set the `-pc` parameter, the application outputs performance counters. +If you set `-exec_graph_path`, the application reports executable graph information serialized. ``` -[Step 7/8] Start inference asynchronously (100 async inference executions, 4 inference requests in parallel) +[Step 8/9] Measuring performance (Start inference asyncronously, 60000 ms duration, 4 inference requests in parallel using 4 streams) Progress: [....................] 100.00% done -[Step 8/8] Dump statistics report -[ INFO ] statistics report is stored to benchmark_detailed_counters_report.csv +[Step 9/9] Dumping statistics report +[ INFO ] Statistics collecting was not requested. No reports are dumped. Progress: [....................] 100.00% done -Latency: 73.33 ms -Throughput: 53.28 FPS +Count: 4612 iterations +Duration: 60110.04 ms +Latency: 50.99 ms +Throughput: 76.73 FPS + ``` All measurements including per-layer PM counters are reported in milliseconds. @@ -127,5 +145,5 @@ All measurements including per-layer PM counters are reported in milliseconds. ## See Also * [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md) -* [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) +* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) * [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) diff --git a/inference-engine/samples/benchmark_app/benchmark_app.hpp b/inference-engine/samples/benchmark_app/benchmark_app.hpp index 8320fb766ded48..6b6991fcfef966 100644 --- a/inference-engine/samples/benchmark_app/benchmark_app.hpp +++ b/inference-engine/samples/benchmark_app/benchmark_app.hpp @@ -9,46 +9,39 @@ #include #include -#ifdef _WIN32 -#include -#else -#include -#include -#endif - /// @brief message for help argument static const char help_message[] = "Print a usage message"; /// @brief message for images argument -static const char image_message[] = "Required.
Path to a folder with images or to image files."; - -/// @brief message for images argument -static const char multi_input_message[] = "Path to multi input file containing."; +static const char input_message[] = "Optional. Path to a folder with images and/or binaries or to specific image or binary file."; /// @brief message for model argument static const char model_message[] = "Required. Path to an .xml file with a trained model."; -/// @brief message for plugin_path argument -static const char plugin_path_message[] = "Optional. Path to a plugin folder."; - /// @brief message for execution mode static const char api_message[] = "Optional. Enable Sync/Async API. Default value is \"async\"."; /// @brief message for assigning cnn calculation to device -static const char target_device_message[] = "Optional. Specify a target device to infer on: CPU, GPU, FPGA, HDDL or MYRIAD. Default value is CPU. " \ -"Use \"-d HETERO:\" format to specify HETERO plugin. " \ -"The application looks for a suitable plugin for the specified device."; +static const char target_device_message[] = "Optional. Specify a target device to infer on (the list of available devices is shown below). " \ +"Default value is CPU. Use \"-d HETERO:\" format to specify HETERO plugin. "; /// @brief message for iterations count static const char iterations_count_message[] = "Optional. Number of iterations. " \ "If not specified, the number of iterations is calculated depending on a device."; /// @brief message for requests count -static const char infer_requests_count_message[] = "Optional. Number of infer requests. Default value is 2."; +static const char infer_requests_count_message[] = "Optional. Number of infer requests. Default value is determined automatically for device."; + +/// @brief message for execution time +static const char execution_time_message[] = "Optional. Time in seconds to execute topology."; /// @brief message for #threads for CPU inference static const char infer_num_threads_message[] = "Optional. Number of threads to use for inference on the CPU " - "(including HETERO cases)."; + "(including HETERO case)."; + +/// @brief message for #streams for CPU inference +static const char infer_num_streams_message[] = "Optional. Number of streams to use for inference on the CPU or/and GPU in throughput mode " + "(for HETERO device case use format :,: or just )"; /// @brief message for user library argument static const char custom_cpu_library_message[] = "Required for CPU custom layers. Absolute path to a shared library with the kernels implementations."; @@ -68,10 +61,10 @@ static const char stream_output_message[] = "Optional. Print progress as a plain // @brief message for report_type option static const char report_type_message[] = "Optional. Enable collecting statistics report. \"no_counters\" report contains " - "configuration options specified, resulting FPS and latency. \"median_counters\" " - "report extends \"no_counters\" report and additionally includes median PM " + "configuration options specified, resulting FPS and latency. \"average_counters\" " + "report extends \"no_counters\" report and additionally includes average PM " "counters values for each layer from the network. 
\"detailed_counters\" report " - "extends \"median_counters\" report and additionally includes per-layer PM " + "extends \"average_counters\" report and additionally includes per-layer PM " "counters and latency for each executed infer request."; // @brief message for report_folder option @@ -80,20 +73,26 @@ static const char report_folder_message[] = "Optional. Path to a folder where st // @brief message for exec_graph_path option static const char exec_graph_path_message[] = "Optional. Path to a file where to store executable graph information serialized."; +// @brief message for progress bar option +static const char progress_message[] = "Optional. Show progress bar (can affect performance measurement). Default values is \"false\"."; + +// @brief message for performance counters option +static const char pc_message[] = "Optional. Report performance counters."; + /// @brief Define flag for showing help message
DEFINE_bool(h, false, help_message); +/// @brief Declare flag for showing help message
+DECLARE_bool(help); + /// @brief Define parameter for setting an input file
/// i or mif is a required parameter -DEFINE_string(i, "", image_message); +DEFINE_string(i, "", input_message); /// @brief Define parameter for setting the model file
/// It is a required parameter DEFINE_string(m, "", model_message); -/// @brief Define parameter for set path to plugins
-DEFINE_string(pp, "", plugin_path_message); - /// @brief Define execution mode DEFINE_string(api, "async", api_message); @@ -113,12 +112,18 @@ DEFINE_string(c, "", custom_cldnn_message); /// Async mode: StartAsync counts DEFINE_uint32(niter, 0, iterations_count_message); +/// @brief Time to execute topology in seconds +DEFINE_uint32(t, 0, execution_time_message); + /// @brief Number of infer requests in parallel -DEFINE_uint32(nireq, 2, infer_requests_count_message); +DEFINE_uint32(nireq, 0, infer_requests_count_message); -/// @brief Number of threads to use for inference on the CPU (also affects Hetero cases) +/// @brief Number of threads to use for inference on the CPU in throughput mode (also affects Hetero cases) DEFINE_uint32(nthreads, 0, infer_num_threads_message); +/// @brief Number of streams to use for inference on the CPU (also affects Hetero cases) +DEFINE_string(nstreams, "", infer_num_streams_message); + /// @brief Define parameter for batch size
/// Default is 0 (that means don't specify) DEFINE_uint32(b, 0, batch_size_message); @@ -138,6 +143,12 @@ DEFINE_string(report_folder, "", report_folder_message); /// @brief Path to a file where to store executable graph information serialized DEFINE_string(exec_graph_path, "", exec_graph_path_message); +/// @brief Define flag for showing progress bar
+DEFINE_bool(progress, false, progress_message); + +/// @brief Define flag for showing performance counters
+DEFINE_bool(pc, false, pc_message); + /** * @brief This function show a help message */ @@ -146,10 +157,9 @@ static void showUsage() { std::cout << "benchmark_app [OPTION]" << std::endl; std::cout << "Options:" << std::endl; std::cout << std::endl; - std::cout << " -h " << help_message << std::endl; - std::cout << " -i \"\" " << image_message << std::endl; + std::cout << " -h, --help " << help_message << std::endl; + std::cout << " -i \"\" " << input_message << std::endl; std::cout << " -m \"\" " << model_message << std::endl; - std::cout << " -pp \"\" " << plugin_path_message << std::endl; std::cout << " -d \"\" " << target_device_message << std::endl; std::cout << " -l \"\" " << custom_cpu_library_message << std::endl; std::cout << " Or" << std::endl; @@ -159,11 +169,15 @@ static void showUsage() { std::cout << " -nireq \"\" " << infer_requests_count_message << std::endl; std::cout << " -b \"\" " << batch_size_message << std::endl; std::cout << " -stream_output " << stream_output_message << std::endl; - std::cout << std::endl << " CPU-specific performance options:" << std::endl; + std::cout << " -t " << execution_time_message << std::endl; + std::cout << " -progress " << progress_message << std::endl; + std::cout << std::endl << " device-specific performance options:" << std::endl; + std::cout << " -nstreams \"\" " << infer_num_streams_message << std::endl; std::cout << " -nthreads \"\" " << infer_num_threads_message << std::endl; std::cout << " -pin \"YES\"/\"NO\" " << infer_threads_pinning_message << std::endl; std::cout << std::endl << " Statistics dumping options:" << std::endl; std::cout << " -report_type \"\" " << report_type_message << std::endl; std::cout << " -report_folder " << report_folder_message << std::endl; std::cout << " -exec_graph_path " << exec_graph_path_message << std::endl; + std::cout << " -pc " << pc_message << std::endl; } diff --git a/inference-engine/samples/benchmark_app/infer_request_wrap.hpp b/inference-engine/samples/benchmark_app/infer_request_wrap.hpp index 741ee1961a6d0e..ff46f2b4219ed5 100644 --- a/inference-engine/samples/benchmark_app/infer_request_wrap.hpp +++ b/inference-engine/samples/benchmark_app/infer_request_wrap.hpp @@ -4,25 +4,40 @@ #pragma once +#include +#include #include #include #include #include +#include +#include +#include +#include #include "inference_engine.hpp" +#include "statistics_report.hpp" typedef std::chrono::high_resolution_clock Time; typedef std::chrono::nanoseconds ns; +typedef std::function QueueCallbackFunction; + /// @brief Wrapper class for InferenceEngine::InferRequest. Handles asynchronous callbacks and calculates execution time. 
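The wrapper and the `InferRequestsQueue` defined below are typically driven by a loop of this shape — a sketch assuming the wrapper's `startAsync()` helper and an `ExecutableNetwork` named `exeNet`:

```cpp
void measure(InferenceEngine::ExecutableNetwork& exeNet, size_t nireq, size_t iterations) {
    InferRequestsQueue queue(exeNet, nireq);
    for (size_t i = 0; i < iterations; i++) {
        auto request = queue.getIdleRequest();  // blocks until a request is free
        request->startAsync();                  // completion callback re-queues it
    }
    queue.waitAll();                            // drain requests still in flight
    auto latencies = queue.getLatencies();      // per-request execution times, ms
    (void)latencies;                            // aggregate as needed
}
```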
-class InferReqWrap {
+class InferReqWrap final {
 public:
     using Ptr = std::shared_ptr<InferReqWrap>;
 
-    explicit InferReqWrap(InferenceEngine::ExecutableNetwork& net) : _request(net.CreateInferRequest()) {
+    ~InferReqWrap() = default;
+
+    explicit InferReqWrap(InferenceEngine::ExecutableNetwork& net, size_t id, QueueCallbackFunction callbackQueue) :
+        _request(net.CreateInferRequest()),
+        _id(id),
+        _callbackQueue(callbackQueue) {
         _request.SetCompletionCallback(
                 [&]() {
                     _endTime = Time::now();
+                    _callbackQueue(_id, getExecutionTimeInMilliseconds());
                 });
     }
 
@@ -35,24 +50,18 @@ class InferReqWrap {
         _startTime = Time::now();
         _request.Infer();
         _endTime = Time::now();
+        _callbackQueue(_id, getExecutionTimeInMilliseconds());
     }
 
     std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> getPerformanceCounts() {
         return _request.GetPerformanceCounts();
     }
 
-    void wait() {
-        InferenceEngine::StatusCode code = _request.Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
-        if (code != InferenceEngine::StatusCode::OK) {
-            throw std::logic_error("Wait");
-        }
-    }
-
     InferenceEngine::Blob::Ptr getBlob(const std::string &name) {
         return _request.GetBlob(name);
     }
 
-    double getExecTime() const {
+    double getExecutionTimeInMilliseconds() const {
         auto execTime = std::chrono::duration_cast<ns>(_endTime - _startTime);
         return static_cast<double>(execTime.count()) * 0.000001;
     }
@@ -61,4 +70,67 @@ class InferReqWrap {
     InferenceEngine::InferRequest _request;
     Time::time_point _startTime;
     Time::time_point _endTime;
-};
\ No newline at end of file
+    size_t _id;
+    QueueCallbackFunction _callbackQueue;
+};
+
+class InferRequestsQueue final {
+public:
+    InferRequestsQueue(InferenceEngine::ExecutableNetwork& net, size_t nireq) {
+        for (size_t id = 0; id < nireq; id++) {
+            requests.push_back(std::make_shared<InferReqWrap>(net, id, std::bind(&InferRequestsQueue::putIdleRequest, this,
+                                                                                 std::placeholders::_1,
+                                                                                 std::placeholders::_2)));
+            _idleIds.push(id);
+        }
+        resetTimes();
+    }
+    ~InferRequestsQueue() = default;
+
+    void resetTimes() {
+        _startTime = Time::time_point::max();
+        _endTime = Time::time_point::min();
+        _latencies.clear();
+    }
+
+    double getDurationInMilliseconds() {
+        return std::chrono::duration_cast<ns>(_endTime - _startTime).count() * 0.000001;
+    }
+
+    void putIdleRequest(size_t id,
+                        const double latency) {
+        std::unique_lock<std::mutex> lock(_mutex);
+        _latencies.push_back(latency);
+        _idleIds.push(id);
+        _endTime = std::max(Time::now(), _endTime);
+        _cv.notify_one();
+    }
+
+    InferReqWrap::Ptr getIdleRequest() {
+        std::unique_lock<std::mutex> lock(_mutex);
+        _cv.wait(lock, [this]{ return _idleIds.size() > 0; });
+        auto request = requests.at(_idleIds.front());
+        _idleIds.pop();
+        _startTime = std::min(Time::now(), _startTime);
+        return request;
+    }
+
+    void waitAll() {
+        std::unique_lock<std::mutex> lock(_mutex);
+        _cv.wait(lock, [this]{ return _idleIds.size() == requests.size(); });
+    }
+
+    std::vector<double> getLatencies() {
+        return _latencies;
+    }
+
+    std::vector<InferReqWrap::Ptr> requests;
+
+private:
+    std::queue<size_t> _idleIds;
+    std::mutex _mutex;
+    std::condition_variable _cv;
+    Time::time_point _startTime;
+    Time::time_point _endTime;
+    std::vector<double> _latencies;
+};
diff --git a/inference-engine/samples/benchmark_app/inputs_filling.cpp b/inference-engine/samples/benchmark_app/inputs_filling.cpp
new file mode 100644
index 00000000000000..4a7901316a0aab
--- /dev/null
+++ b/inference-engine/samples/benchmark_app/inputs_filling.cpp
@@ -0,0 +1,295 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include "inputs_filling.hpp"
+
+using namespace
InferenceEngine; + +#ifdef USE_OPENCV +static const std::vector supported_image_extensions = { "bmp", "dib", + "jpeg", "jpg", "jpe", + "jp2", + "png", + "pbm", "pgm", "ppm", + "sr", "ras", + "tiff", "tif" }; +#else +static const std::vector supported_image_extensions = { "bmp" }; +#endif +static const std::vector supported_binary_extensions = { "bin" }; + +std::vector filterFilesByExtensions(const std::vector& filePaths, + const std::vector& extensions) { + std::vector filtered; + auto getExtension = [](const std::string &name) { + auto extensionPosition = name.rfind('.', name.size()); + return extensionPosition == std::string::npos ? "" : name.substr(extensionPosition + 1, name.size() - 1); + }; + for (auto& filePath : filePaths) { + auto extension = getExtension(filePath); + std::transform(extension.begin(), extension.end(), extension.begin(), ::tolower); + if (std::find(extensions.begin(), extensions.end(), extension) != extensions.end()) { + filtered.push_back(filePath); + } + } + return filtered; +} + +void fillBlobImage(Blob::Ptr& inputBlob, + const std::vector& filePaths, + const size_t& batchSize, + const InputInfo& info, + const size_t& requestId, + const size_t& inputId, + const size_t& inputSize) { + auto inputBlobData = inputBlob->buffer().as(); + const TensorDesc& inputBlobDesc = inputBlob->getTensorDesc(); + + /** Collect images data ptrs **/ + std::vector> vreader; + vreader.reserve(batchSize); + + for (size_t i = 0ULL, inputIndex = requestId*batchSize*inputSize + inputId; i < batchSize; i++, inputIndex += inputSize) { + inputIndex %= filePaths.size(); + + slog::info << "Prepare image " << filePaths[inputIndex] << slog::endl; + FormatReader::ReaderPtr reader(filePaths[inputIndex].c_str()); + if (reader.get() == nullptr) { + slog::warn << "Image " << filePaths[inputIndex] << " cannot be read!" << slog::endl << slog::endl; + continue; + } + + /** Getting image data **/ + TensorDesc desc = info.getTensorDesc(); + std::shared_ptr imageData(reader->getData(getTensorWidth(desc), getTensorHeight(desc))); + if (imageData) { + vreader.push_back(imageData); + } + } + + /** Fill input tensor with images. 
First b channel, then g and r channels **/ + const size_t numChannels = getTensorChannels(inputBlobDesc); + const size_t imageSize = getTensorWidth(inputBlobDesc) * getTensorHeight(inputBlobDesc); + /** Iterate over all input images **/ + for (size_t imageId = 0; imageId < vreader.size(); ++imageId) { + /** Iterate over all pixel in image (b,g,r) **/ + for (size_t pid = 0; pid < imageSize; pid++) { + /** Iterate over all channels **/ + for (size_t ch = 0; ch < numChannels; ++ch) { + /** [images stride + channels stride + pixel id ] all in bytes **/ + inputBlobData[imageId * imageSize * numChannels + ch * imageSize + pid] = vreader.at(imageId).get()[pid*numChannels + ch]; + } + } + } +} + +template +void fillBlobBinary(Blob::Ptr& inputBlob, + const std::vector& filePaths, + const size_t& batchSize, + const size_t& requestId, + const size_t& inputId, + const size_t& inputSize) { + auto inputBlobData = inputBlob->buffer().as(); + for (size_t i = 0ULL, inputIndex = requestId*batchSize*inputSize + inputId; i < batchSize; i++, inputIndex += inputSize) { + inputIndex %= filePaths.size(); + + slog::info << "Prepare binary file " << filePaths[inputIndex] << slog::endl; + std::ifstream binaryFile(filePaths[inputIndex], std::ios_base::binary | std::ios_base::ate); + if (!binaryFile) { + THROW_IE_EXCEPTION << "Cannot open " << filePaths[inputIndex]; + } + + auto fileSize = static_cast(binaryFile.tellg()); + binaryFile.seekg(0, std::ios_base::beg); + if (!binaryFile.good()) { + THROW_IE_EXCEPTION << "Can not read " << filePaths[inputIndex]; + } + + auto inputSize = inputBlob->size()*sizeof(T)/batchSize; + if (fileSize != inputSize) { + THROW_IE_EXCEPTION << "File " << filePaths[inputIndex] << " contains " << std::to_string(fileSize) << " bytes " + "but the network expects " << std::to_string(inputSize); + } + binaryFile.read(reinterpret_cast(&inputBlobData[i*inputSize]), inputSize); + } +} + +template +void fillBlobRandom(Blob::Ptr& inputBlob) { + auto inputBlobData = inputBlob->buffer().as(); + for (size_t i = 0; i < inputBlob->size(); i++) { + inputBlobData[i] = (T) rand() / RAND_MAX * 10; + } +} + +template +void fillBlobImInfo(Blob::Ptr& inputBlob, + const size_t& batchSize, + std::pair image_size) { + auto inputBlobData = inputBlob->buffer().as(); + for (size_t b = 0; b < batchSize; b++) { + size_t iminfoSize = inputBlob->size()/batchSize; + for (size_t i = 0; i < iminfoSize; i++) { + size_t index = b*iminfoSize + i; + if (0 == i) + inputBlobData[index] = static_cast(image_size.first); + else if (1 == i) + inputBlobData[index] = static_cast(image_size.second); + else + inputBlobData[index] = 1; + } + } +} + +void fillBlobs(const std::vector& inputFiles, + const size_t& batchSize, + const InferenceEngine::InputsDataMap& info, + std::vector requests) { + std::vector> input_image_sizes; + for (const InputsDataMap::value_type& item : info) { + if (isImage(item.second)) { + input_image_sizes.push_back(std::make_pair(getTensorWidth(item.second->getTensorDesc()), + getTensorHeight(item.second->getTensorDesc()))); + } + slog::info << "Network input '" << item.first << "' precision " << item.second->getTensorDesc().getPrecision() + << ", dimensions (" << item.second->getTensorDesc().getLayout() << "): "; + for (const auto& i : item.second->getTensorDesc().getDims()) { + slog::info << i << " "; + } + slog::info << slog::endl; + } + + size_t imageInputCount = input_image_sizes.size(); + size_t binaryInputCount = info.size() - imageInputCount; + + std::vector binaryFiles; + std::vector imageFiles; + + if 
(inputFiles.empty()) { + slog::warn << "No input files were given: all inputs will be filled with random values!" << slog::endl; + } else { + binaryFiles = filterFilesByExtensions(inputFiles, supported_binary_extensions); + std::sort(std::begin(binaryFiles), std::end(binaryFiles)); + + auto binaryToBeUsed = binaryInputCount*batchSize*requests.size(); + if (binaryToBeUsed > 0 && binaryFiles.empty()) { + std::stringstream ss; + for (auto& ext : supported_binary_extensions) { + if (!ss.str().empty()) { + ss << ", "; + } + ss << ext; + } + slog::warn << "No supported binary inputs found! Please check your file extensions: " << ss.str() << slog::endl; + } else if (binaryToBeUsed > binaryFiles.size()) { + slog::warn << "Some binary input files will be duplicated: " << binaryToBeUsed << + " files are required but only " << binaryFiles.size() << " are provided" << slog::endl; + } else if (binaryToBeUsed < binaryFiles.size()) { + slog::warn << "Some binary input files will be ignored: only " << binaryToBeUsed << + " are required from " << binaryFiles.size() << slog::endl; + } + + imageFiles = filterFilesByExtensions(inputFiles, supported_image_extensions); + std::sort(std::begin(imageFiles), std::end(imageFiles)); + + auto imagesToBeUsed = imageInputCount*batchSize*requests.size(); + if (imagesToBeUsed > 0 && imageFiles.empty()) { + std::stringstream ss; + for (auto& ext : supported_image_extensions) { + if (!ss.str().empty()) { + ss << ", "; + } + ss << ext; + } + slog::warn << "No supported image inputs found! Please check your file extensions: " << ss.str() << slog::endl; + } else if (imagesToBeUsed > imageFiles.size()) { + slog::warn << "Some image input files will be duplicated: " << imagesToBeUsed << + " files are required but only " << imageFiles.size() << " are provided" << slog::endl; + } else if (imagesToBeUsed < imageFiles.size()) { + slog::warn << "Some image input files will be ignored: only " << imagesToBeUsed << + " are required from " << imageFiles.size() << slog::endl; + } + } + + for (size_t requestId = 0; requestId < requests.size(); requestId++) { + slog::info << "Infer Request " << requestId << " filling" << slog::endl; + + size_t imageInputId = 0; + size_t binaryInputId = 0; + for (const InputsDataMap::value_type& item : info) { + Blob::Ptr inputBlob = requests.at(requestId)->getBlob(item.first); + if (isImage(inputBlob)) { + if (!imageFiles.empty()) { + // Fill with Images + fillBlobImage(inputBlob, imageFiles, batchSize, *item.second, requestId, imageInputId++, imageInputCount); + continue; + } + } else { + if (!binaryFiles.empty()) { + // Fill with binary files + if (item.second->getPrecision() == InferenceEngine::Precision::FP32) { + fillBlobBinary(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount); + } else if (item.second->getPrecision() == InferenceEngine::Precision::FP16) { + fillBlobBinary(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount); + } else if (item.second->getPrecision() == InferenceEngine::Precision::I32) { + fillBlobBinary(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount); + } else if (item.second->getPrecision() == InferenceEngine::Precision::U8) { + fillBlobBinary(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount); + } else { + THROW_IE_EXCEPTION << "Input precision is not supported for " << item.first; + } + continue; + } + + if (isImageInfo(inputBlob) && (input_image_sizes.size() == 1)) { + // Most likely it is image info: fill with 
image information + auto image_size = input_image_sizes.at(0); + slog::info << "Fill input '" << item.first << "' with image size " << image_size.first << "x" + << image_size.second << slog::endl; + if (item.second->getPrecision() == InferenceEngine::Precision::FP32) { + fillBlobImInfo(inputBlob, batchSize, image_size); + } else if (item.second->getPrecision() == InferenceEngine::Precision::FP16) { + fillBlobImInfo(inputBlob, batchSize, image_size); + } else if (item.second->getPrecision() == InferenceEngine::Precision::I32) { + fillBlobImInfo(inputBlob, batchSize, image_size); + } else { + THROW_IE_EXCEPTION << "Input precision is not supported for image info!"; + } + continue; + } + } + // Fill random + slog::info << "Fill input '" << item.first << "' with random values (" + << std::string((isImage(inputBlob) ? "image" : "some binary data")) + << " is expected)" << slog::endl; + if (item.second->getPrecision() == InferenceEngine::Precision::FP32) { + fillBlobRandom(inputBlob); + } else if (item.second->getPrecision() == InferenceEngine::Precision::FP16) { + fillBlobRandom(inputBlob); + } else if (item.second->getPrecision() == InferenceEngine::Precision::I32) { + fillBlobRandom(inputBlob); + } else if (item.second->getPrecision() == InferenceEngine::Precision::U8) { + fillBlobRandom(inputBlob); + } else if (item.second->getPrecision() == InferenceEngine::Precision::I8) { + fillBlobRandom(inputBlob); + } else if (item.second->getPrecision() == InferenceEngine::Precision::U16) { + fillBlobRandom(inputBlob); + } else if (item.second->getPrecision() == InferenceEngine::Precision::I16) { + fillBlobRandom(inputBlob); + } else { + THROW_IE_EXCEPTION << "Input precision is not supported for " << item.first; + } + } + } +} diff --git a/inference-engine/samples/benchmark_app/inputs_filling.hpp b/inference-engine/samples/benchmark_app/inputs_filling.hpp new file mode 100644 index 00000000000000..65c4d27deabf68 --- /dev/null +++ b/inference-engine/samples/benchmark_app/inputs_filling.hpp @@ -0,0 +1,37 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include + +#include "infer_request_wrap.hpp" + +template +static bool isImage(const T &blob) { + auto descriptor = blob->getTensorDesc(); + if (descriptor.getLayout() != InferenceEngine::NCHW) { + return false; + } + auto channels = descriptor.getDims()[1]; + return channels == 3; +} + +template +static bool isImageInfo(const T &blob) { + auto descriptor = blob->getTensorDesc(); + if (descriptor.getLayout() != InferenceEngine::NC) { + return false; + } + auto channels = descriptor.getDims()[1]; + return (channels >= 2); +} + +void fillBlobs(const std::vector& inputFiles, + const size_t& batchSize, + const InferenceEngine::InputsDataMap& info, + std::vector requests); diff --git a/inference-engine/samples/benchmark_app/main.cpp b/inference-engine/samples/benchmark_app/main.cpp index 56b12a52af9f6a..892bd5daf5566d 100644 --- a/inference-engine/samples/benchmark_app/main.cpp +++ b/inference-engine/samples/benchmark_app/main.cpp @@ -12,9 +12,8 @@ #include #include -#include - #include +#include #include #include #include @@ -23,47 +22,42 @@ #include "infer_request_wrap.hpp" #include "progress_bar.hpp" #include "statistics_report.hpp" +#include "inputs_filling.hpp" +#include "utils.hpp" using namespace InferenceEngine; -long long getDurationInNanoseconds(const std::string& device); +static const size_t progressBarDefaultTotalCount = 1000; -void fillBlobWithImage( - Blob::Ptr& 
diff --git a/inference-engine/samples/benchmark_app/main.cpp b/inference-engine/samples/benchmark_app/main.cpp
index 56b12a52af9f6a..892bd5daf5566d 100644
--- a/inference-engine/samples/benchmark_app/main.cpp
+++ b/inference-engine/samples/benchmark_app/main.cpp
@@ -12,9 +12,8 @@
 #include
 #include
-#include
-
 #include
+#include
 #include
 #include
 #include
@@ -23,47 +22,42 @@
 #include "infer_request_wrap.hpp"
 #include "progress_bar.hpp"
 #include "statistics_report.hpp"
+#include "inputs_filling.hpp"
+#include "utils.hpp"

 using namespace InferenceEngine;

-long long getDurationInNanoseconds(const std::string& device);
+static const size_t progressBarDefaultTotalCount = 1000;

-void fillBlobWithImage(
-    Blob::Ptr& inputBlob,
-    const std::vector<std::string>& filePaths,
-    const size_t& batchSize,
-    const InferenceEngine::InputInfo& info);
+uint64_t getDurationInMilliseconds(uint32_t duration) {
+    return duration * 1000LL;
+}

-static const size_t progressBarDefaultTotalCount = 1000;
+uint64_t getDurationInNanoseconds(uint32_t duration) {
+    return duration * 1000000000LL;
+}

 bool ParseAndCheckCommandLine(int argc, char *argv[]) {
-    // ---------------------------Parsing and validation of input args--------------------------------------
+    // ---------------------------Parsing and validating input arguments--------------------------------------
     slog::info << "Parsing input parameters" << slog::endl;
     gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
-    if (FLAGS_h) {
+    if (FLAGS_help || FLAGS_h) {
         showUsage();
+        showAvailableDevices();
         return false;
     }

     if (FLAGS_m.empty()) {
-        throw std::logic_error("Model required is not set. Please use -h.");
-    }
-
-    if (FLAGS_api.empty()) {
-        throw std::logic_error("API not selected. Please use -h.");
+        throw std::logic_error("Model is required but not set. Please set -m option.");
     }

     if (FLAGS_api != "async" && FLAGS_api != "sync") {
-        throw std::logic_error("Incorrect API. Please use -h.");
-    }
-
-    if (FLAGS_i.empty()) {
-        throw std::logic_error("Input is not set. Please use -h.");
+        throw std::logic_error("Incorrect API. Please set -api option to `sync` or `async` value.");
     }

     if (!FLAGS_report_type.empty() &&
-        FLAGS_report_type != noCntReport && FLAGS_report_type != medianCntReport && FLAGS_report_type != detailedCntReport) {
-        std::string err = "only " + std::string(noCntReport) + "/" + std::string(medianCntReport) + "/" + std::string(detailedCntReport) +
+        FLAGS_report_type != noCntReport && FLAGS_report_type != averageCntReport && FLAGS_report_type != detailedCntReport) {
+        std::string err = "only " + std::string(noCntReport) + "/" + std::string(averageCntReport) + "/" + std::string(detailedCntReport) +
             " report types are supported (invalid -report_type option value)";
         throw std::logic_error(err);
     }
@@ -71,378 +65,363 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) {
     return true;
 }

+static void next_step(const std::string additional_info = "") {
+    static size_t step_id = 0;
+    static const std::map<size_t, std::string> step_names = {
+        { 1, "Parsing and validating input arguments" },
+        { 2, "Loading Inference Engine" },
+        { 3, "Reading the Intermediate Representation network" },
+        { 4, "Resizing network to match image sizes and given batch" },
+        { 5, "Configuring input of the model" },
+        { 6, "Setting device configuration" },
+        { 7, "Loading the model to the device" },
+        { 8, "Setting optimal runtime parameters" },
+        { 9, "Creating infer requests and filling input blobs with images" },
+        { 10, "Measuring performance" },
+        { 11, "Dumping statistics report" }
+    };
+
+    step_id++;
+    if (step_names.count(step_id) == 0)
+        THROW_IE_EXCEPTION << "Step ID " << step_id << " is out of total steps number " << step_names.size();
+
+    std::cout << "[Step " << step_id << "/" << step_names.size() << "] " << step_names.at(step_id)
+              << (additional_info.empty() ? "" : " (" + additional_info + ")") << std::endl;
+}
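The banner numbering is driven entirely by call order: a static counter advances on every call, so inserting a step only requires updating the name table. The same pattern, reduced to a minimal sketch that compiles on its own (two steps instead of eleven, plain exception instead of THROW_IE_EXCEPTION):

```cpp
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>

static void next_step(const std::string& info = "") {
    static size_t step_id = 0;
    static const std::map<size_t, std::string> names = {
        { 1, "Parsing and validating input arguments" },
        { 2, "Loading Inference Engine" }
    };
    if (names.count(++step_id) == 0)
        throw std::logic_error("step id is out of range");
    std::cout << "[Step " << step_id << "/" << names.size() << "] " << names.at(step_id)
              << (info.empty() ? "" : " (" + info + ")") << std::endl;
}
```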
"" : " (" + additional_info + ")") << std::endl; +} + /** -* @brief The entry point the benchmark application +* @brief The entry point of the benchmark application */ int main(int argc, char *argv[]) { try { - slog::info << "InferenceEngine: " << InferenceEngine::GetInferenceEngineVersion() << slog::endl; - - // ------------------------------ Parsing and validation of input args --------------------------------- - std::cout << std::endl << "[Step 1/8] Parsing and validation of input args" << std::endl; - ProgressBar progressBar(1, FLAGS_stream_output); + // ----------------- 1. Parsing and validating input arguments ------------------------------------------------- + next_step(); if (!ParseAndCheckCommandLine(argc, argv)) { return 0; } /** This vector stores paths to the processed images **/ - std::vector inputImages; - parseInputFilesArguments(inputImages); - if (inputImages.size() == 0ULL) { - throw std::logic_error("no images found"); - } - progressBar.addProgress(1); - progressBar.finish(); + std::vector inputFiles; + parseInputFilesArguments(inputFiles); - // --------------------------- 1. Load Plugin for inference engine ------------------------------------- + // ----------------- 2. Loading the Inference Engine ----------------------------------------------------------- + next_step(); - std::cout << "[Step 2/8] Loading plugin" << std::endl; - progressBar.newBar(1); + // Get optimal runtime parameters for device + std::string device_name = FLAGS_d; - InferencePlugin plugin = PluginDispatcher({ FLAGS_pp }).getPluginByDevice(FLAGS_d); + Core ie; if (FLAGS_d.find("CPU") != std::string::npos) { - // Loading default CPU etensions - plugin.AddExtension(std::make_shared()); + // Loading default CPU extensions + ie.AddExtension(std::make_shared(), "CPU"); if (!FLAGS_l.empty()) { // CPU (MKLDNN) extensions is loaded as a shared library and passed as a pointer to base extension const auto extension_ptr = InferenceEngine::make_so_pointer(FLAGS_l); - plugin.AddExtension(extension_ptr); + ie.AddExtension(extension_ptr, "CPU"); slog::info << "CPU (MKLDNN) extensions is loaded " << FLAGS_l << slog::endl; } } if ((FLAGS_d.find("GPU") != std::string::npos) && !FLAGS_c.empty()) { // Load clDNN Extensions - plugin.SetConfig({ {CONFIG_KEY(CONFIG_FILE), FLAGS_c} }); + ie.SetConfig({ {CONFIG_KEY(CONFIG_FILE), FLAGS_c} }); slog::info << "GPU extensions is loaded " << FLAGS_c << slog::endl; } - InferenceEngine::ResponseDesc resp; - if (FLAGS_d == "MYRIAD") { - plugin.SetConfig({ {CONFIG_KEY(LOG_LEVEL), CONFIG_VALUE(LOG_INFO)}, {VPU_CONFIG_KEY(LOG_LEVEL), CONFIG_VALUE(LOG_INFO)} }); - } - - const Version *pluginVersion = plugin.GetVersion(); - slog::info << pluginVersion << slog::endl; - - progressBar.addProgress(1); - progressBar.finish(); - - // --------------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------ + slog::info << "InferenceEngine: " << GetInferenceEngineVersion() << slog::endl; + slog::info << "Device info: " << slog::endl; + std::cout << ie.GetVersions(device_name) << std::endl; - std::cout << "[Step 3/8] Read IR network" << std::endl; - progressBar.newBar(1); + // ----------------- 3. 
Reading the Intermediate Representation network ---------------------------------------- + next_step(); slog::info << "Loading network files" << slog::endl; - InferenceEngine::CNNNetReader netBuilder; + CNNNetReader netBuilder; netBuilder.ReadNetwork(FLAGS_m); const std::string binFileName = fileNameNoExt(FLAGS_m) + ".bin"; netBuilder.ReadWeights(binFileName); - InferenceEngine::CNNNetwork cnnNetwork = netBuilder.getNetwork(); - const InferenceEngine::InputsDataMap inputInfo(cnnNetwork.getInputsInfo()); + CNNNetwork cnnNetwork = netBuilder.getNetwork(); + const InputsDataMap inputInfo(cnnNetwork.getInputsInfo()); if (inputInfo.empty()) { throw std::logic_error("no inputs info is provided"); } - if (inputInfo.size() != 1) { - throw std::logic_error("only networks with one input are supported"); - } - - // --------------------------- 3. Resize network to match image sizes and given batch---------------------- + // ----------------- 4. Resizing network to match image sizes and given batch ---------------------------------- + next_step(); if (FLAGS_b != 0) { - // We support models having only one input layers ICNNNetwork::InputShapes shapes = cnnNetwork.getInputShapes(); - const ICNNNetwork::InputShapes::iterator& it = shapes.begin(); - if (it->second.size() != 4) { - throw std::logic_error("Unsupported model for batch size changing in automatic mode"); + bool reshape = false; + for (const InputsDataMap::value_type& item : inputInfo) { + auto layout = item.second->getTensorDesc().getLayout(); + + int batchIndex = -1; + if ((layout == Layout::NCHW) || (layout == Layout::NCDHW) || + (layout == Layout::NHWC) || (layout == Layout::NDHWC) || + (layout == Layout::NC)) { + batchIndex = 0; + } else if (layout == CN) { + batchIndex = 1; + } + if ((batchIndex != -1) && (shapes[item.first][batchIndex] != FLAGS_b)) { + shapes[item.first][batchIndex] = FLAGS_b; + reshape = true; + } + } + if (reshape) { + slog::info << "Resizing network to batch = " << FLAGS_b << slog::endl; + cnnNetwork.reshape(shapes); } - it->second[0] = FLAGS_b; - slog::info << "Resizing network to batch = " << FLAGS_b << slog::endl; - cnnNetwork.reshape(shapes); } const size_t batchSize = cnnNetwork.getBatchSize(); - const Precision precision = inputInfo.begin()->second->getPrecision(); slog::info << (FLAGS_b != 0 ? "Network batch size was changed to: " : "Network batch size: ") << batchSize << - ", precision: " << precision << slog::endl; + ", precision: " << cnnNetwork.getPrecision() << slog::endl; - progressBar.addProgress(1); - progressBar.finish(); + // ----------------- 5. Configuring input ---------------------------------------------------------------------- + next_step(); - // --------------------------- 4. 
Configure input & output ---------------------------------------------
-
-        std::cout << "[Step 4/8] Configure input & output of the model" << std::endl;
-        progressBar.newBar(1);
-
-        const InferenceEngine::Precision inputPrecision = InferenceEngine::Precision::U8;
         for (auto& item : inputInfo) {
-            /** Set the precision of input data provided by the user, should be called before load of the network to the plugin **/
-            item.second->setInputPrecision(inputPrecision);
+            if (isImage(item.second)) {
+                /** Set the precision of input data provided by the user, should be called before load of the network to the device **/
+                item.second->setPrecision(Precision::U8);
+            }
         }

-        const size_t imagesCount = inputImages.size();
-        if (batchSize > imagesCount) {
-            slog::warn << "Network batch size " << batchSize << " is greater than images count " << imagesCount <<
-                ", some input files will be duplicated" << slog::endl;
-        } else if (batchSize < imagesCount) {
-            slog::warn << "Network batch size " << batchSize << " is less then images count " << imagesCount <<
-                ", some input files will be ignored" << slog::endl;
+        // ----------------- 6. Setting device configuration -----------------------------------------------------------
+        next_step();
+
+        bool perf_counts = (FLAGS_report_type == detailedCntReport ||
+                            FLAGS_report_type == averageCntReport ||
+                            FLAGS_pc);
+
+        auto devices = parseDevices(device_name);
+        std::map<std::string, uint32_t> device_nstreams = parseValuePerDevice(devices, FLAGS_nstreams);
+        for (auto& device : devices) {
+            if (device == "CPU") {  // CPU supports few special performance-oriented keys
+                // limit threading for CPU portion of inference
+                if (FLAGS_nthreads != 0)
+                    ie.SetConfig({{ CONFIG_KEY(CPU_THREADS_NUM), std::to_string(FLAGS_nthreads) }}, device);
+
+                // pin threads for CPU portion of inference
+                ie.SetConfig({{ CONFIG_KEY(CPU_BIND_THREAD), FLAGS_pin }}, device);
+
+                // for CPU execution, more throughput-oriented execution via streams
+                if (FLAGS_api == "async")
+                    ie.SetConfig({{ CONFIG_KEY(CPU_THROUGHPUT_STREAMS),
+                                    (device_nstreams.count(device) > 0 ? std::to_string(device_nstreams.at(device)) :
+                                                                         "CPU_THROUGHPUT_AUTO") }}, device);
+                device_nstreams[device] = std::stoi(ie.GetConfig(device, CONFIG_KEY(CPU_THROUGHPUT_STREAMS)).as<std::string>());
+            } else if (device == ("GPU")) {
+                if (FLAGS_api == "async")
+                    ie.SetConfig({{ CONFIG_KEY(GPU_THROUGHPUT_STREAMS),
+                                    (device_nstreams.count(device) > 0 ? std::to_string(device_nstreams.at(device)) :
+                                                                         "GPU_THROUGHPUT_AUTO") }}, device);
+                device_nstreams[device] = std::stoi(ie.GetConfig(device, CONFIG_KEY(GPU_THROUGHPUT_STREAMS)).as<std::string>());
+            } else if (device == "MYRIAD") {
+                ie.SetConfig({{ CONFIG_KEY(LOG_LEVEL), CONFIG_VALUE(LOG_NONE) },
+                              { VPU_CONFIG_KEY(LOG_LEVEL), CONFIG_VALUE(LOG_WARNING) }}, device);
+            }
         }

-        // ------------------------------ Prepare output blobs -------------------------------------------------
-        slog::info << "Preparing output blobs" << slog::endl;
-        InferenceEngine::OutputsDataMap outputInfo(cnnNetwork.getOutputsInfo());
-        InferenceEngine::BlobMap outputBlobs;
-        for (auto& item : outputInfo) {
-            const InferenceEngine::DataPtr outData = item.second;
-            if (!outData) {
-                throw std::logic_error("output data pointer is not valid");
+        // ----------------- 7. Loading the model to the device --------------------------------------------------------
+        next_step();
+
+        std::map<std::string, std::string> config = {{ CONFIG_KEY(PERF_COUNT), perf_counts ? CONFIG_VALUE(YES) :
+                                                                                             CONFIG_VALUE(NO) }};
+        ExecutableNetwork exeNetwork = ie.LoadNetwork(cnnNetwork, device_name, config);
+
+        // ----------------- 8. Setting optimal runtime parameters -----------------------------------------------------
+        next_step();
+
+        // Number of requests
+        uint32_t nireq = FLAGS_nireq;
+        if (nireq == 0) {
+            std::string key = METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS);
+            try {
+                nireq = exeNetwork.GetMetric(key).as<unsigned int>();
+            } catch (const details::InferenceEngineException& ex) {
+                THROW_IE_EXCEPTION
+                        << "Every device used with the benchmark_app should "
+                        << "support OPTIMAL_NUMBER_OF_INFER_REQUESTS ExecutableNetwork metric. "
+                        << "Failed to query the metric for the " << device_name << " with error:" << ex.what();
             }
-            InferenceEngine::SizeVector outputDims = outData->dims;
-            const InferenceEngine::Precision outputPrecision = InferenceEngine::Precision::FP32;
-
-            /** Set the precision of output data provided by the user, should be called before load of the network to the plugin **/
-            outData->setPrecision(outputPrecision);
-            InferenceEngine::TBlob<float>::Ptr output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
-            output->allocate();
-            outputBlobs[item.first] = output;
         }

-        progressBar.addProgress(1);
-        progressBar.finish();
-
-        // --------------------------- 5. Loading model to the plugin ------------------------------------------
-
-        std::cout << "[Step 5/8] Loading model to the plugin " << std::endl;
-        progressBar.newBar(1);
-
-        std::map<std::string, std::string> networkConfig;
-        if (FLAGS_d.find("CPU") != std::string::npos) {  // CPU supports few special performance-oriented keys
-            // limit threading for CPU portion of inference
-            if (FLAGS_nthreads != 0)
-                networkConfig[PluginConfigParams::KEY_CPU_THREADS_NUM] = std::to_string(FLAGS_nthreads);
-            // pin threads for CPU portion of inference
-            networkConfig[PluginConfigParams::KEY_CPU_BIND_THREAD] = FLAGS_pin;
-            // for pure CPU execution, more throughput-oriented execution via streams
-            if (FLAGS_api == "async" && FLAGS_d == "CPU")
-                networkConfig[PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS] = std::to_string(FLAGS_nireq);
+        // Iteration limit
+        uint32_t niter = FLAGS_niter;
+        if ((niter > 0) && (FLAGS_api == "async")) {
+            niter = ((niter + nireq - 1)/nireq)*nireq;
+            if (FLAGS_niter != niter) {
+                slog::warn << "Number of iterations was aligned by request number from "
+                           << FLAGS_niter << " to " << niter << " using number of requests " << nireq << slog::endl;
+            }
         }

-        if (FLAGS_report_type == detailedCntReport || FLAGS_report_type == medianCntReport) {
-            networkConfig[PluginConfigParams::KEY_PERF_COUNT] = PluginConfigParams::YES;
+        // Time limit
+        uint32_t duration_seconds = 0;
+        if (FLAGS_t != 0) {
+            // time limit
+            duration_seconds = FLAGS_t;
+        } else if (FLAGS_niter == 0) {
+            // default time limit
+            duration_seconds = deviceDefaultDeviceDurationInSeconds(device_name);
         }
+        uint64_t duration_nanoseconds = getDurationInNanoseconds(duration_seconds);

-        InferenceEngine::ExecutableNetwork exeNetwork = plugin.LoadNetwork(cnnNetwork, networkConfig);
+        // ----------------- 9. Creating infer requests and filling input blobs ----------------------------------------
+        next_step();

-        progressBar.addProgress(1);
-        progressBar.finish();
+        InferRequestsQueue inferRequestsQueue(exeNetwork, nireq);
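When `-nireq` is omitted, the request count now comes from the plugin itself through the OPTIMAL_NUMBER_OF_INFER_REQUESTS metric, so throughput mode scales per device without hand tuning. A sketch of that query in isolation (the function name is illustrative; error handling is done above via the try/catch):

```cpp
// Ask the plugin how many parallel requests saturate the device.
// Throws for plugins that do not implement the metric.
uint32_t optimalRequestCount(InferenceEngine::ExecutableNetwork& exeNetwork) {
    return exeNetwork.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS))
                     .as<unsigned int>();
}
```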
-        // --------------------------- 6. Create infer requests and fill input blobs ---------------------------
+        fillBlobs(inputFiles, batchSize, inputInfo, inferRequestsQueue.requests);

-        std::cout << "[Step 6/8] Create infer requests and fill input blobs with images" << std::endl;
-        progressBar.newBar(1);
-
-        std::vector<InferReqWrap::Ptr> inferRequests;
-        auto numOfReq = (FLAGS_api == "async") ? FLAGS_nireq : 1;
-        inferRequests.reserve(numOfReq);
-
-        for (size_t i = 0; i < numOfReq; i++) {
-            inferRequests.push_back(std::make_shared<InferReqWrap>(exeNetwork));
-            slog::info << "Infer Request " << i << " created" << slog::endl;
+        // ----------------- 10. Measuring performance ------------------------------------------------------------------
+        size_t progressCnt = 0;
+        size_t progressBarTotalCount = progressBarDefaultTotalCount;
+        size_t iteration = 0;

-            for (const InputsDataMap::value_type& item : inputInfo) {
-                Blob::Ptr inputBlob = inferRequests[i]->getBlob(item.first);
-                fillBlobWithImage(inputBlob, inputImages, batchSize, *item.second);
+        std::stringstream ss;
+        ss << "Start inference " << FLAGS_api << "hronously";
+        if (FLAGS_api == "async") {
+            if (!ss.str().empty()) {
+                ss << ", ";
+            }
+            ss << nireq << " inference requests";
+            std::stringstream device_ss;
+            for (auto& nstreams : device_nstreams) {
+                if (!device_ss.str().empty()) {
+                    device_ss << ", ";
+                }
+                device_ss << nstreams.second << " streams for " << nstreams.first;
+            }
+            if (!device_ss.str().empty()) {
+                ss << " using " << device_ss.str();
             }
         }
-
-        progressBar.addProgress(1);
-        progressBar.finish();
-
-        // --------------------------- 7. Performance measurements stuff ------------------------------------------
-
-        long long durationInNanoseconds;
-        if (FLAGS_niter != 0) {
-            durationInNanoseconds = 0LL;
-        } else {
-            durationInNanoseconds = getDurationInNanoseconds(FLAGS_d);
+        ss << ", limits: ";
+        if (duration_seconds > 0) {
+            ss << getDurationInMilliseconds(duration_seconds) << " ms duration";
         }
+        if (niter != 0) {
+            if (duration_seconds == 0) {
+                progressBarTotalCount = niter;
+            }
+            if (duration_seconds > 0) {
+                ss << ", ";
+            }
+            ss << niter << " iterations";
+        }
+        next_step(ss.str());

-        std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> emptyStat = {};
-        StatisticsReport::Config config = {
-            FLAGS_d,
-            FLAGS_api,
-            batchSize,
-            FLAGS_nireq,
-            FLAGS_niter,
-            FLAGS_nthreads,
-            FLAGS_pin,
-            FLAGS_report_type,
-            FLAGS_report_folder
-        };
-        StatisticsReport statistics(config);
-        double fps;
-        double totalDuration;
-
-        size_t progressCnt = 0;
-        size_t progressBarTotalCount;
-        size_t iteration = 0;
+        // warming up - out of scope
+        auto inferRequest = inferRequestsQueue.getIdleRequest();
+        if (!inferRequest) {
+            THROW_IE_EXCEPTION << "No idle Infer Requests!";
+        }

         if (FLAGS_api == "sync") {
-            InferReqWrap::Ptr inferRequest = inferRequests[0];
-
-            std::cout << "[Step 7/8] ";
-            if (FLAGS_niter != 0) {
-                std::cout << "Start inference synchronously (" << FLAGS_niter << " sync inference executions)" << std::endl;
-                progressBarTotalCount = FLAGS_niter;
-            } else {
-                std::cout << "Start inference synchronously (" << durationInNanoseconds * 0.000001 << " ms duration)" << std::endl;
-                progressBarTotalCount = progressBarDefaultTotalCount;
-            }
-
-            // warming up - out of scope
             inferRequest->infer();
+        } else {
+            inferRequest->startAsync();
+        }
+        inferRequestsQueue.waitAll();
+        inferRequestsQueue.resetTimes();
+
+        const auto startTime = Time::now();
+        auto execTime = std::chrono::duration_cast<ns>(Time::now() - startTime).count();
+
+        /** Start inference & calculate performance **/
+        /** to align number of iterations to guarantee that last infer requests are executed in the same conditions **/
+        ProgressBar progressBar(progressBarTotalCount, FLAGS_stream_output, FLAGS_progress);
+
+        while ((niter != 0LL && iteration < niter) ||
+               (duration_nanoseconds != 0LL && (uint64_t)execTime < duration_nanoseconds) ||
+               (FLAGS_api == "async" && iteration % nireq != 0)) {
+            inferRequest = inferRequestsQueue.getIdleRequest();
+            if (!inferRequest) {
+                THROW_IE_EXCEPTION << "No idle Infer Requests!";
+            }
-            const auto startTime = Time::now();
-            auto execTime = std::chrono::duration_cast<ns>(Time::now() - startTime).count();
-
-            /** Start inference & calculate performance **/
-            progressBar.newBar(progressBarTotalCount);
-            while ((iteration < FLAGS_niter) ||
-                   ((FLAGS_niter == 0) && (execTime < durationInNanoseconds))) {
+            if (FLAGS_api == "sync") {
                 inferRequest->infer();
-                statistics.add((FLAGS_report_type == detailedCntReport || FLAGS_report_type == medianCntReport) ?
-                               inferRequest->getPerformanceCounts() : emptyStat,
-                               inferRequest->getExecTime());
-
-                iteration++;
-
-                if (FLAGS_niter > 0) {
-                    progressBar.addProgress(1);
-                } else {
-                    execTime = std::chrono::duration_cast<ns>(Time::now() - startTime).count();
-                    // calculate how many progress intervals are covered by current iteration.
-                    // depends on the current iteration time and time of each progress interval.
-                    // Previously covered progress intervals must be skipped.
-                    auto progressIntervalTime = durationInNanoseconds / progressBarTotalCount;
-                    size_t newProgress = execTime / progressIntervalTime - progressCnt;
-                    progressBar.addProgress(newProgress);
-                    progressCnt += newProgress;
-                }
-            }
-            fps = batchSize * 1000.0 / statistics.getMedianLatency();
-            totalDuration = std::chrono::duration_cast<ns>(Time::now() - startTime).count() * 0.000001;
-            progressBar.finish();
-        } else {
-            std::cout << "[Step 7/8] ";
-            if (FLAGS_niter != 0) {
-                std::cout << "Start inference asynchronously (" << FLAGS_niter <<
-                    " async inference executions, " << FLAGS_nireq <<
-                    " inference requests in parallel)" << std::endl;
-                progressBarTotalCount = FLAGS_niter + FLAGS_nireq - 1;
             } else {
-                std::cout << std::endl << "Start inference asynchronously (" << durationInNanoseconds * 0.000001 <<
-                    " ms duration, " << FLAGS_nireq <<
-                    " inference requests in parallel)" << std::endl;
-                progressBarTotalCount = 1000;
+                inferRequest->startAsync();
             }
+            iteration++;
+            execTime = std::chrono::duration_cast<ns>(Time::now() - startTime).count();

-            size_t currentInference = 0ULL;
-            bool requiredInferenceRequestsWereExecuted = false;
-            long long previousInference = 1LL - FLAGS_nireq;
-
-            // warming up - out of scope
-            inferRequests[0]->startAsync();
-            inferRequests[0]->wait();
-
-            const auto startTime = Time::now();
-            auto execTime = std::chrono::duration_cast<ns>(Time::now() - startTime).count();
-
-            /** Start inference & calculate performance **/
-            /** to use FLAGS_niter + FLAGS_nireq - 1 to guarantee that last infer requests are executed in the same conditions **/
-            progressBar.newBar(progressBarTotalCount);
-            while ((!requiredInferenceRequestsWereExecuted) ||
-                   (iteration < FLAGS_niter + FLAGS_nireq - 1) ||
-                   ((FLAGS_niter == 0LL) && (execTime < durationInNanoseconds))) {
-                // start new inference
-                inferRequests[currentInference]->startAsync();
-
-                // wait the latest inference execution if exists
-                if (previousInference >= 0) {
-                    inferRequests[previousInference]->wait();
-                    // update statistics with PM counters only in case of detailed or median reports
-                    statistics.add((FLAGS_report_type == detailedCntReport || FLAGS_report_type == medianCntReport) ?
- inferRequests[previousInference]->getPerformanceCounts() : emptyStat, - inferRequests[previousInference]->getExecTime()); - } - - currentInference++; - if (currentInference >= FLAGS_nireq) { - currentInference = 0; - requiredInferenceRequestsWereExecuted = true; - } - - previousInference++; - if (previousInference >= FLAGS_nireq) { - previousInference = 0; - } - - iteration++; - - if (FLAGS_niter > 0) { - progressBar.addProgress(1); - } else { - execTime = std::chrono::duration_cast(Time::now() - startTime).count(); - // calculate how many progress intervals are covered by current iteration. - // depends on the current iteration time and time of each progress interval. - // Previously covered progress intervals must be skipped. - auto progressIntervalTime = durationInNanoseconds / progressBarTotalCount; - size_t newProgress = execTime / progressIntervalTime - progressCnt; - progressBar.addProgress(newProgress); - progressCnt += newProgress; - } + if (niter > 0) { + progressBar.addProgress(1); + } else { + // calculate how many progress intervals are covered by current iteration. + // depends on the current iteration time and time of each progress interval. + // Previously covered progress intervals must be skipped. + auto progressIntervalTime = duration_nanoseconds / progressBarTotalCount; + size_t newProgress = execTime / progressIntervalTime - progressCnt; + progressBar.addProgress(newProgress); + progressCnt += newProgress; } + } - // wait the latest inference executions - for (size_t notCompletedIndex = 0ULL; notCompletedIndex < (FLAGS_nireq - 1); ++notCompletedIndex) { - if (previousInference >= 0) { - inferRequests[previousInference]->wait(); - // update statistics with PM counters only in case of detailed or median reports - statistics.add((FLAGS_report_type == detailedCntReport || FLAGS_report_type == medianCntReport) ? - inferRequests[previousInference]->getPerformanceCounts() : emptyStat, - inferRequests[previousInference]->getExecTime()); - } - - previousInference++; - if (previousInference >= FLAGS_nireq) { - previousInference = 0LL; - } + // wait the latest inference executions + inferRequestsQueue.waitAll(); + + StatisticsReport statistics({ FLAGS_d, + FLAGS_api, + batchSize, + nireq, + niter, + getDurationInMilliseconds(duration_seconds), + FLAGS_nthreads, + device_nstreams, + FLAGS_pin, + FLAGS_report_type, + FLAGS_report_folder + }); + if (perf_counts) { + for (auto& request : inferRequestsQueue.requests) { + statistics.addPerfCounts(request->getPerformanceCounts()); } - totalDuration = std::chrono::duration_cast(Time::now() - startTime).count() * 0.000001; - fps = batchSize * 1000.0 * iteration / totalDuration; - progressBar.finish(); } + statistics.addLatencies(inferRequestsQueue.getLatencies()); + + double totalDuration = inferRequestsQueue.getDurationInMilliseconds(); + double fps = (FLAGS_api == "sync") ? batchSize * 1000.0 / statistics.getMedianLatency() : + batchSize * 1000.0 * iteration / totalDuration; + progressBar.finish(); + + // ----------------- 11. 
Dumping statistics report -------------------------------------------------------------
+        next_step();

-        std::cout << "[Step 8/8] Dump statistics report" << std::endl;
-        progressBar.newBar(1);
         statistics.dump(fps, iteration, totalDuration);

         if (!FLAGS_exec_graph_path.empty()) {
-            CNNNetwork execGraphInfo = exeNetwork.GetExecGraphInfo();
-            execGraphInfo.serialize(FLAGS_exec_graph_path);
-            slog::info << "executable graph is stored to " << FLAGS_exec_graph_path << slog::endl;
+            try {
+                CNNNetwork execGraphInfo = exeNetwork.GetExecGraphInfo();
+                execGraphInfo.serialize(FLAGS_exec_graph_path);
+                slog::info << "executable graph is stored to " << FLAGS_exec_graph_path << slog::endl;
+            } catch (const std::exception & ex) {
+                slog::err << "Can't get executable graph: " << ex.what() << slog::endl;
+            }
+        }
+
+        if (FLAGS_pc) {
+            for (size_t ireq = 0; ireq < nireq; ireq++) {
+                slog::info << "Performance counts for " << ireq << "-th infer request:" << slog::endl;
+                printPerformanceCounts(inferRequestsQueue.requests[ireq]->getPerformanceCounts(), std::cout, getFullDeviceName(ie, FLAGS_d), false);
+            }
         }
-        progressBar.addProgress(1);
-        progressBar.finish();

-        std::cout << "Latency: " << statistics.getMedianLatency() << " ms" << std::endl;
+        std::cout << "Count: " << iteration << " iterations" << std::endl;
+        std::cout << "Duration: " << totalDuration << " ms" << std::endl;
+        std::cout << "Latency: " << statistics.getMedianLatency() << " ms" << std::endl;
         std::cout << "Throughput: " << fps << " FPS" << std::endl;
     } catch (const std::exception& ex) {
         slog::err << ex.what() << slog::endl;
@@ -451,91 +430,3 @@ int main(int argc, char *argv[]) {

     return 0;
 }
-
-long long getDurationInNanoseconds(const std::string& device) {
-    static const std::vector<std::pair<std::string, long long>> deviceDurationsInSeconds{
-        { "CPU", 60LL },
-        { "GPU", 60LL },
-        { "VPU", 60LL },
-        { "MYRIAD", 60LL },
-        { "HDDL", 60LL },
-        { "FPGA", 120LL },
-        { "UNKNOWN", 120LL }
-    };
-
-    auto duration = 0LL;
-    for (const auto& deviceDurationInSeconds : deviceDurationsInSeconds) {
-        if (device.find(deviceDurationInSeconds.first) != std::string::npos) {
-            duration = std::max(duration, deviceDurationInSeconds.second);
-        }
-    }
-
-    if (duration == 0LL) {
-        const auto unknownDeviceIt = find_if(
-            deviceDurationsInSeconds.begin(),
-            deviceDurationsInSeconds.end(),
-            [](std::pair<std::string, long long> deviceDuration) { return deviceDuration.first == "UNKNOWN"; });
-
-        if (unknownDeviceIt == deviceDurationsInSeconds.end()) {
-            throw std::logic_error("UNKNOWN device was not found in device duration list");
-        }
-        duration = unknownDeviceIt->second;
-        slog::warn << "Default duration " << duration << " seconds for unknown device '" << device << "' is used" << slog::endl;
-    }
-
-    return duration * 1000000000LL;
-}
-
-void fillBlobWithImage(
-    Blob::Ptr& inputBlob,
-    const std::vector<std::string>& filePaths,
-    const size_t& batchSize,
-    const InferenceEngine::InputInfo& info) {
-
-    auto inputBlobData = inputBlob->buffer().as<uint8_t*>();
-    const SizeVector& inputBlobDims = inputBlob->dims();
-
-    slog::info << "Network Input dimensions (" << info.getTensorDesc().getLayout() << "): ";
-    for (const auto& i : info.getTensorDesc().getDims()) {
-        slog::info << i << " ";
-    }
-    slog::info << slog::endl;
-
-    /** Collect images data ptrs **/
-    std::vector<std::shared_ptr<unsigned char>> vreader;
-    vreader.reserve(batchSize);
-
-    for (size_t i = 0ULL, inputIndex = 0ULL; i < batchSize; i++, inputIndex++) {
-        if (inputIndex >= filePaths.size()) {
-            inputIndex = 0ULL;
-        }
-
-        slog::info << "Prepare image " << filePaths[inputIndex] << slog::endl;
-        FormatReader::ReaderPtr reader(filePaths[inputIndex].c_str());
-        if (reader.get() == nullptr) {
-            slog::warn << "Image " << filePaths[inputIndex] << " cannot be read!" << slog::endl << slog::endl;
-            continue;
-        }
-
-        /** Getting image data **/
-        std::shared_ptr<unsigned char> imageData(reader->getData(info.getDims()[0], info.getDims()[1]));
-        if (imageData) {
-            vreader.push_back(imageData);
-        }
-    }
-
-    /** Fill input tensor with images. First b channel, then g and r channels **/
-    const size_t numChannels = inputBlobDims[2];
-    const size_t imageSize = inputBlobDims[1] * inputBlobDims[0];
-    /** Iterate over all input images **/
-    for (size_t imageId = 0; imageId < vreader.size(); ++imageId) {
-        /** Iterate over all pixel in image (b,g,r) **/
-        for (size_t pid = 0; pid < imageSize; pid++) {
-            /** Iterate over all channels **/
-            for (size_t ch = 0; ch < numChannels; ++ch) {
-                /** [images stride + channels stride + pixel id ] all in bytes **/
-                inputBlobData[imageId * imageSize * numChannels + ch * imageSize + pid] = vreader.at(imageId).get()[pid*numChannels + ch];
-            }
-        }
-    }
-}
diff --git a/inference-engine/samples/benchmark_app/progress_bar.hpp b/inference-engine/samples/benchmark_app/progress_bar.hpp
index bc7e48527af9c2..4da38e9d8dddc2 100644
--- a/inference-engine/samples/benchmark_app/progress_bar.hpp
+++ b/inference-engine/samples/benchmark_app/progress_bar.hpp
@@ -11,31 +11,42 @@
 /// @brief Responsible for progress bar handling within the benchmark_app
 class ProgressBar {
 public:
-    ProgressBar(size_t totalNum, bool stream_output) {
-        _bar.reset(new ConsoleProgress(totalNum, stream_output));
+    explicit ProgressBar(size_t totalNum, bool streamOutput = false, bool progressEnabled = false) {
+        _bar.reset(new ConsoleProgress(totalNum, streamOutput));
+        _streamOutput = streamOutput;
         _isFinished = true;
+        _progressEnabled = progressEnabled;
     }

     void addProgress(size_t num) {
         _isFinished = false;
-        _bar->addProgress(num);
+        if (_progressEnabled) {
+            _bar->addProgress(num);
+        }
     }

-    void finish() {
+    void finish(size_t num = 0) {
+        if (num > 0) {
+            addProgress(num);
+        }
         _isFinished = true;
         _bar->finish();
-        std::cout << std::endl;
+        if (_progressEnabled) {
+            std::cout << std::endl;
+        }
     }

     void newBar(size_t totalNum) {
         if (_isFinished) {
-            _bar.reset(new ConsoleProgress(totalNum));
+            _bar.reset(new ConsoleProgress(totalNum, _streamOutput));
         } else {
-            throw std::logic_error("Can't create new bar. Current progress bar is still in progress");
+            throw std::logic_error("Cannot create a new bar. Current bar is still in progress");
         }
     }

private:
     std::unique_ptr<ConsoleProgress> _bar;
+    bool _streamOutput;
     bool _isFinished;
-};
\ No newline at end of file
+    bool _progressEnabled;
+};
diff --git a/inference-engine/samples/benchmark_app/statistics_report.cpp b/inference-engine/samples/benchmark_app/statistics_report.cpp
index 3bb0df4e2b5340..821f4fe55e3911 100644
--- a/inference-engine/samples/benchmark_app/statistics_report.cpp
+++ b/inference-engine/samples/benchmark_app/statistics_report.cpp
@@ -10,32 +10,23 @@

 #include "statistics_report.hpp"

-void StatisticsReport::add(const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &pmStat, const double &latency) {
-    if (_config.niter > 0 && _config.niter == _performanceCounters.size()) {
-        // do not add elements for the adittionaly executed requests.
-        return;
-    }
-
-    _latencies.push_back(latency);
-    if (_config.report_type == medianCntReport || _config.report_type == detailedCntReport) {
+void StatisticsReport::addPerfCounts(const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &pmStat) {
+    if (_config.report_type == averageCntReport || _config.report_type == detailedCntReport) {
         // collect per-iteration statistics only in case of enabled median/detailed statistic collecting
         _performanceCounters.push_back(pmStat);
     }
 }

-void StatisticsReport::dump(const double &fps, const size_t &numProcessedReq, const double &totalExecTime) {
+void StatisticsReport::addLatencies(const std::vector<double> &latencies) {
+    _latencies.insert(_latencies.end(), latencies.begin(), latencies.end());
+}
+
+void StatisticsReport::dump(const double &fps, const size_t &iteration_number, const double &totalExecTime) {
     if (_config.report_type.empty()) {
         slog::info << "Statistics collecting was not requested. No reports are dumped." << slog::endl;
         return;
     }

-    size_t numMeasuredReq = numProcessedReq;
-    if (_config.api == "async" && _config.niter > 0) {
-        // in this case number of processed requests is higher than the value of -niter option.
-        // but we need to handle statistics for -niter number of requests only
-        numMeasuredReq = _config.niter;
-    }
-
     std::string separator =
 #if defined _WIN32 || defined __CYGWIN__
 #   if defined UNICODE
@@ -53,24 +44,24 @@ void StatisticsReport::dump(const double &fps, const size_t &numProcessedReq, co

     // resulting number of columns in csv file depends on the report_type. If it's noCntReport, then
     // no PM data is collected and there are only 3 columns in the file (in configuration section). If it's
-    // medianCntReport then median PM values are collected per each layer and the number of columns is 6.
+    // averageCntReport then average PM values are collected per each layer and the number of columns is 6.
     // Example from GPU:
     //
     // layer name;exec status;layer type;exec type;real time;cpu time;
     // conv1;EXECUTED;Convolution;convolution_gpu_bfyx_gemm_like;615;3;
     // Here, all the data are taken from InferenceEngine::InferenceEngineProfileInfo.
// - // In case of detailedCntReport the number of columns is 4 + numMeasuredReq * 2, because first 4 parameters + // In case of detailedCntReport the number of columns is 4 + _config.nireq * 2, because first 4 parameters // are the same but realTime and cpuTime can be different on each iteration (example from 5 GPU requests): // conv1;EXECUTED;Convolution;convolution_gpu_bfyx_gemm_like;630,3;617,3;616,3;615,3;617,3; size_t numOfColumns = 0; if (_config.report_type == noCntReport) { numOfColumns = 3; - } else if (_config.report_type == medianCntReport) { + } else if (_config.report_type == averageCntReport) { numOfColumns = 6; } else { // for detailedCntReport - numOfColumns = 4 + numMeasuredReq * 2; + numOfColumns = 4 + _config.nireq * 2; } auto completeCsvRow = [](CsvDumper &dumper, size_t numOfColumns, size_t filled) { @@ -95,8 +86,13 @@ void StatisticsReport::dump(const double &fps, const size_t &numProcessedReq, co completeCsvRow(dumper, numOfColumns, 3); dumper << "number of parallel infer requests" << " -nireq" << _config.nireq; completeCsvRow(dumper, numOfColumns, 3); + dumper << "duration in ms" << " -t" << _config.duration; + completeCsvRow(dumper, numOfColumns, 3); dumper << "number of CPU threads" << " -nthreads" << _config.cpu_nthreads; completeCsvRow(dumper, numOfColumns, 3); + for (auto& item : _config.nstreams) + dumper << "number of " << item.first << " streams" << " -nstreams" << item.second; + completeCsvRow(dumper, numOfColumns, 3); dumper << "CPU pinning enabled" << " -pin" << _config.cpu_pin; completeCsvRow(dumper, numOfColumns, 3); @@ -104,8 +100,8 @@ void StatisticsReport::dump(const double &fps, const size_t &numProcessedReq, co // write PM data from each iteration if (!_performanceCounters.empty()) { - if (_config.report_type != medianCntReport && _config.report_type != detailedCntReport) { - throw std::logic_error("PM data should only be collected for median or detailed report types"); + if (_config.report_type != averageCntReport && _config.report_type != detailedCntReport) { + throw std::logic_error("PM data can only be collected for average or detailed report types"); } // this vector is sorted according to network layers execution order. 
@@ -115,19 +111,20 @@ void StatisticsReport::dump(const double &fps, const size_t &numProcessedReq, co completeCsvRow(dumper, numOfColumns, 1); dumper << "layer name" << "exec status" << "layer type" << "exec type"; - if (_config.report_type == medianCntReport) { - dumper << "median real time" << "median cpu time"; + if (_config.report_type == averageCntReport) { + dumper << "average real time" << "average cpu time"; completeCsvRow(dumper, numOfColumns, 6); } else { // detailedCntReport case for (size_t i = 0; i< _performanceCounters.size(); i++) { - dumper << "realTime_iter" + std::to_string(i) << "cpuTime_iter" + std::to_string(i); + dumper << "realTime_req" + std::to_string(i) << "cpuTime_req" + std::to_string(i); } completeCsvRow(dumper, numOfColumns, 4 + _performanceCounters.size() * 2); } for (const auto &layer : performanceMapSorted) { dumper << layer.first; // layer name + switch (layer.second.status) { case InferenceEngine::InferenceEngineProfileInfo::EXECUTED: dumper << "EXECUTED"; @@ -141,14 +138,16 @@ void StatisticsReport::dump(const double &fps, const size_t &numProcessedReq, co } dumper << layer.second.layer_type << layer.second.exec_type; - if (_config.report_type == medianCntReport) { - // write median realTime and cpuTime from each processed request for current layer + if (_config.report_type == averageCntReport) { + // write average realTime and cpuTime from each processed request for current layer dumper << - std::to_string(getMedianValue(_perLayerRealTime[layer.first]) / 1000.0) << - std::to_string(getMedianValue(_perLayerCpuTime[layer.first]) / 1000.0); + std::to_string(std::accumulate(_perLayerRealTime[layer.first].begin(), + _perLayerRealTime[layer.first].end(), 0.0) / _perLayerRealTime[layer.first].size() / 1000.0) << + std::to_string(std::accumulate(_perLayerCpuTime[layer.first].begin(), + _perLayerCpuTime[layer.first].end(), 0.0) / _perLayerCpuTime[layer.first].size() / 1000.0); } else { // write all realTime and cpuTime from each processed request for current layer - for (size_t i = 0; i < numMeasuredReq; i++) { + for (size_t i = 0; i < _config.nireq; i++) { dumper << std::to_string(_perLayerRealTime[layer.first][i] / 1000.0) << std::to_string(_perLayerCpuTime[layer.first][i] / 1000.0); } } @@ -162,22 +161,22 @@ void StatisticsReport::dump(const double &fps, const size_t &numProcessedReq, co completeCsvRow(dumper, numOfColumns, 1); dumper << "metric"; - for (size_t i = 0; i < _latencies.size(); i++) { + for (size_t i = 0; i < _totalLayersTime.size(); i++) { // detailedCntReport case - dumper << "iter" + std::to_string(i); + dumper << "req" + std::to_string(i); } - completeCsvRow(dumper, numOfColumns, 4 + _latencies.size()); + completeCsvRow(dumper, numOfColumns, 4 + _totalLayersTime.size()); dumper << "latencies"; - for (const auto &lat : _latencies) { - dumper << lat; + for (const auto &lat : _totalLayersTime) { + dumper << lat / 1000.0; } - completeCsvRow(dumper, numOfColumns, _latencies.size()); + completeCsvRow(dumper, numOfColumns, _totalLayersTime.size()); dumper.endLine(); } dumper << "Execution results"; completeCsvRow(dumper, numOfColumns, 1); - dumper << "number of measured infer requests" << numMeasuredReq; + dumper << "number of iterations" << iteration_number; completeCsvRow(dumper, numOfColumns, 2); dumper << "latency" << getMedianValue(_latencies); completeCsvRow(dumper, numOfColumns, 2); @@ -203,11 +202,14 @@ std::vector> // iterate over each processed infer request and handle its PM data for (auto &pm : _performanceCounters) { + long long total 
= 0L;
         // iterate over each layer from sorted vector and add required PM data to the per-layer maps
         for (const auto & it : performanceMapSorted) {
             _perLayerRealTime[it.first].push_back(pm[it.first].realTime_uSec);
             _perLayerCpuTime[it.first].push_back(pm[it.first].cpu_uSec);
+            total += pm[it.first].realTime_uSec;
         }
+        _totalLayersTime.push_back(total);
     }
     return performanceMapSorted;
 }
diff --git a/inference-engine/samples/benchmark_app/statistics_report.hpp b/inference-engine/samples/benchmark_app/statistics_report.hpp
index 909fbcb9c94cd5..f7e0bb27198248 100644
--- a/inference-engine/samples/benchmark_app/statistics_report.hpp
+++ b/inference-engine/samples/benchmark_app/statistics_report.hpp
@@ -16,7 +16,7 @@
 // @brief statistics reports types
 static constexpr char noCntReport[] = "no_counters";
-static constexpr char medianCntReport[] = "median_counters";
+static constexpr char averageCntReport[] = "average_counters";
 static constexpr char detailedCntReport[] = "detailed_counters";

 /// @brief Responsible for collecting of statistics and dumping to .csv file
@@ -28,19 +28,23 @@ class StatisticsReport {
         size_t batch;
         size_t nireq;
         size_t niter;
+        uint64_t duration;
         size_t cpu_nthreads;
+        std::map<std::string, uint32_t> nstreams;
         std::string cpu_pin;
         std::string report_type;
         std::string report_folder;
     };

-    explicit StatisticsReport(const Config &config) : _config(config) {
-        if (_config.niter > 0) {
-            _performanceCounters.reserve(_config.niter);
+    explicit StatisticsReport(Config config) : _config(std::move(config)) {
+        if (_config.nireq > 0) {
+            _performanceCounters.reserve(_config.nireq);
         }
     }

-    void add(const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &pmStat, const double &latency);
+    void addPerfCounts(const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &pmStat);
+
+    void addLatencies(const std::vector<double> &latency);

     void dump(const double &fps, const size_t &numProcessedReq, const double &totalExecTime);
@@ -64,4 +68,5 @@ class StatisticsReport {
     std::map<std::string, std::vector<long long>> _perLayerRealTime;
     // mapping from network layer to a vector of calculated CPU Time values from each processed infer request.
 std::map<std::string, std::vector<long long>> _perLayerCpuTime;
+    std::vector<long long> _totalLayersTime;
 };
diff --git a/inference-engine/samples/benchmark_app/utils.cpp b/inference-engine/samples/benchmark_app/utils.cpp
new file mode 100644
index 00000000000000..72d1b0f7aff867
--- /dev/null
+++ b/inference-engine/samples/benchmark_app/utils.cpp
@@ -0,0 +1,97 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include "utils.hpp"
+
+uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device) {
+    static const std::map<std::string, uint32_t> deviceDefaultDurationInSeconds {
+        { "CPU", 60 },
+        { "GPU", 60 },
+        { "VPU", 60 },
+        { "MYRIAD", 60 },
+        { "HDDL", 60 },
+        { "FPGA", 120 },
+        { "UNKNOWN", 120 }
+    };
+    uint32_t duration = 0;
+    for (const auto& deviceDurationInSeconds : deviceDefaultDurationInSeconds) {
+        if (device.find(deviceDurationInSeconds.first) != std::string::npos) {
+            duration = std::max(duration, deviceDurationInSeconds.second);
+        }
+    }
+    if (duration == 0) {
+        const auto unknownDeviceIt = find_if(
+            deviceDefaultDurationInSeconds.begin(),
+            deviceDefaultDurationInSeconds.end(),
+            [](std::pair<std::string, uint32_t> deviceDuration) { return deviceDuration.first == "UNKNOWN"; });
+
+        if (unknownDeviceIt == deviceDefaultDurationInSeconds.end()) {
+            throw std::logic_error("UNKNOWN device was not found in the device duration list");
+        }
+        duration = unknownDeviceIt->second;
+        slog::warn << "Default duration " << duration << " seconds for unknown device '" << device << "' is used" << slog::endl;
+    }
+    return duration;
+}
+
+std::vector<std::string> split(const std::string &s, char delim) {
+    std::vector<std::string> result;
+    std::stringstream ss(s);
+    std::string item;
+
+    while (getline(ss, item, delim)) {
+        result.push_back(item);
+    }
+    return result;
+}
+
+std::vector<std::string> parseDevices(const std::string& device_string) {
+    std::string comma_separated_devices = device_string;
+    if (comma_separated_devices.find(":") != std::string::npos) {
+        comma_separated_devices = comma_separated_devices.substr(comma_separated_devices.find(":") + 1);
+    }
+    auto devices = split(comma_separated_devices, ',');
+    for (auto& device : devices)
+        device = device.substr(0, device.find("("));
+    return devices;
+}
+
+std::map<std::string, uint32_t> parseValuePerDevice(const std::vector<std::string>& devices,
+                                                    const std::string& values_string) {
+    // Format: <device1>:<value1>,<device2>:<value2> or just <value>
+    auto values_string_upper = values_string;
+    std::transform(values_string_upper.begin(),
+                   values_string_upper.end(),
+                   values_string_upper.begin(),
+                   [](unsigned char c){ return std::toupper(c); });
+    std::map<std::string, uint32_t> result;
+    auto device_value_strings = split(values_string_upper, ',');
+    for (auto& device_value_string : device_value_strings) {
+        auto device_value_vec = split(device_value_string, ':');
+        if (device_value_vec.size() == 2) {
+            auto it = std::find(devices.begin(), devices.end(), device_value_vec.at(0));
+            if (it != devices.end()) {
+                result[device_value_vec.at(0)] = std::stoi(device_value_vec.at(1));
+            }
+        } else if (device_value_vec.size() == 1) {
+            uint32_t value = std::stoi(device_value_vec.at(0));
+            for (auto& device : devices) {
+                result[device] = value;
+            }
+        } else if (device_value_vec.size() != 0) {
+            throw std::runtime_error("Unknown string format: " + values_string);
+        }
+    }
+    return result;
+}
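For reference, the intended behavior of the two parsers on a hypothetical MULTI-device command line (device names and values are illustrative):

```cpp
// -d MULTI:CPU,GPU  -nstreams CPU:4,GPU:2
auto devices = parseDevices("MULTI:CPU,GPU");                // {"CPU", "GPU"}
auto per_dev = parseValuePerDevice(devices, "CPU:4,GPU:2");  // {{"CPU", 4}, {"GPU", 2}}

// A single bare value applies to every parsed device:
auto uniform = parseValuePerDevice(devices, "2");            // CPU -> 2, GPU -> 2
```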
diff --git a/inference-engine/samples/benchmark_app/utils.hpp b/inference-engine/samples/benchmark_app/utils.hpp
new file mode 100644
index 00000000000000..4c2634d8dc0093
--- /dev/null
+++ b/inference-engine/samples/benchmark_app/utils.hpp
@@ -0,0 +1,15 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <vector>
+
+std::vector<std::string> parseDevices(const std::string& device_string);
+uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device);
+std::map<std::string, uint32_t> parseValuePerDevice(const std::vector<std::string>& devices,
+                                                    const std::string& values_string);
+uint32_t deviceDefaultRequestsNumber(const std::string& device);
diff --git a/inference-engine/samples/build_samples.sh b/inference-engine/samples/build_samples.sh
new file mode 100644
index 00000000000000..999a17b25e6dfd
--- /dev/null
+++ b/inference-engine/samples/build_samples.sh
@@ -0,0 +1,73 @@
+#!/usr/bin/env bash
+
+# Copyright (C) 2018-2019 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+error() {
+    local code="${3:-1}"
+    if [[ -n "$2" ]];then
+        echo "Error on or near line $1: $2; exiting with status ${code}"
+    else
+        echo "Error on or near line $1; exiting with status ${code}"
+    fi
+    exit "${code}"
+}
+trap 'error ${LINENO}' ERR
+
+SAMPLES_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+printf "\nSetting environment variables for building samples...\n"
+
+if [ -z "$INTEL_OPENVINO_DIR" ]; then
+    if [ -e "$SAMPLES_PATH/../../bin/setupvars.sh" ]; then
+        setvars_path="$SAMPLES_PATH/../../bin/setupvars.sh"
+    elif [ -e "$SAMPLES_PATH/../../../bin/setupvars.sh" ]; then
+        setvars_path="$SAMPLES_PATH/../../../bin/setupvars.sh"
+    else
+        printf "Error: Failed to set the environment variables automatically. To fix, run the following command:\n source <INSTALL_DIR>/bin/setupvars.sh\n where INSTALL_DIR is the OpenVINO installation directory.\n\n"
+        exit 1
+    fi
+    if ! source $setvars_path ; then
+        printf "Unable to run ./setupvars.sh. Please check its presence. \n\n"
+        exit 1
+    fi
+else
+    # case for run with `sudo -E`
+    source "$INTEL_OPENVINO_DIR/bin/setupvars.sh"
+fi
+
+if ! command -v cmake &>/dev/null; then
+    printf "\n\nCMAKE is not installed. It is required to build Inference Engine samples. Please install it. \n\n"
+    exit 1
+fi
+
+build_dir=$HOME/inference_engine_samples_build
+
+OS_PATH=$(uname -m)
+NUM_THREADS="-j2"
+
+if [ $OS_PATH == "x86_64" ]; then
+    OS_PATH="intel64"
+    NUM_THREADS="-j8"
+fi
+
+if [ -e $build_dir/CMakeCache.txt ]; then
+    rm -rf $build_dir/CMakeCache.txt
+fi
+mkdir -p $build_dir
+cd $build_dir
+cmake -DCMAKE_BUILD_TYPE=Release $SAMPLES_PATH
+make $NUM_THREADS
+
+printf "\nBuild completed, you can find binaries for all samples in the $build_dir/${OS_PATH}/Release subfolder.\n\n"
diff --git a/inference-engine/samples/build_samples_msvc.bat b/inference-engine/samples/build_samples_msvc.bat
new file mode 100644
index 00000000000000..9063fb159d94fe
--- /dev/null
+++ b/inference-engine/samples/build_samples_msvc.bat
@@ -0,0 +1,146 @@
+@echo off
+
+:: Copyright (C) 2018-2019 Intel Corporation
+::
+:: Licensed under the Apache License, Version 2.0 (the "License");
+:: you may not use this file except in compliance with the License.
+:: You may obtain a copy of the License at
+::
+:: http://www.apache.org/licenses/LICENSE-2.0
+::
+:: Unless required by applicable law or agreed to in writing, software
+:: distributed under the License is distributed on an "AS IS" BASIS,
+:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+:: See the License for the specific language governing permissions and
+:: limitations under the License.
+
+
+@setlocal
+SETLOCAL EnableDelayedExpansion
+set "ROOT_DIR=%~dp0"
+
+set "SOLUTION_DIR64=%USERPROFILE%\Documents\Intel\OpenVINO\inference_engine_samples_build"
+if "%InferenceEngine_DIR%"=="" set "InferenceEngine_DIR=%ROOT_DIR%\..\share"
+
+set MSBUILD_BIN=
+set VS_PATH=
+set VS_VERSION=
+
+if not "%1" == "" (
+    if "%1"=="VS2015" (
+        set "VS_VERSION=2015"
+    ) else if "%1"=="VS2017" (
+        set "VS_VERSION=2017"
+    ) else if "%1"=="VS2019" (
+        set "VS_VERSION=2019"
+    ) else (
+        echo Unrecognized option specified "%1"
+        echo Supported command line options: VS2015, VS2017, VS2019
+        goto errorHandling
+    )
+)
+
+if "%INTEL_OPENVINO_DIR%"=="" (
+    if exist "%ROOT_DIR%\..\..\bin\setupvars.bat" (
+        call "%ROOT_DIR%\..\..\bin\setupvars.bat"
+    ) else (
+        if exist "%ROOT_DIR%\..\..\..\bin\setupvars.bat" (
+            call "%ROOT_DIR%\..\..\..\bin\setupvars.bat"
+        ) else (
+            echo Failed to set the environment variables automatically
+            echo To fix, run the following command: ^<INSTALL_DIR^>\bin\setupvars.bat
+            echo where INSTALL_DIR is the OpenVINO installation directory.
+            GOTO errorHandling
+        )
+    )
+)
+
+if "%PROCESSOR_ARCHITECTURE%" == "AMD64" (
+    set "PLATFORM=x64"
+) else (
+    set "PLATFORM=Win32"
+)
+
+set VSWHERE="false"
+if exist "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" (
+    set VSWHERE="true"
+    cd "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer"
+) else if exist "%ProgramFiles%\Microsoft Visual Studio\Installer\vswhere.exe" (
+    set VSWHERE="true"
+    cd "%ProgramFiles%\Microsoft Visual Studio\Installer"
+) else (
+    echo "vswhere tool is not found"
+)
+
+if !VSWHERE! == "true" (
+    if "!VS_VERSION!"=="" (
+        echo Searching the latest Visual Studio...
+        for /f "usebackq tokens=*" %%i in (`vswhere -latest -products * -requires Microsoft.Component.MSBuild -property installationPath`) do (
+            set VS_PATH=%%i
+        )
+    ) else (
+        echo Searching Visual Studio !VS_VERSION!...
+        for /f "usebackq tokens=*" %%i in (`vswhere -products * -requires Microsoft.Component.MSBuild -property installationPath`) do (
+            set CUR_VS=%%i
+            if not "!CUR_VS:%VS_VERSION%=!"=="!CUR_VS!" (
+                set VS_PATH=!CUR_VS!
+            )
+        )
+    )
+    if exist "!VS_PATH!\MSBuild\14.0\Bin\MSBuild.exe" (
+        set "MSBUILD_BIN=!VS_PATH!\MSBuild\14.0\Bin\MSBuild.exe"
+    )
+    if exist "!VS_PATH!\MSBuild\15.0\Bin\MSBuild.exe" (
+        set "MSBUILD_BIN=!VS_PATH!\MSBuild\15.0\Bin\MSBuild.exe"
+    )
+    if exist "!VS_PATH!\MSBuild\Current\Bin\MSBuild.exe" (
+        set "MSBUILD_BIN=!VS_PATH!\MSBuild\Current\Bin\MSBuild.exe"
+    )
+)
+
+if "!MSBUILD_BIN!"
== "" ( + if "!VS_VERSION!"=="2015" ( + if exist "C:\Program Files (x86)\MSBuild\14.0\Bin\MSBuild.exe" ( + set "MSBUILD_BIN=C:\Program Files (x86)\MSBuild\14.0\Bin\MSBuild.exe" + set "MSBUILD_VERSION=14 2015" + ) + ) else if "!VS_VERSION!"=="2017" ( + if exist "C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\MSBuild\15.0\Bin\MSBuild.exe" ( + set "MSBUILD_BIN=C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\MSBuild\15.0\Bin\MSBuild.exe" + set "MSBUILD_VERSION=15 2017" + ) else if exist "C:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\MSBuild\15.0\Bin\MSBuild.exe" ( + set "MSBUILD_BIN=C:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\MSBuild\15.0\Bin\MSBuild.exe" + set "MSBUILD_VERSION=15 2017" + ) else if exist "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBuild\15.0\Bin\MSBuild.exe" ( + set "MSBUILD_BIN=C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBuild\15.0\Bin\MSBuild.exe" + set "MSBUILD_VERSION=15 2017" + ) + ) +) else ( + if not "!MSBUILD_BIN:2019=!"=="!MSBUILD_BIN!" set "MSBUILD_VERSION=16 2019" + if not "!MSBUILD_BIN:2017=!"=="!MSBUILD_BIN!" set "MSBUILD_VERSION=15 2017" + if not "!MSBUILD_BIN:2015=!"=="!MSBUILD_BIN!" set "MSBUILD_VERSION=14 2015" +) + +if "!MSBUILD_BIN!" == "" ( + echo Build tools for Microsoft Visual Studio !VS_VERSION! cannot be found. If you use Visual Studio 2017, please download and install build tools from https://www.visualstudio.com/downloads/#build-tools-for-visual-studio-2017 + GOTO errorHandling +) + +if exist "%SOLUTION_DIR64%\CMakeCache.txt" del "%SOLUTION_DIR64%\CMakeCache.txt" + +echo Creating Visual Studio %MSBUILD_VERSION% %PLATFORM% files in %SOLUTION_DIR64%... && ^ +cd "%ROOT_DIR%" && cmake -E make_directory "%SOLUTION_DIR64%" && cd "%SOLUTION_DIR64%" && cmake -G "Visual Studio !MSBUILD_VERSION!" -A %PLATFORM% "%ROOT_DIR%" + +echo. +echo ###############^|^| Build Inference Engine samples using MS Visual Studio (MSBuild.exe) ^|^|############### +echo. +echo "!MSBUILD_BIN!" Samples.sln /p:Configuration=Release +"!MSBUILD_BIN!" Samples.sln /p:Configuration=Release +if ERRORLEVEL 1 GOTO errorHandling + +echo Done. 
+goto :eof + +:errorHandling +echo Error diff --git a/inference-engine/samples/calibration_tool/CMakeLists.txt b/inference-engine/samples/calibration_tool/CMakeLists.txt deleted file mode 100644 index c654336986473a..00000000000000 --- a/inference-engine/samples/calibration_tool/CMakeLists.txt +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (C) 2018-2019 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# - -set (TARGET_NAME "calibration_tool") - -file (GLOB MAIN_SRC - ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../validation_app/pugixml/*.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../validation_app/ClassificationProcessor.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../validation_app/classification_set_generator.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../validation_app/image_decoder.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../validation_app/ObjectDetectionProcessor.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../validation_app/Processor.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../validation_app/VOCAnnotationParser.cpp - ) - -file (GLOB MAIN_HEADERS - ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/pugixml/*.hpp - ) - -# Create named folders for the sources within the .vcproj -# Empty name lists them directly under the .vcproj -source_group("src" FILES ${MAIN_SRC}) -source_group("include" FILES ${MAIN_HEADERS}) - -# Find OpenCV components if exist -find_package(OpenCV COMPONENTS imgcodecs QUIET) -if(NOT(OpenCV_FOUND)) - message(WARNING "OPENCV is disabled or not found, " ${TARGET_NAME} " skipped") - return() -endif() - -# Properties->C/C++->General->Additional Include Directories -include_directories (${CMAKE_CURRENT_SOURCE_DIR}/../classification_sample/core - ${CMAKE_CURRENT_SOURCE_DIR}/../common - ${CMAKE_CURRENT_SOURCE_DIR}/../common/os/windows - ${CMAKE_CURRENT_SOURCE_DIR}/../../include - ${CMAKE_CURRENT_SOURCE_DIR}/../validation_app) - -link_directories(${LIB_FOLDER}) - -# Create library file from sources. -add_executable(${TARGET_NAME} ${MAIN_SRC} ${MAIN_HEADERS}) - -set_target_properties(${TARGET_NAME} PROPERTIES "CMAKE_CXX_FLAGS" "${CMAKE_CXX_FLAGS} -fPIE" -COMPILE_PDB_NAME ${TARGET_NAME}) -target_link_libraries(${TARGET_NAME} gflags IE::ie_cpu_extension ${InferenceEngine_LIBRARIES} ${OpenCV_LIBRARIES}) -if (UNIX) - target_link_libraries(${TARGET_NAME} dl) -endif() - diff --git a/inference-engine/samples/calibration_tool/README.md b/inference-engine/samples/calibration_tool/README.md deleted file mode 100644 index 2883873d75b95f..00000000000000 --- a/inference-engine/samples/calibration_tool/README.md +++ /dev/null @@ -1,117 +0,0 @@ -# C++ Calibration Tool [DEPRECATED] - -> **NOTE**: OpenVINO 2019 R1 release introduced a [Python\* version of the Calibration Tool](./inference-engine/tools/calibration_tool/README.md). This is now a recommended version since it supports a larger set of topologies and datasets. The [C++ version of the Calibration Tool](./inference-engine/samples/calibration_tool/README.md) is still in the package but deprecated and will not be updated for new releases. - -The C++ Calibration Tool calibrates a given FP32 model so that is can be run in low-precision 8-bit integer -mode while keeping the input data of this model in the original precision. - -> **NOTE**: INT8 models are currently supported only by the CPU plugin. For the full list of supported configurations, see the [Supported Devices](./docs/IE_DG/supported_plugins/Supported_Devices.md) topic. - -> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. 
If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to the **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md). - -## Calibration Tool Options - -The core command-line options for the Calibration Tool are the same as for -[Validation Application](./inference-engine/samples/validation_app/README.md). However, the Calibration Tool has the following specific options: `-t`, `-subset`, `-output`, and `-threshold`. - -Running the Calibration Tool with the `-h` option yields the following usage message: -```sh -Usage: calibration_tool [OPTION] - -Available options: - - -h Print a help message - -t Type of an inferred network ("C" by default) - -t "C" to calibrate Classification network and write the calibrated network to IR - -t "OD" to calibrate Object Detection network and write the calibrated network to IR - -t "RawC" to collect only statistics for Classification network and write statistics to IR. With this option, a model is not calibrated. For calibration and statistics collection, use "-t C" instead. - -t "RawOD" to collect only statistics for Object Detection network and write statistics to IR. With this option, a model is not calibrated. For calibration and statistics collection, use "-t OD" instead - -i Required. Path to a directory with validation images. For Classification models, the directory must contain folders named as labels with images inside or a .txt file with a list of images. For Object Detection models, the dataset must be in VOC format. - -m Required. Path to an .xml file with a trained model, including model name and extension. - -lbl Labels file path. The labels file contains names of the dataset classes - -l Required for CPU custom layers. Absolute path to a shared library with the kernel implementations. - -c Required for GPU custom kernels. Absolute path to an .xml file with the kernel descriptions. - -d Target device to infer on: CPU (default), GPU, FPGA, HDDL or MYRIAD. The application looks for a suitable plugin for the specified device. - -b N Batch size value. If not specified, the batch size value is taken from IR - -ppType Preprocessing type. Options: "None", "Resize", "ResizeCrop" - -ppSize N Preprocessing size (used with ppType="ResizeCrop") - -ppWidth W Preprocessing width (overrides -ppSize, used with ppType="ResizeCrop") - -ppHeight H Preprocessing height (overrides -ppSize, used with ppType="ResizeCrop") - --dump Dump file names and inference results to a .csv file - -subset Number of pictures from the whole validation set to create the calibration dataset. Default value is 0, which stands for the whole provided dataset - -output Output name for calibrated model. Default is _i8.xml|bin - -threshold Threshold for a maximum accuracy drop of quantized model. Must be an integer number (percents) without a percent sign. Default value is 1, which stands for an accepted accuracy drop of 1% - -stream_output Flag for printing progress as a plain text. When used, interactive progress bar is replaced with multiline output - - Classification-specific options: - -Czb true "Zero is a background" flag.
Some networks are trained with a modified dataset where the class IDs are enumerated from 1, but 0 is an undefined "background" class (which is never detected) - - Object detection-specific options: - -ODkind Type of an Object Detection model. Options: SSD - -ODa Required for Object Detection models. Path to a directory containing an .xml file with annotations for images. - -ODc Required for Object Detection models. Path to a file with a list of classes - -ODsubdir Directory between the path to images (specified with -i) and image name (specified in the .xml file). For VOC2007 dataset, use JPEGImages. -``` - -The tool options are divided into two categories: -1. **Common options** named with a single letter or a word, such as -b or --dump. - These options are the same in all calibration tool modes. -2. **Network type-specific options** named as an acronym of the network type (C or OD) - followed by a letter or a word. - -You can run the tool with public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/). - -> **NOTE**: Before running the tool on a trained model, make sure the model is converted to the Inference Engine format (`*.xml` + `*.bin`) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). - -## Calibrate a Classification Model - -To calibrate a classification convolutional neural network (CNN) -on a subset of images (first 2000 images) from the given dataset (specified with the `-i` option), run the following command: - -```bash -./calibration_tool -t C -i -m /.xml -d -subset 2000 -``` - -The dataset must have the correct format. Classification models support two formats: folders -named as labels that contain all images of this class and ImageNet*-like format, with the -`.txt` file containing list of images and IDs of classes. - -For more information on the structure of the datasets, refer to the **Prepare a Dataset** section of the -[Validation Application document](./inference-engine/samples/validation_app/README.md). - -If you decide to use the subset of the given dataset, use the ImageNet-like format -instead of "folder as classes" format. This brings a more accurate calibration as you are likely to get images -representing different classes. - -To run the sample you can use classification models that can be downloaded with the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or other image classification models. - -For example, to calibrate the trained Caffe\* `resnet-50` classification model, run the following command: - -```bash -./calibration_tool -t C -m /resnet-50.xml -i ILSVRC2012_val.txt -Czb false -ppType "ResizeCrop" -ppSize 342 -b 1 -d CPU -subset 2000 -``` - -## Calibrate Object Detection Model - -This topic demonstrates how to run the Calibration Tool on the Object Detection CNN on a set of images. Please -review the list of Object Detection models used for validation of the Calibration Tool -in the [8-bit Inference Introduction](./docs/IE_DG/Int8Inference.md). -Any network that can be inferred with the Inference Engine and has the same input and output -format as the SSD CNN should be supported as well. - -### Run SSD Network on the VOC dataset - -Before you start calibrating the model, make sure your dataset is in the correct format. 
For more information, -refer to the **Prepare a Dataset** section of the -[Validation Application document](./inference-engine/samples/validation_app/README.md). - -Once you have prepared the dataset, you can calibrate the model on it by running the following command: -```bash -./calibration_tool -d CPU -t OD -ODa "/VOCdevkit/VOC2007/Annotations" -i "/VOCdevkit" -m "/vgg_voc0712_ssd_300x300.xml" -ODc "/VOC_SSD_Classes.txt" -ODsubdir JPEGImages -subset 500 -``` - -## See Also - -* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md) -* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) -* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) diff --git a/inference-engine/samples/calibration_tool/calibrator_processors.cpp b/inference-engine/samples/calibration_tool/calibrator_processors.cpp deleted file mode 100644 index 5836f1604330d4..00000000000000 --- a/inference-engine/samples/calibration_tool/calibrator_processors.cpp +++ /dev/null @@ -1,802 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "calibrator_processors.h" -#include // std::string -#include // std::cout -#include // std::stringstream -#include -#include -#include -#include -#include -#include -#include -#include "details/ie_cnn_network_tools.h" -#include "details/caseless.hpp" - -using namespace InferenceEngine; -using namespace InferenceEngine::details; - -using InferenceEngine::details::InferenceEngineException; - -CNNLayerPtr Int8Calibrator::addScaleShiftBeforeLayer(std::string name, CNNLayer::Ptr beforeLayer, size_t port, std::vector scale) { - if (beforeLayer->insData.size() < port) { - THROW_IE_EXCEPTION << "cannot find appropraite port for addScaleShiftBeforeLayer"; - } - - DataPtr pData = beforeLayer->insData[port].lock(); - LayerParams params; - params.name = name; - params.precision = Precision::FP32; - params.type = "ScaleShift"; - CNNLayerPtr lptr = std::make_shared(params); - ScaleShiftLayer *pScaleShift = dynamic_cast(lptr.get()); - if (pScaleShift == nullptr) { - THROW_IE_EXCEPTION << "Layer " << lptr->name << " is not instance of ScaleShiftLayer class"; - } - - SizeVector wdims({ pData->dims[2] }); - - if (scale.size() == 1) { - scale.resize(wdims[0]); - for (size_t i = 1; i < wdims[0]; i++) { - scale[i] = scale[0]; - } - } - - if (scale.size() != pData->dims[2]) { - THROW_IE_EXCEPTION << "Failed to add scaleshift before " << beforeLayer->name << " due to scales and layer output dims incossitency"; - } - - Blob::Ptr weights = nullptr; - weights = make_shared_blob(Precision::FP32, Layout::C, wdims); - weights->allocate(); - float *buffer = weights->buffer().as(); - if (buffer == nullptr) { - THROW_IE_EXCEPTION << "Could not allocate weights buffer"; - } - for (size_t i = 0; i < pData->dims[2]; i++) { - buffer[i] = scale[i]; - } - pScaleShift->_weights = weights; - - - SizeVector bdims({ pData->dims[2] }); - Blob::Ptr biases = nullptr; - biases = make_shared_blob(Precision::FP32, Layout::C, bdims); - biases->allocate(); - buffer = biases->buffer().as(); - for (size_t i = 0; i < pData->dims[2]; i++) { - buffer[i] = 0.f; - } - pScaleShift->_biases = biases; - - Data *edge2 = new Data(*pData.get()); - DataPtr newEdge(edge2); - lptr->insData.push_back(pData); - lptr->outData.push_back(newEdge); - newEdge->name = /*"EdgeAfter_" +*/ params.name; - newEdge->creatorLayer = lptr; - newEdge->inputTo.clear(); - newEdge->inputTo[beforeLayer->name] = beforeLayer; - - 
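- // Re-wire the graph: pData now feeds the new ScaleShift layer, and beforeLayer consumes the ScaleShift output edge (newEdge) instead.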
pData->inputTo.erase(beforeLayer->name); - pData->inputTo[params.name] = lptr; - - for (size_t i = 0; i < beforeLayer->insData.size(); i++) { - DataPtr d = beforeLayer->insData[i].lock(); - if (d == pData) { - beforeLayer->insData[i] = newEdge; - break; - } - } - return lptr; -} - - -float Int8Calibrator::compare_NRMSD(InferenceEngine::Blob::Ptr res, InferenceEngine::Blob::Ptr ref) { - auto *res_ptr = res->buffer().as(); - - auto *ref_ptr = ref->buffer().as(); - - size_t ref_size = ref->size(); - if (ref_size == 0) { - throw std::logic_error("ref_size can't be equal to zero"); - } - - float sum = 0; - - float mmin = ref_ptr[0], mmax = ref_ptr[0]; - - for (size_t i = 0; i < ref_size; i++) { - float sqr = (ref_ptr[i] - res_ptr[i]); - sqr *= sqr; - sum += sqr; - - mmin = std::min(mmin, ref_ptr[i]); - mmax = std::max(mmax, ref_ptr[i]); - } - - sum /= ref_size; - - sum = pow(sum, 0.5f); - - sum /= mmax - mmin; - - return sum; -} - - -InferenceEngine::NetworkStatsMap Int8Calibrator::getStatistic(float threshold) { - InferenceEngine::NetworkStatsMap netNodesStats; - // go over all outputs and get aggregated statistics - for (auto l : _statData.registeredLayers()) { - NetworkNodeStatsPtr nodeStats; - size_t channels = _statData.getNumberChannels(l); - if (netNodesStats.find(l) == netNodesStats.end()) { - nodeStats = NetworkNodeStatsPtr(new NetworkNodeStats(channels)); - - netNodesStats[l] = nodeStats; - } else { - nodeStats = netNodesStats[l]; - } - for (size_t c = 0; c < channels; c++) { - _statData.getDataMinMax(l, c, nodeStats->_minOutputs[c], nodeStats->_maxOutputs[c], threshold); - } - } - return netNodesStats; -} - - -void Int8Calibrator::collectFP32Statistic() { - _collectByLayer = false; - _collectStatistic = true; - - networkReaderC = InferenceEngine::CNNNetReader(); - networkReaderC.ReadNetwork(_modelFileNameI8C); - if (!networkReaderC.isParseSuccess()) THROW_IE_EXCEPTION << "cannot load a failed Model"; - /** Extract model name and load weights **/ - std::string binFileName = fileNameNoExt(_modelFileNameI8C) + ".bin"; - networkReaderC.ReadWeights(binFileName.c_str()); - if (_cBatch == 0) { - // Zero means "take batch value from the IR" - _cBatch = networkReaderC.getNetwork().getBatchSize(); - } else { - // Not zero means "use the specified value" - auto input_shapes = networkReaderC.getNetwork().getInputShapes(); - std::string input_name; - SizeVector input_shape; - std::tie(input_name, input_shape) = *input_shapes.begin(); - input_shape[0] = _cBatch; - input_shapes[input_name] = input_shape; - networkReaderC.getNetwork().reshape(input_shapes); - } - - auto network = networkReaderC.getNetwork(); - - - std::vector layersAfterInputs; - - std::string hackPrefix = "scaleshifted_input:"; - - for (auto &&layer : network) { - if (layer->insData.size() > 0) { - std::string inName = layer->input()->getName(); - for (auto &&input : network.getInputsInfo()) { - if (inName == input.first) { - layersAfterInputs.push_back(layer); - _inputsFromLayers[hackPrefix + layer->name] = inName; - } - } - } - } - - for (auto &&layer : layersAfterInputs) { - std::string firstInputName = hackPrefix + layer->name; - auto scaleShiftLayer = addScaleShiftBeforeLayer(firstInputName, layer, 0, { 1.f }); - ((ICNNNetwork&)network).addLayer(scaleShiftLayer); - } - - - // 1. 
add all layers as output one - for (auto &&layer : network) { - std::string layerType = network.getLayerByName(layer->name.c_str())->type; - if (layerType != "Const") { - if (/*layerType != "Split" &&*/layerType != "Input") { - network.addOutput(layer->name); - } - _statData.registerLayer(layer->name); - } - } - - ExecutableNetwork executable_network = _pluginI8C.LoadNetwork(network, { { CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(YES) } }); - _inferRequestI8C = executable_network.CreateInferRequest(); -} - -void Int8Calibrator::validateInt8Config(const InferenceEngine::NetworkStatsMap &stat, - const std::map &layersToInt8, - bool convertFullyConnected) { - _collectByLayer = false; - _collectStatistic = false; - networkReaderC = InferenceEngine::CNNNetReader(); - networkReaderC.ReadNetwork(_modelFileNameI8C); - if (!networkReaderC.isParseSuccess()) THROW_IE_EXCEPTION << "cannot load a failed Model"; - /** Extract model name and load weights **/ - std::string binFileName = fileNameNoExt(_modelFileNameI8C) + ".bin"; - networkReaderC.ReadWeights(binFileName.c_str()); - if (_cBatch == 0) { - // Zero means "take batch value from the IR" - _cBatch = networkReaderC.getNetwork().getBatchSize(); - } else { - // Not zero means "use the specified value" - auto input_shapes = networkReaderC.getNetwork().getInputShapes(); - std::string input_name; - SizeVector input_shape; - std::tie(input_name, input_shape) = *input_shapes.begin(); - input_shape[0] = _cBatch; - input_shapes[input_name] = input_shape; - networkReaderC.getNetwork().reshape(input_shapes); - } - - // Initialize statistic - ICNNNetworkStats *pstats = nullptr; - StatusCode s = ((ICNNNetwork&)networkReaderC.getNetwork()).getStats(&pstats, nullptr); - if (s == StatusCode::OK && pstats) { - pstats->setNodesStats(stat); - } - - auto network = networkReaderC.getNetwork(); - - for (auto l : network) { - if (l->type == "FullyConnected") { - l->params["quantization_level"] = (convertFullyConnected == false) ? "FP32" : "I8"; - } - } - - for (auto l : layersToInt8) { - network.getLayerByName(l.first.c_str())-> - params["quantization_level"] = (l.second == false) ? 
"FP32" : "I8"; - } - - ExecutableNetwork executable_network = _pluginI8C.LoadNetwork(network, { { CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(YES) } }); - _inferRequestI8C = executable_network.CreateInferRequest(); -} - -CNNNetwork Int8Calibrator::createICNNNetworkForLayer(CNNLayer::Ptr layerToClone, bool hasReLU) { - CNNLayer::Ptr layerRelU = layerToClone->outData[0]->inputTo.begin()->second; - - InferenceEngine::CNNNetReader reader1; - DataPtr inputData = layerToClone->insData[0].lock(); - std::string inputName = inputData->name; - - size_t inputBatch = inputData->getTensorDesc().getDims()[0]; - size_t inputChannels = inputData->getTensorDesc().getDims()[1]; - size_t inputHeight = inputData->getTensorDesc().getDims()[2]; - size_t inputWidth = inputData->getTensorDesc().getDims()[3]; - - DataPtr outputData = layerToClone->outData[0]; - size_t outputBatch = outputData->getTensorDesc().getDims()[0]; - size_t outputChannels = outputData->getTensorDesc().getDims()[1]; - size_t outputHeight = outputData->getTensorDesc().getDims()[2]; - size_t outputWidth = outputData->getTensorDesc().getDims()[3]; - - ConvolutionLayer *pConvS = dynamic_cast(layerToClone.get()); - if (pConvS == nullptr) { - THROW_IE_EXCEPTION << "Layer " << layerToClone->name << " is not instance of ConvolutionLayer class"; - } - - std::string model = " "\ - " "\ - ""\ - ""\ - "" + std::to_string(inputBatch) + ""\ - "" + std::to_string(inputChannels) + ""\ - "" + std::to_string(inputHeight) + ""\ - "" + std::to_string(inputWidth) + ""\ - ""\ - ""\ - ""\ - "name + - "\" type=\"Convolution\" precision=\"FP32\" id=\"1\">"\ - "_stride_x) + - "\" stride-y=\"" + std::to_string(pConvS->_stride_y) + - "\" pad-x=\"" + std::to_string(pConvS->_padding_x) + - "\" pad-y=\"" + std::to_string(pConvS->_padding_y) + - "\" kernel-x=\"" + std::to_string(pConvS->_kernel_x) + - "\" kernel-y=\"" + std::to_string(pConvS->_kernel_y) + - "\" dilation-x=\"" + std::to_string(pConvS->_dilation_x) + - "\" dilation-y=\"" + std::to_string(pConvS->_dilation_y) + - "\" output=\"" + std::to_string(pConvS->_out_depth) + - "\" group=\"" + std::to_string(pConvS->_group) + "\" />"\ - ""\ - ""\ - "" + std::to_string(inputBatch) + ""\ - "" + std::to_string(inputChannels) + ""\ - "" + std::to_string(inputHeight) + ""\ - "" + std::to_string(inputWidth) + ""\ - ""\ - ""\ - ""\ - ""\ - "" + std::to_string(outputBatch) + ""\ - "" + std::to_string(outputChannels) + ""\ - "" + std::to_string(outputHeight) + ""\ - "" + std::to_string(outputWidth) + ""\ - ""\ - ""\ - ""; - if (hasReLU) { - model += "name + - "\" type=\"ReLU\" precision=\"FP32\" id=\"2\">"\ - "" - ""\ - "" + std::to_string(outputBatch) + ""\ - "" + std::to_string(outputChannels) + ""\ - "" + std::to_string(outputHeight) + ""\ - "" + std::to_string(outputWidth) + ""\ - ""\ - ""\ - ""\ - ""\ - "" + std::to_string(outputBatch) + ""\ - "" + std::to_string(outputChannels) + ""\ - "" + std::to_string(outputHeight) + ""\ - "" + std::to_string(outputWidth) + ""\ - ""\ - ""\ - ""; - } - model += " "\ - " "; - if (hasReLU) { - model += " "; - } - model += ""; - - reader1.ReadNetwork(model.c_str(), model.length()); - ICNNNetwork &n = reader1.getNetwork(); - - InferenceEngine::InputsDataMap inputs; - n.getInputsInfo(inputs); - CNNLayerPtr inputLayer = inputs.begin()->second->getInputData()->creatorLayer.lock(); - - CNNLayerPtr convLayer; - n.getLayerByName(layerToClone->name.c_str(), convLayer, nullptr); - ConvolutionLayer *pConvT = dynamic_cast(convLayer.get()); - if (pConvT == nullptr) { - THROW_IE_EXCEPTION << 
"Layer " << convLayer->name << " is not instance of ConvolutionLayer class"; - } - - pConvT->_weights = pConvS->_weights; - pConvT->_biases = pConvS->_biases; - pConvT->blobs = pConvS->blobs; - - return reader1.getNetwork(); -} - -void Int8Calibrator::collectByLayerStatistic(const InferenceEngine::NetworkStatsMap &stat) { - _collectByLayer = true; - _collectStatistic = false; - networkReaderC = InferenceEngine::CNNNetReader(); - networkReaderC.ReadNetwork(_modelFileNameI8C); - if (!networkReaderC.isParseSuccess()) THROW_IE_EXCEPTION << "cannot load a failed Model"; - /** Extract model name and load weights **/ - std::string binFileName = fileNameNoExt(_modelFileNameI8C) + ".bin"; - networkReaderC.ReadWeights(binFileName.c_str()); - if (_cBatch != 0) { - auto input_shapes = networkReaderC.getNetwork().getInputShapes(); - std::string input_name; - SizeVector input_shape; - std::tie(input_name, input_shape) = *input_shapes.begin(); - input_shape[0] = _cBatch; - input_shapes[input_name] = input_shape; - networkReaderC.getNetwork().reshape(input_shapes); - } - - auto network = networkReaderC.getNetwork(); - // 1. add all layers as output one - for (auto &&layer : network) { - std::string layerType = network.getLayerByName(layer->name.c_str())->type; - if (/*layerType != "Split" &&*/layerType != "Input" && layerType != "Const") { - network.addOutput(layer->name); - } - - if (layerType == "Convolution") { - _layersAccuracyDrop[layer->name] = 0.f; - } - } - - ExecutableNetwork executable_network = _pluginI8C.LoadNetwork(network, { { CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(YES) } }); - _inferRequestI8C = executable_network.CreateInferRequest(); - - // 2. go over all layers which affect accuracy and create network basing on it - for (auto l : _layersAccuracyDrop) { - CNNLayerPtr layerToClone = network.getLayerByName(l.first.c_str()); - CNNLayerPtr layerRelU = nullptr; - // verification if there is Conv-RELU patern - // currently it is only supported - - // if only one output from conv and if it is an output to relu - if (layerToClone->outData.size() == 1 - && layerToClone->outData[0]->inputTo.size() == 1 - && CaselessEq()(layerToClone->outData[0]->inputTo.begin()->second->name, "relu")) { - layerRelU = layerToClone->outData[0]->inputTo.begin()->second; - } - - CNNNetwork n = createICNNNetworkForLayer(layerToClone, layerRelU ? true : false); - if (_cBatch != 0) { - auto input_shapes = n.getInputShapes(); - std::string input_name; - SizeVector input_shape; - std::tie(input_name, input_shape) = *input_shapes.begin(); - input_shape[0] = _cBatch; - input_shapes[input_name] = input_shape; - n.reshape(input_shapes); - } - - // Initialize statistic - ICNNNetworkStats *pstats = nullptr; - ICNNNetwork &in = n; - StatusCode s = in.getStats(&pstats, nullptr); - if (s == StatusCode::OK && pstats) { - pstats->setNodesStats(stat); - } - - InferenceEngine::InputsDataMap inputs = n.getInputsInfo(); - DataPtr q = inputs.begin()->second->getInputData(); - - ExecutableNetwork enetwork = _pluginI8C.LoadNetwork(n, { { CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(YES) } }); - _singleLayerNetworks.push_back(enetwork); - InferenceEngine::InferRequest request = enetwork.CreateInferRequest(); - std::string inputName = layerToClone->insData[0].lock()->name; - request.SetBlob(inputName, _inferRequestI8C.GetBlob(inputName)); - _singleLayerRequests[layerToClone->name] = { request, layerRelU ? 
layerRelU->name : layerToClone->name, layerToClone->name }; - } -} - - -void Int8Calibrator::collectCalibrationStatistic(size_t pics) { - if (_collectByLayer) { - std::map::iterator it = _singleLayerRequests.begin(); - while (it != _singleLayerRequests.end()) { - it->second._request.Infer(); - Blob::Ptr expected = _inferRequestI8C.GetBlob(it->second._outputName); - Blob::Ptr result = it->second._request.GetBlob(it->second._outputName); - float diff = compare_NRMSD(result, expected); - it->second._int8Accuracy.push_back(diff); - it++; - } - } - if (_collectStatistic) { - for (auto l : _statData.registeredLayers()) { - auto outBlob = _inferRequestI8C.GetBlob(l); - - std::string outName = l; - if (_inputsFromLayers.find(l) != _inputsFromLayers.end()) { - outName = _inputsFromLayers[l]; - } - - size_t N, C; - if (outBlob->dims().size() == 4 && outBlob->layout() == Layout::NCHW) { - // TODO(amalyshe) cahnge to using of tensor desc - N = pics; - C = outBlob->dims()[2]; - } else if (outBlob->dims().size() == 2 && outBlob->layout() == Layout::NC) { - N = pics; - C = outBlob->dims()[0]; - } else { - continue; - } - - // Counting min/max outputs per channel - for (size_t n = 0; n < N; n++) { - if (outBlob->dims().size() == 4) { - size_t _HW = outBlob->dims()[0] * outBlob->dims()[1]; - for (size_t c = 0; c < C; c++) { - if (outBlob->getTensorDesc().getPrecision() == Precision::FP32) { - float *ptr = &outBlob->buffer().as()[(n * C + c) * _HW]; - _statData.addTensorStatistics(outName, c, ptr, _HW); - } else if (outBlob->getTensorDesc().getPrecision() == Precision::U8) { - uint8_t *ptr = &outBlob->buffer().as()[(n * C + c) * _HW]; - _statData.addTensorStatistics(outName, c, ptr, _HW); - } else { - throw std::logic_error(std::string("Unsupported precision: ") + outBlob->getTensorDesc().getPrecision().name()); - } - } - } else if (outBlob->dims().size() == 2) { - if (outBlob->getTensorDesc().getPrecision() == Precision::FP32) { - float *ptr = &outBlob->buffer().as()[n * C]; - _statData.addTensorStatistics(outName, 0, ptr, C); - } else if (outBlob->getTensorDesc().getPrecision() == Precision::U8) { - uint8_t *ptr = &outBlob->buffer().as()[n * C]; - _statData.addTensorStatistics(outName, 0, ptr, C); - } else { - throw std::logic_error(std::string("Unsupported precision: ") + outBlob->getTensorDesc().getPrecision().name()); - } - } - } - } - } -} - -void Int8Calibrator::calculateLayersAccuracyDrop() { - _layersAccuracyDrop.clear(); - - std::map::iterator it = _singleLayerRequests.begin(); - while (it != _singleLayerRequests.end()) { - // calculate average metric per layer over all images and sort in desc order - float mo = 0.f; - for (auto d : it->second._int8Accuracy) { - mo += d; - } - mo = mo / it->second._int8Accuracy.size(); - _layersAccuracyDrop[it->first] = mo; - it++; - } - - // correction of accuracy drop to have sorted values for cases when accuracy drop is equal - // correction is added according to topological order - // this will prioritize returning of layers to FP32 starting from layers closer to the end of network - std::vector ordered = InferenceEngine::details::CNNNetSortTopologically(networkReaderC.getNetwork()); - float c = 0.00001f; - for (auto l : ordered) { - auto it = _layersAccuracyDrop.find(l->name); - if (it != _layersAccuracyDrop.end()) { - it->second += c; - } - c += 0.00001f; - } - _singleLayerRequests.clear(); -} - -std::map Int8Calibrator::layersAccuracyDrop() { - return _layersAccuracyDrop; -} - - - 
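The per-layer NRMSD ranking built above is what the calibration flow consumes: when the fully quantized model exceeds the accepted accuracy drop, layers are returned to FP32 starting from the worst offenders. Below is a minimal sketch of such a selection loop, assuming a hypothetical `runValidation` callback that measures the accuracy drop for a given per-layer precision map (the real tool wires this through `validateInt8Config()` and `Processor::Process()`):

```cpp
#include <algorithm>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Hypothetical callback: runs validation with the given per-layer precision map
// (true = INT8, false = FP32) and returns the measured accuracy drop in percent.
using ValidationFn = float (*)(const std::map<std::string, bool>& layersToInt8);

std::map<std::string, bool> selectInt8Layers(std::map<std::string, float> drops,  // from layersAccuracyDrop()
                                             float thresholdPercent,
                                             ValidationFn runValidation) {
    // Rank layers by descending per-layer drop (NRMSD plus the topological bias
    // added in calculateLayersAccuracyDrop()).
    std::vector<std::pair<std::string, float>> ranked(drops.begin(), drops.end());
    std::sort(ranked.begin(), ranked.end(),
              [](const auto& a, const auto& b) { return a.second > b.second; });

    // Start with everything quantized, then peel off the worst offenders one by
    // one until the overall accuracy drop fits the threshold.
    std::map<std::string, bool> layersToInt8;
    for (const auto& r : ranked) layersToInt8[r.first] = true;

    for (const auto& r : ranked) {
        if (runValidation(layersToInt8) <= thresholdPercent) break;
        layersToInt8[r.first] = false;  // return this layer to FP32
    }
    return layersToInt8;
}
```

Note this is only a sketch of the decision logic; the tie-breaking epsilon added per topological order above is what keeps the sort stable and biases the FP32 fallback toward layers near the end of the network.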
-//-------------------------------------------------------------------------------------------------- - -ClassificationCalibrator::ClassificationCalibrator(int nPictures, const std::string &flags_m, - const std::string &flags_d, const std::string &flags_i, - int flags_b, InferenceEngine::InferencePlugin plugin, - CsvDumper &dumper, const std::string &flags_l, - PreprocessingOptions preprocessingOptions, bool zeroBackground) : - ClassificationProcessor(flags_m, flags_d, flags_i, flags_b, - plugin, dumper, flags_l, - preprocessingOptions, zeroBackground) { - _modelFileNameI8C = modelFileName; - _pluginI8C = plugin; - _nPictures = nPictures; - _cBatch = flags_b; -} - -shared_ptr ClassificationCalibrator::Process(bool stream_output) { - inferRequest = _inferRequestI8C; - int top1Result = 0, total = 0; - - ClassificationSetGenerator generator; - - try { - generator.readLabels(labelFileName); - } catch (InferenceEngine::details::InferenceEngineException& ex) { - slog::warn << "Can't read labels file " << labelFileName << slog::endl; - slog::warn << "Error: " << ex.what() << slog::endl; - } - auto validationMap = generator.getValidationMap(imagesPath); - - if (validationMap.empty()) { - THROW_IE_EXCEPTION << "The validation dataset in " << imagesPath << "is empty. Check the dataset file or folder and the labels file"; - } - - ImageDecoder decoder; - - // ----------------------------Do inference------------------------------------------------------------- - std::vector expected(batch); - std::vector files(batch); - - if (!_nPictures) { - _nPictures = validationMap.size(); - } - - - ConsoleProgress progress(_nPictures, stream_output); - - CalibrationMetrics im; - - std::string firstInputName = this->inputInfo.begin()->first; - std::string firstOutputName = this->outInfo.begin()->first; - auto firstInputBlob = inferRequest.GetBlob(firstInputName); - auto firstOutputBlob = inferRequest.GetBlob(firstOutputName); - - size_t ipics = 0; - auto iter = validationMap.begin(); - while (iter != validationMap.end() && ipics < _nPictures) { - size_t b = 0; - int filesWatched = 0; - for (; b < batch && iter != validationMap.end() && ipics + b < _nPictures ; b++, iter++, filesWatched++) { - expected[b] = iter->first; - try { - decoder.insertIntoBlob(iter->second, b, *firstInputBlob, preprocessingOptions); - files[b] = iter->second; - } catch (const InferenceEngineException &iex) { - slog::warn << "Can't read file " << iter->second << slog::endl; - slog::warn << "Error: " << iex.what() << slog::endl; - // Could be some non-image file in directory - b--; - continue; - } - } - ipics += batch; - - Infer(progress, filesWatched, im); - collectCalibrationStatistic(b); - - std::vector results; - InferenceEngine::TopResults(1, *firstOutputBlob, results); - for (size_t i = 0; i < b; i++) { - int expc = expected[i]; - if (zeroBackground) expc++; - bool top1Scored = (static_cast(results[i]) == expc); - if (top1Scored) top1Result++; - total++; - } - } - progress.finish(); - - calculateLayersAccuracyDrop(); - - if (total == 0) { - throw std::logic_error("total can't be equal to zero"); - } - - im.AccuracyResult = static_cast(top1Result) / static_cast(total); - - return std::shared_ptr(new CalibrationMetrics(im)); -} - -//-------------------------------------------------------------------------------------------------- -SSDObjectDetectionCalibrator::SSDObjectDetectionCalibrator(int nPictures, const std::string &flags_m, - const std::string &flags_d, const std::string &flags_i, - const std::string &subdir, int flags_b, - double 
threshold, - InferencePlugin plugin, CsvDumper &dumper, - const std::string &flags_a, const std::string &classes_list_file) : - SSDObjectDetectionProcessor(flags_m, flags_d, flags_i, subdir, flags_b, - threshold, - plugin, dumper, - flags_a, classes_list_file) { - _modelFileNameI8C = modelFileName; - _pluginI8C = plugin; - _nPictures = nPictures; - _cBatch = flags_b; -} - -shared_ptr SSDObjectDetectionCalibrator::Process(bool stream_output) { - inferRequest = _inferRequestI8C; - - // Parsing PASCAL VOC2012 format - VOCAnnotationParser vocAnnParser; - VOCAnnotationCollector annCollector(annotationsPath); - - - if (annCollector.annotations().size() == 0) { - ObjectDetectionInferenceMetrics emptyIM(this->threshold); - - return std::shared_ptr(new ObjectDetectionInferenceMetrics(emptyIM)); - } - - // Getting desired results from annotations - std::map desiredForFiles; - - for (auto &ann : annCollector.annotations()) { - std::list dobList; - for (auto &obj : ann.objects) { - DetectedObject dob(classes[obj.name], static_cast(obj.bndbox.xmin), static_cast(obj.bndbox.ymin), - static_cast(obj.bndbox.xmax), static_cast(obj.bndbox.ymax), 1.0f, obj.difficult != 0); - dobList.push_back(dob); - } - ImageDescription id(dobList); - desiredForFiles.insert(std::pair(ann.folder + "/" + (!subdir.empty() ? subdir + "/" : "") + ann.filename, id)); - } - - for (auto &item : outInfo) { - DataPtr outputData = item.second; - if (!outputData) { - throw std::logic_error("output data pointer is not valid"); - } - } - // ----------------------------------------------------------------------------------------------------- - - // ----------------------------Do inference------------------------------------------------------------- - - std::vector expected(batch); - - if (!_nPictures) { - _nPictures = annCollector.annotations().size(); - } - - ConsoleProgress progress(_nPictures, stream_output); - - ObjectDetectionInferenceMetrics im(threshold); - - vector::const_iterator iter = annCollector.annotations().begin(); - - std::map scaledDesiredForFiles; - - std::string firstInputName = this->inputInfo.begin()->first; - auto firstInputBlob = inferRequest.GetBlob(firstInputName); - size_t ipics = 0; - - while (iter != annCollector.annotations().end() && ipics < _nPictures) { - std::vector files; - size_t b = 0; - - int filesWatched = 0; - for (; b < batch && iter != annCollector.annotations().end(); b++, iter++, filesWatched++) { - expected[b] = *iter; - string filename = iter->folder + "/" + (!subdir.empty() ? 
subdir + "/" : "") + iter->filename; - try { - float scale_x, scale_y; - - scale_x = 1.0f / iter->size.width; // orig_size.width; - scale_y = 1.0f / iter->size.height; // orig_size.height; - - if (scaleProposalToInputSize) { - scale_x *= firstInputBlob->dims()[0]; - scale_y *= firstInputBlob->dims()[1]; - } - - // Scaling the desired result (taken from the annotation) to the network size - scaledDesiredForFiles.insert(std::pair(filename, desiredForFiles.at(filename).scale(scale_x, scale_y))); - - files.push_back(filename); - } catch (const InferenceEngineException &iex) { - slog::warn << "Can't read file " << this->imagesPath + "/" + filename << slog::endl; - slog::warn << "Error: " << iex.what() << slog::endl; - // Could be some non-image file in directory - b--; - continue; - } - ipics++; - } - - // Infer model - Infer(progress, filesWatched, im); - collectCalibrationStatistic(b); - - // Processing the inference result - std::map> detectedObjects = processResult(files); - - // Calculating similarity - // - for (size_t j = 0; j < files.size(); j++) { - ImageDescription result(detectedObjects[files[j]]); - im.apc.consumeImage(result, scaledDesiredForFiles.at(files[j])); - } - } - progress.finish(); - - calculateLayersAccuracyDrop(); - - CalibrationMetrics imCalibration; - const ObjectDetectionInferenceMetrics &odim = dynamic_cast(im); - if (im.nRuns > 0) { - std::map appc = odim.apc.calculateAveragePrecisionPerClass(); - - double mAP = 0; - for (auto i : appc) { - mAP += i.second; - } - imCalibration.AccuracyResult = static_cast(mAP / appc.size()); - } - return std::shared_ptr(new CalibrationMetrics(imCalibration)); -} - - diff --git a/inference-engine/samples/calibration_tool/calibrator_processors.h b/inference-engine/samples/calibration_tool/calibrator_processors.h deleted file mode 100644 index d3c97370e08744..00000000000000 --- a/inference-engine/samples/calibration_tool/calibrator_processors.h +++ /dev/null @@ -1,180 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include "inference_engine.hpp" -#include "ClassificationProcessor.hpp" -#include "SSDObjectDetectionProcessor.hpp" -#include "data_stats.h" -#include -#include - -/** - * Calibrator class representing unified stages for calibration of any kind of networks -*/ -class Int8Calibrator { -public: - /** - * Intermediate structure storing of data for measurements of by-layer statistic of accuracy drop - */ - struct SingleLayerData { - InferenceEngine::InferRequest _request; - std::string _outputName; - std::string _outputI8Name; - std::vector _int8Accuracy; - }; - - /** - * Initializes state to collect accuracy of FP32 network and collect statistic - * of activations. The statistic of activations is stored in _statData and has all max/min for all - * layers and for all pictures - * The inference of all pictures and real collect of the statistic happen during call of - * Processor::Process() - */ - void collectFP32Statistic(); - - /** - * Initializes a state to collect intermediate numeric accuracy drop happening during quantization of - * certain layer to int8. The numeric accuracy drop is measured using NRMSD metric. - * - * For this purpose it creates dedicated network for certain layer, initializes this - * network by statistic that cause execute dedicated network in int8 mode. - * - * In addition to original network we create full original network executed in FP32 mode, and - * register all layers as output ones. 
- * Information from these layers is used as - * a) input to dedicated layer networks - * b) comparison for NRMSD algorithm between I8 and FP32 calc - * - * The inference of all pictures and real collect of the drop happen during call of - * Processor::Process() - * @param stat - */ - void collectByLayerStatistic(const InferenceEngine::NetworkStatsMap &stat); - - /** - * Initialize state to collect accuracy drop in int8 mode to be compared later vs FP32 accuracy - * metric. - * - * The inference of all pictures and real collect of the accuracy happen during call of - * Processor::Process() - * - * @param stat - The statistic for normalization - * @param layersToInt8 - list of layers planned to be executed in int8. if layer is absent in this - * map, it is assumed that it will be executed in int8 - * @param convertFullyConnected - should the FullyConnected layers be converted into Int8 or not - */ - void validateInt8Config(const InferenceEngine::NetworkStatsMap &stat, - const std::map& layersToInt8, - bool convertFullyConnected); - - /** - * Statistic collected in the collectFP32Statistic is processed with threshold passed as a parameter - * for this method. All values for each layers and for all pictures are sorted and number of min/max - * values which exceed threshold is thrown off - * @param threshold - parameter for thrown off outliers in activation statistic - * @return InferenceEngine::NetworkStatsMap - mapping of layer name to NetworkNodeStatsPtr - */ - InferenceEngine::NetworkStatsMap getStatistic(float threshold); - - /** - * returns by-layer accuracy drop container - */ - std::map layersAccuracyDrop(); - -protected: - /** - * This function should be called from final callibrator after and each Infer for each picture - * It calculates by layer accuracy drop and as well it also collect activation values statistic - */ - void collectCalibrationStatistic(size_t pics); - - /** - * This function should be called from calibration class after Infer of all picture - * It calculates average NRMSD based accuracy drop for each layer and fills _layersAccuracyDrop - */ - void calculateLayersAccuracyDrop(); - - bool _collectByLayer = false; - bool _collectStatistic = true; - InferencePlugin _pluginI8C; - std::string _modelFileNameI8C; - InferenceEngine::CNNNetReader networkReaderC; - InferenceEngine::InferRequest _inferRequestI8C; - int _cBatch = 0; - - size_t _nPictures = 0; - -private: - /** - * helper function for getting statistic for input layers. For getting statistic for them, we are - * adding scalshift just after the input with scale == 1 and shift == 0 - */ - CNNLayerPtr addScaleShiftBeforeLayer(std::string name, InferenceEngine::CNNLayer::Ptr beforeLayer, - size_t port, std::vector scale); - - /** - * Returns Normalized root-mean-square deviation metric for two blobs passed to the function - */ - float compare_NRMSD(InferenceEngine::Blob::Ptr res, InferenceEngine::Blob::Ptr ref); - - /** - * Creates dedicated i8 network around selected layer. Currently this network beside layer itself - * has to have ReLU and ScaleShift layers. 
- * Since Inference Engine API mostly directed to the loading of network from IR, we need to create - * such IR first, read through stream and modify network to correspond required parameters - */ - InferenceEngine::CNNNetwork createICNNNetworkForLayer(InferenceEngine::CNNLayer::Ptr layerToClone, - bool hasReLU); - - std::map _layersAccuracyDrop; - std::vector _singleLayerNetworks; - std::map _singleLayerRequests; - std::map _inputsFromLayers; - AggregatedDataStats _statData; -}; - -/** - * This class represents the only one generalized metric which will be used for comparison of - * accuracy drop - */ -struct CalibrationMetrics : public ClassificationProcessor::InferenceMetrics { -public: - float AccuracyResult = 0; -}; - -/** - * Сalibration class for classification networks. - * Responsible for proper post processing of results and calculate of Top1 metric which is used as - * universal metric for accuracy and particiapted in verification of accuracy drop - */ -class ClassificationCalibrator : public ClassificationProcessor, public Int8Calibrator { -public: - ClassificationCalibrator(int nPictures, const std::string &flags_m, const std::string &flags_d, - const std::string &flags_i, int flags_b, - InferenceEngine::InferencePlugin plugin, CsvDumper &dumper, const std::string &flags_l, - PreprocessingOptions preprocessingOptions, bool zeroBackground); - - shared_ptr Process(bool stream_output = false) override; -}; - - -/** -* Calibration class for SSD object detection networks. -* Responsible for proper post processing of results and calculate of mAP metric which is used as -* universal metric for accuracy and participated in verification of accuracy drop -*/ -class SSDObjectDetectionCalibrator : public SSDObjectDetectionProcessor, public Int8Calibrator { -public: - SSDObjectDetectionCalibrator(int nPictures, const std::string &flags_m, const std::string &flags_d, - const std::string &flags_i, const std::string &subdir, int flags_b, - double threshold, - InferencePlugin plugin, CsvDumper &dumper, - const std::string &flags_a, const std::string &classes_list_file); - - shared_ptr Process(bool stream_output = false) override; -}; diff --git a/inference-engine/samples/calibration_tool/data_stats.cpp b/inference-engine/samples/calibration_tool/data_stats.cpp deleted file mode 100644 index ecee50b15936cb..00000000000000 --- a/inference-engine/samples/calibration_tool/data_stats.cpp +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "data_stats.h" - - -TensorStatistic::TensorStatistic(float* data, size_t count, size_t nbuckets) { - _min = std::numeric_limits::max(); - _max = std::numeric_limits::min(); - for (size_t i = 0; i < count; i++) { - float val = static_cast(data[i]); - if (_min > val) { - _min = val; - } - - if (_max < val) { - _max = val; - } - } - - if (_min == _max) { - return; - } -} - -float TensorStatistic::getMaxValue() const { - return _max; -} - - -float TensorStatistic::getMinValue() const { - return _min; -} - -std::vector AggregatedDataStats::registeredLayers() { - std::vector layers; - for (auto l : _data) { - layers.push_back(l.first); - } - return layers; -} - -void AggregatedDataStats::registerLayer(std::string layer) { - _data[layer]; -} - -void AggregatedDataStats::addTensorStatistics(const std::string& name, size_t channel, float* data, size_t count) { - auto&& byChannel = _data[name]; - 
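- // Each call appends one min/max record for this (layer, channel) pair; getDataMinMax() later sorts these records and discards outliers according to the -threshold percentile.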
byChannel[channel].push_back(TensorStatistic(data, count)); -} - -void AggregatedDataStats::addTensorStatistics(const std::string &name, size_t channel, uint8_t *data, size_t count) { - std::vector intermediate; - for (size_t i = 0; i < count; i++) { - intermediate.push_back(data[i]); - } - addTensorStatistics(name, channel, intermediate.data(), count); -} - -size_t AggregatedDataStats::getNumberChannels(const std::string& name) const { - auto it = _data.find(name); - if (it != _data.end()) { - return it->second.size(); - } - return 0; -} - -void AggregatedDataStats::getDataMinMax(const std::string& name, size_t channel, float& min, float& max, float threshold) { - // take data by name - auto it = _data.find(name); - if (it != _data.end()) { - auto stats = it->second[channel]; - // having absolute min/max values, we can create new statistic - std::vector maxValues; - std::vector minValues; - for (size_t i = 0; i < stats.size(); i++) { - const TensorStatistic& tsS = stats[i]; - maxValues.push_back(tsS.getMaxValue()); - minValues.push_back(tsS.getMinValue()); - } - // define number of elements to throw out - size_t elementToTake = static_cast(maxValues.size() * (threshold / 100)); - int elementsToThrow = maxValues.size() - elementToTake; - std::sort(maxValues.begin(), maxValues.end()); - std::sort(minValues.begin(), minValues.end()); - - min = minValues[elementsToThrow]; - max = maxValues[elementToTake - 1]; - } else { - min = max = 0.f; - } -} - diff --git a/inference-engine/samples/calibration_tool/data_stats.h b/inference-engine/samples/calibration_tool/data_stats.h deleted file mode 100644 index 9f2c375e135657..00000000000000 --- a/inference-engine/samples/calibration_tool/data_stats.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include - -struct TensorStatistic { - TensorStatistic(float* data, size_t count, size_t nbuckets = 1000); - float getMaxValue() const; - float getMinValue()const; -protected: - float _min; - float _max; -}; - -class AggregatedDataStats { -public: - void addTensorStatistics(const std::string& name, size_t channel, float* data, size_t count); - void addTensorStatistics(const std::string &name, size_t channel, uint8_t *data, size_t count); - void getDataMinMax(const std::string& name, size_t channel, float& min, float& max, float threshold); - size_t getNumberChannels(const std::string& name) const; - std::vector registeredLayers(); - void registerLayer(std::string layer); -protected: - std::map > > _data; -}; - diff --git a/inference-engine/samples/calibration_tool/main.cpp b/inference-engine/samples/calibration_tool/main.cpp deleted file mode 100644 index 2a63d4b62fff23..00000000000000 --- a/inference-engine/samples/calibration_tool/main.cpp +++ /dev/null @@ -1,557 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/** - * @brief The entry point for Inference Engine validation application - * @file validation_app/main.cpp - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -#include "user_exception.hpp" -#include "calibrator_processors.h" -#include "SSDObjectDetectionProcessor.hpp" -#include "YOLOObjectDetectionProcessor.hpp" -#include "ie_icnn_network_stats.hpp" -#include "details/caseless.hpp" - -using namespace std; -using namespace InferenceEngine; -using namespace 
InferenceEngine::details; - -using InferenceEngine::details::InferenceEngineException; - -/// @brief Message for help argument -static const char help_message[] = "Print a help message"; -/// @brief Message for images argument -static const char image_message[] = "Required. Path to a directory with validation images. For Classification models, the directory must contain" - " folders named as labels with images inside or a .txt file with" - " a list of images. For Object Detection models, the dataset must be in" - " VOC format."; -/// @brief Message for plugin_path argument -static const char plugin_path_message[] = "Path to a plugin folder"; -/// @brief Message for model argument -static const char model_message[] = "Required. Path to an .xml file with a trained model, including model name and " - "extension."; -/// @brief Message for plugin argument -static const char plugin_message[] = "Plugin name. For example, CPU. If this parameter is passed, " - "the sample looks for a specified plugin only."; -/// @brief Message for assigning cnn calculation to device -static const char target_device_message[] = "Target device to infer on: CPU (default), GPU, FPGA, HDDL or MYRIAD." - " The application looks for a suitable plugin for the specified device."; -/// @brief Message for label argument -static const char label_message[] = "Path to a file with labels for a model"; -/// @brief Message for batch argument type -static const char batch_message[] = "Batch size value. If not specified, the batch size value is taken from IR"; -/// @brief Message for dump argument -static const char dump_message[] = "Dump file names and inference results to a .csv file"; -/// @brief Message for network type -static const char type_message[] = "Type of an inferred network (\"C\" by default)"; -/// @brief Message for pp-type -static const char preprocessing_type[] = "Preprocessing type. Options: \"None\", \"Resize\", \"ResizeCrop\""; -/// @brief Message for pp-crop-size -static const char preprocessing_size[] = "Preprocessing size (used with ppType=\"ResizeCrop\")"; -static const char preprocessing_width[] = "Preprocessing width (overrides -ppSize, used with ppType=\"ResizeCrop\")"; -static const char preprocessing_height[] = "Preprocessing height (overrides -ppSize, used with ppType=\"ResizeCrop\")"; - -static const char obj_detection_annotations_message[] = "Required for Object Detection models. Path to a directory" - " containing an .xml file with annotations for images."; - -static const char obj_detection_classes_message[] = "Required for Object Detection models. Path to a file with" - " a list of classes"; - -static const char obj_detection_subdir_message[] = "Directory between the path to images (specified with -i) and image name (specified in the" - " .xml file). For VOC2007 dataset, use JPEGImages."; - -static const char obj_detection_kind_message[] = "Type of an Object Detection model. Options: SSD"; - -/// @brief Message for GPU custom kernels desc -static const char custom_cldnn_message[] = "Required for GPU custom kernels. " - "Absolute path to an .xml file with the kernel descriptions."; - -/// @brief Message for user library argument -static const char custom_cpu_library_message[] = "Required for CPU custom layers. " - "Absolute path to a shared library with the kernel implementations."; -/// @brief Message for labels file -static const char labels_file_message[] = "Labels file path. The labels file contains names of the dataset classes"; - -static const char zero_background_message[] = "\"Zero is a background\" flag. Some networks are trained with a modified" - " dataset where the class IDs" - " are enumerated from 1, but 0 is an undefined \"background\" class" - " (which is never detected)"; - -static const char stream_output_message[] = "Flag for printing progress as a plain text. When used, interactive progress" - " bar is replaced with multiline output"; - -static const char convert_fc_message[] = "Convert FullyConnected layers to Int8 or not (false by default)"; - - -/// @brief Network type options and their descriptions -static const char* types_descriptions[][2] = { - { "C", "calibrate Classification network and write the calibrated network to IR" }, -// { "SS", "semantic segmentation" }, // Not supported yet - { "OD", "calibrate Object Detection network and write the calibrated network to IR" }, - { "RawC", "collect only statistics for Classification network and write statistics to IR. With this option, a model is not calibrated. For calibration " - "and statistics collection, use \"-t C\" instead." }, - { "RawOD", "collect only statistics for Object Detection network and write statistics to IR. With this option, a model is not calibrated. For calibration " - "and statistics collection, use \"-t OD\" instead" }, - { nullptr, nullptr } -}; - -static const char accuracy_threshold_message[] = "Threshold for a maximum accuracy drop of quantized model." - " Must be an integer number (percents)" - " without a percent sign. Default value is 1, which stands for an accepted" - " accuracy drop of 1%"; -static const char number_of_pictures_message[] = "Number of pictures from the whole validation set to " - "create the calibration dataset. Default value is 0, which stands for " - "the whole provided dataset"; -static const char output_model_name[] = "Output name for calibrated model. Default is _i8.xml|bin"; - -/// @brief Define flag for showing help message
-DEFINE_bool(h, false, help_message); -/// @brief Define parameter for a path to images
-/// It is a required parameter -DEFINE_string(i, "", image_message); -/// @brief Define parameter for a path to model file
-/// It is a required parameter -DEFINE_string(m, "", model_message); -/// @brief Define parameter for a plugin name
-/// It is a required parameter -DEFINE_string(p, "", plugin_message); -/// @brief Define parameter for a path to a file with labels
-/// Default is empty -DEFINE_string(OCl, "", label_message); -/// @brief Define parameter for a path to plugins
-/// Default is ./lib -DEFINE_string(pp, "", plugin_path_message); -/// @brief Define parameter for a target device to infer on
-DEFINE_string(d, "CPU", target_device_message); -/// @brief Define parameter for batch size
-/// Default is 0 (which means that batch size is not specified) -DEFINE_int32(b, 0, batch_message); -/// @brief Define flag to dump results to a file
-DEFINE_bool(dump, false, dump_message); -/// @brief Define parameter for a network type -DEFINE_string(t, "C", type_message); - -/// @brief Define parameter for preprocessing type -DEFINE_string(ppType, "", preprocessing_type); - -/// @brief Define parameter for preprocessing size -DEFINE_int32(ppSize, 0, preprocessing_size); -DEFINE_int32(ppWidth, 0, preprocessing_width); -DEFINE_int32(ppHeight, 0, preprocessing_height); - -DEFINE_bool(Czb, false, zero_background_message); - -DEFINE_string(ODa, "", obj_detection_annotations_message); - -DEFINE_string(ODc, "", obj_detection_classes_message); - -DEFINE_string(ODsubdir, "", obj_detection_subdir_message); - -/// @brief Define parameter for a type of Object Detection network -DEFINE_string(ODkind, "SSD", obj_detection_kind_message); - -/// @brief Define parameter for GPU kernels path
-/// Default is ./lib -DEFINE_string(c, "", custom_cldnn_message); - -/// @brief Define parameter for a path to CPU library with user layers
-/// It is an optional parameter -DEFINE_string(l, "", custom_cpu_library_message); - -/// @brief Define parameter for accuracy drop threshold -DEFINE_double(threshold, 1.0f, accuracy_threshold_message); - -/// @brief Define path to output calibrated model -DEFINE_bool(stream_output, false, stream_output_message); - -DEFINE_int32(subset, 0, number_of_pictures_message); - -DEFINE_string(output, "", output_model_name); - -DEFINE_string(lbl, "", labels_file_message); - -DEFINE_bool(convert_fc, false, convert_fc_message); - -/** - * @brief This function shows a help message - */ -static void showUsage() { - std::cout << std::endl; - std::cout << "Usage: calibration_tool [OPTION]" << std::endl << std::endl; - std::cout << "Available options:" << std::endl; - std::cout << std::endl; - std::cout << " -h " << help_message << std::endl; - std::cout << " -t " << type_message << std::endl; - for (int i = 0; types_descriptions[i][0] != nullptr; i++) { - std::cout << " -t \"" << types_descriptions[i][0] << "\" to " << types_descriptions[i][1] << std::endl; - } - std::cout << " -i " << image_message << std::endl; - std::cout << " -m " << model_message << std::endl; - std::cout << " -lbl " << labels_file_message << std::endl; - std::cout << " -l " << custom_cpu_library_message << std::endl; - std::cout << " -c " << custom_cldnn_message << std::endl; - std::cout << " -d " << target_device_message << std::endl; - std::cout << " -b N " << batch_message << std::endl; - std::cout << " -ppType " << preprocessing_type << std::endl; - std::cout << " -ppSize N " << preprocessing_size << std::endl; - std::cout << " -ppWidth W " << preprocessing_width << std::endl; - std::cout << " -ppHeight H " << preprocessing_height << std::endl; - std::cout << " --dump " << dump_message << std::endl; - std::cout << " -subset " << number_of_pictures_message << std::endl; - std::cout << " -output " << output_model_name << std::endl; - std::cout << " -threshold " << accuracy_threshold_message << std::endl; - - std::cout << std::endl; - std::cout << " Classification-specific options:" << std::endl; - std::cout << " -Czb true " << zero_background_message << std::endl; - - std::cout << std::endl; - std::cout << " Object detection-specific options:" << std::endl; - std::cout << " -ODkind " << obj_detection_kind_message << std::endl; - std::cout << " -ODa " << obj_detection_annotations_message << std::endl; - std::cout << " -ODc " << obj_detection_classes_message << std::endl; - std::cout << " -ODsubdir " << obj_detection_subdir_message << std::endl << std::endl; - - std::cout << std::endl; - std::cout << " -stream_output " << stream_output_message << std::endl; -} - -enum NetworkType { - Undefined = -1, - Classification, - ObjDetection, - RawC, - RawOD -}; - -std::string strtolower(const std::string& s) { - std::string res = s; - std::transform(res.begin(), res.end(), res.begin(), ::tolower); - return res; -} - -void SaveCalibratedIR(const std::string &originalName, - const std::string &outModelName, - const std::map& layersToInt8, - const InferenceEngine::NetworkStatsMap& statMap, - bool convertFullyConnected) { - slog::info << "Layers profile for Int8 quantization\n"; - CNNNetReader networkReader; - networkReader.ReadNetwork(originalName); - if (!networkReader.isParseSuccess())THROW_IE_EXCEPTION << "cannot load a failed Model"; - - /** Extract model name and load weights **/ - std::string binFileName = fileNameNoExt(originalName)+ ".bin"; - networkReader.ReadWeights(binFileName.c_str()); - - auto network = 
networkReader.getNetwork(); - for (auto &&layer : network) { - if (CaselessEq()(layer->type, "convolution")) { - auto it = layersToInt8.find(layer->name); - if (it != layersToInt8.end() && it->second == false) { - layer->params["quantization_level"] = "FP32"; - std::cout << layer->name << ": " << "FP32" << std::endl; - } else { - layer->params["quantization_level"] = "I8"; - std::cout << layer->name << ": " << "I8" << std::endl; - } - } else if (CaselessEq()(layer->type, "fullyconnected")) { - if (!convertFullyConnected) { - layer->params["quantization_level"] = "FP32"; - std::cout << layer->name << ": " << "FP32" << std::endl; - } else { - layer->params["quantization_level"] = "I8"; - std::cout << layer->name << ": " << "I8" << std::endl; - } - } - } - - - ICNNNetworkStats* pstats = nullptr; - StatusCode s = ((ICNNNetwork&)networkReader.getNetwork()).getStats(&pstats, nullptr); - if (s == StatusCode::OK && pstats) { - pstats->setNodesStats(statMap); - } - - slog::info << "Write calibrated network to " << outModelName << ".(xml|bin) IR file\n"; - networkReader.getNetwork().serialize(outModelName + ".xml", outModelName + ".bin"); -} - -/** - * @brief The main function of inference engine sample application - * @param argc - The number of arguments - * @param argv - Arguments - * @return 0 if all good - */ -int main(int argc, char *argv[]) { - try { - slog::info << "InferenceEngine: " << GetInferenceEngineVersion() << slog::endl; - - // ---------------------------Parsing and validating input arguments-------------------------------------- - slog::info << "Parsing input parameters" << slog::endl; - - bool noOptions = argc == 1; - - gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); - if (FLAGS_h || noOptions) { - showUsage(); - return 1; - } - - UserExceptions ee; - - NetworkType netType = Undefined; - // Checking the network type - if (std::string(FLAGS_t) == "C") { - netType = Classification; - } else if (std::string(FLAGS_t) == "OD") { - netType = ObjDetection; - } else if (std::string(FLAGS_t) == "RawC") { - netType = RawC; - } else if (std::string(FLAGS_t) == "RawOD") { - netType = RawOD; - } else { - ee << UserException(5, "Unknown network type specified (invalid -t option)"); - } - - // Checking required options - if (FLAGS_m.empty()) ee << UserException(3, "Model file is not specified (missing -m option)"); - if (FLAGS_i.empty()) ee << UserException(4, "Images list is not specified (missing -i option)"); - if (FLAGS_d.empty()) ee << UserException(5, "Target device is not specified (missing -d option)"); - if (FLAGS_b < 0) ee << UserException(6, "Batch must be positive (invalid -b option value)"); - - if (netType == ObjDetection) { - // Checking required OD-specific options - if (FLAGS_ODa.empty()) ee << UserException(11, "Annotations folder is not specified for object detection (missing -a option)"); - if (FLAGS_ODc.empty()) ee << UserException(12, "Classes file is not specified (missing -c option)"); - } - - if (!ee.empty()) throw ee; - // ----------------------------------------------------------------------------------------------------- - - // ---------------------Loading plugin for Inference Engine------------------------------------------------ - slog::info << "Loading plugin" << slog::endl; - /** Loading the library with extensions if provided**/ - InferencePlugin plugin = PluginDispatcher({ FLAGS_pp }).getPluginByDevice(FLAGS_d); - - /** Loading default extensions **/ - if (FLAGS_d.find("CPU") != std::string::npos) { - /** - * cpu_extensions library is compiled from 
"extension" folder containing - * custom CPU plugin layer implementations. These layers are not supported - * by CPU, but they can be useful for inferring custom topologies. - **/ - plugin.AddExtension(std::make_shared()); - } - - if (!FLAGS_l.empty()) { - // CPU extensions are loaded as a shared library and passed as a pointer to base extension - IExtensionPtr extension_ptr = make_so_pointer(FLAGS_l); - plugin.AddExtension(extension_ptr); - slog::info << "CPU Extension loaded: " << FLAGS_l << slog::endl; - } - if (!FLAGS_c.empty()) { - // GPU extensions are loaded from an .xml description and OpenCL kernel files - plugin.SetConfig({{PluginConfigParams::KEY_CONFIG_FILE, FLAGS_c}}); - slog::info << "GPU Extension loaded: " << FLAGS_c << slog::endl; - } - - printPluginVersion(plugin, std::cout); - - CsvDumper dumper(FLAGS_dump); - - std::shared_ptr processor; - - PreprocessingOptions preprocessingOptions; - if (strtolower(FLAGS_ppType.c_str()) == "none") { - preprocessingOptions = PreprocessingOptions(false, ResizeCropPolicy::DoNothing); - } else if (strtolower(FLAGS_ppType) == "resizecrop") { - size_t ppWidth = FLAGS_ppSize; - size_t ppHeight = FLAGS_ppSize; - - if (FLAGS_ppWidth > 0) ppWidth = FLAGS_ppSize; - if (FLAGS_ppHeight > 0) ppHeight = FLAGS_ppSize; - - if (FLAGS_ppSize > 0 || (FLAGS_ppWidth > 0 && FLAGS_ppHeight > 0)) { - preprocessingOptions = PreprocessingOptions(false, ResizeCropPolicy::ResizeThenCrop, ppWidth, ppHeight); - } else { - THROW_USER_EXCEPTION(2) << "Size must be specified for preprocessing type " << FLAGS_ppType; - } - } else if (strtolower(FLAGS_ppType) == "resize" || FLAGS_ppType.empty()) { - preprocessingOptions = PreprocessingOptions(false, ResizeCropPolicy::Resize); - } else { - THROW_USER_EXCEPTION(2) << "Unknown preprocessing type: " << FLAGS_ppType; - } - - if (netType == Classification || netType == RawC) { - processor = std::shared_ptr( - new ClassificationCalibrator(FLAGS_subset, FLAGS_m, FLAGS_d, FLAGS_i, FLAGS_b, - plugin, dumper, FLAGS_lbl, preprocessingOptions, FLAGS_Czb)); - } else if (netType == ObjDetection || netType == RawOD) { - if (FLAGS_ODkind == "SSD") { - processor = std::shared_ptr( - new SSDObjectDetectionCalibrator(FLAGS_subset, FLAGS_m, FLAGS_d, FLAGS_i, FLAGS_ODsubdir, FLAGS_b, - 0.5, plugin, dumper, FLAGS_ODa, FLAGS_ODc)); -/* } else if (FLAGS_ODkind == "YOLO") { - processor = std::shared_ptr( - new YOLOObjectDetectionProcessor(FLAGS_m, FLAGS_d, FLAGS_i, FLAGS_ODsubdir, FLAGS_b, - 0.5, plugin, dumper, FLAGS_ODa, FLAGS_ODc)); -*/ - } - } else { - THROW_USER_EXCEPTION(2) << "Unknown network type specified" << FLAGS_ppType; - } - if (!processor.get()) { - THROW_USER_EXCEPTION(2) << "Processor pointer is invalid" << FLAGS_ppType; - } - - auto calibrator = dynamic_cast(processor.get()); - if (calibrator == nullptr) { - THROW_USER_EXCEPTION(2) << "processor object is not instance of Int8Calibrator class"; - } - - if (netType != RawC && netType != RawOD) { - slog::info << "Collecting accuracy metric in FP32 mode to get a baseline, collecting activation statistics" << slog::endl; - } else { - slog::info << "Collecting activation statistics" << slog::endl; - } - calibrator->collectFP32Statistic(); - shared_ptr pIMFP32 = processor->Process(FLAGS_stream_output); - const auto mFP32 = dynamic_cast(pIMFP32.get()); - if (mFP32 == nullptr) { - THROW_USER_EXCEPTION(2) << "FP32 inference metrics object is not instance of CalibrationMetrics class"; - } - std:: cout << " FP32 Accuracy: " << OUTPUT_FLOATING(100.0 * mFP32->AccuracyResult) << "% " << 
std::endl; - - InferenceEngine::NetworkStatsMap statMap; - std::map layersToInt8; - bool bAccuracy = false; - - if (netType != RawC && netType != RawOD) { - slog::info << "Verification of network accuracy if all possible layers converted to INT8" << slog::endl; - float bestThreshold = 100.f; - float maximalAccuracy = 0.f; - for (float threshold = 100.0f; threshold > 95.0f; threshold -= 0.5) { - std::cout << "Validate int8 accuracy, threshold for activation statistics = " << threshold << std::endl; - InferenceEngine::NetworkStatsMap tmpStatMap = calibrator->getStatistic(threshold); - calibrator->validateInt8Config(tmpStatMap, {}, FLAGS_convert_fc); - shared_ptr pIM_I8 = processor->Process(FLAGS_stream_output); - auto *mI8 = dynamic_cast(pIM_I8.get()); - if (mI8 == nullptr) { - THROW_USER_EXCEPTION(2) << "INT8 inference metrics object is not instance of CalibrationMetrics class"; - } - if (maximalAccuracy < mI8->AccuracyResult) { - maximalAccuracy = mI8->AccuracyResult; - bestThreshold = threshold; - } - std::cout << " Accuracy is " << OUTPUT_FLOATING(100.0 * mI8->AccuracyResult) << "%" << std::endl; - } - - statMap = calibrator->getStatistic(bestThreshold); - - if ((mFP32->AccuracyResult - maximalAccuracy) > (FLAGS_threshold / 100)) { - slog::info << "Accuracy of all layers conversion does not correspond to the required threshold\n"; - cout << "FP32 Accuracy: " << OUTPUT_FLOATING(100.0 * mFP32->AccuracyResult) << "% vs " << - "all Int8 layers Accuracy: " << OUTPUT_FLOATING(100.0 * maximalAccuracy) << "%, " << - "threshold for activation statistics: " << bestThreshold << "%" << std::endl; - slog::info << "Collecting intermediate per-layer accuracy drop" << slog::endl; - // getting statistic on accuracy drop by layers - calibrator->collectByLayerStatistic(statMap); - processor->Process(FLAGS_stream_output); - // starting to reduce number of layers being converted to Int8 - std::map layersAccuracyDrop = calibrator->layersAccuracyDrop(); - - std::map orderedLayersAccuracyDrop; - for (auto d : layersAccuracyDrop) { - orderedLayersAccuracyDrop[d.second] = d.first; - layersToInt8[d.first] = true; - } - auto it = orderedLayersAccuracyDrop.crbegin(); - - shared_ptr pIM_I8; - const CalibrationMetrics *mI8; - while (it != orderedLayersAccuracyDrop.crend() && bAccuracy == false) { - slog::info << "Returning of '" << it->second << "' to FP32 precision, start validation\n"; - layersToInt8[it->second] = false; - calibrator->validateInt8Config(statMap, layersToInt8, FLAGS_convert_fc); - pIM_I8 = processor->Process(FLAGS_stream_output); - mI8 = dynamic_cast(pIM_I8.get()); - maximalAccuracy = mI8->AccuracyResult; - if ((mFP32->AccuracyResult - maximalAccuracy) > (FLAGS_threshold / 100)) { - cout << "FP32 Accuracy: " << OUTPUT_FLOATING(100.0 * mFP32->AccuracyResult) << "% vs " << - "current Int8 configuration Accuracy: " << OUTPUT_FLOATING(100.0 * maximalAccuracy) << "%" << std::endl; - } else { - bAccuracy = true; - } - it++; - } - } else { - bAccuracy = true; - } - - if (bAccuracy) { - slog::info << "Achieved required accuracy drop satisfying threshold\n"; - cout << "FP32 accuracy: " << OUTPUT_FLOATING(100.0 * mFP32->AccuracyResult) << "% vs " << - "current Int8 configuration accuracy: " << OUTPUT_FLOATING(100.0 * maximalAccuracy) << "% " << - "with threshold for activation statistic: " << bestThreshold << "%" << std::endl; - std::string outModelName = FLAGS_output.empty() ? 
fileNameNoExt(FLAGS_m) + "_i8" : fileNameNoExt(FLAGS_output); - SaveCalibratedIR(FLAGS_m, outModelName, layersToInt8, statMap, FLAGS_convert_fc); - } else { - slog::info << "Required threshold of accuracy drop cannot be achieved with any int8 quantization\n"; - } - } else { - std::cout << "Collected activation statistics, writing maximum values to IR" << std::endl; - statMap = calibrator->getStatistic(100.0f); - std::string outModelName = FLAGS_output.empty() ? fileNameNoExt(FLAGS_m) + "_i8" : fileNameNoExt(FLAGS_output); - SaveCalibratedIR(FLAGS_m, outModelName, layersToInt8, statMap, FLAGS_convert_fc); - } - - if (dumper.dumpEnabled()) { - slog::info << "Dump file generated: " << dumper.getFilename() << slog::endl; - } - } catch (const InferenceEngineException& ex) { - slog::err << "Inference problem: \n" << ex.what() << slog::endl; - return 1; - } catch (const UserException& ex) { - slog::err << "Input problem: \n" << ex.what() << slog::endl; - showUsage(); - return ex.exitCode(); - } catch (const UserExceptions& ex) { - if (ex.list().size() == 1) { - slog::err << "Input problem: " << ex.what() << slog::endl; - showUsage(); - return ex.list().begin()->exitCode(); - } else { - slog::err << "Input problems: \n" << ex.what() << slog::endl; - showUsage(); - return ex.list().begin()->exitCode(); - } - } catch (const std::exception& ex) { - slog::err << ex.what() << slog::endl; - return 1; - } catch (...) { - slog::err << "Unknown/internal exception happened." << slog::endl; - return 1; - } - return 0; -} diff --git a/inference-engine/samples/classification_sample/CMakeLists.txt b/inference-engine/samples/classification_sample/CMakeLists.txt deleted file mode 100644 index 1dab0c94f1cc6d..00000000000000 --- a/inference-engine/samples/classification_sample/CMakeLists.txt +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (C) 2018-2019 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# - -set (TARGET_NAME "classification_sample") - -file (GLOB SRC - ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp - ) - -# Create named folders for the sources within the .vcproj -# Empty name lists them directly under the .vcproj -source_group("src" FILES ${SRC}) - -link_directories(${LIB_FOLDER}) - -# Create library file from sources. -add_executable(${TARGET_NAME} ${SRC}) - -set_target_properties(${TARGET_NAME} PROPERTIES "CMAKE_CXX_FLAGS" "${CMAKE_CXX_FLAGS} -fPIE" -COMPILE_PDB_NAME ${TARGET_NAME}) - -target_link_libraries(${TARGET_NAME} ${InferenceEngine_LIBRARIES} IE::ie_cpu_extension format_reader gflags) - -if(UNIX) - target_link_libraries(${TARGET_NAME} ${LIB_DL} pthread) -endif() diff --git a/inference-engine/samples/classification_sample/README.md b/inference-engine/samples/classification_sample/README.md deleted file mode 100644 index 9c22dc81703354..00000000000000 --- a/inference-engine/samples/classification_sample/README.md +++ /dev/null @@ -1,68 +0,0 @@ -# Image Classification C++ Sample - -This topic demonstrates how to run the Image Classification sample application, which performs -inference using image classification networks such as AlexNet and GoogLeNet. - -> **NOTE:** This topic describes usage of C++ implementation of the Image Classification Sample. For the Python* implementation, refer to [Image Classification Python* Sample](./inference-engine/ie_bridges/python/sample/classification_sample/README.md). - -## How It Works - -Upon the start-up, the sample application reads command line parameters and loads a network and an image to the Inference -Engine plugin. 
When inference is done, the application creates an
-output image and outputs data to the standard output stream.
-
-> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application, or reconvert your model using the Model Optimizer tool with the `--reverse_input_channels` argument specified. For more information about the argument, refer to the **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
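For an RGB-trained model, the rearrangement mentioned in the note above comes down to mirroring the channel index while the interleaved pixels are copied into the planar NCHW input blob. A minimal sketch (an editor's illustration, not part of the sample; the function name and the buffer-layout assumptions are invented here):

```cpp
#include <cstddef>
#include <cstdint>

// Copies one interleaved BGR image into a planar NCHW destination while
// reversing the channel order, so that an RGB-trained model receives RGB
// planes. `dst` must hold num_channels * image_size bytes (one batch slot).
void fillPlanarReversed(uint8_t *dst, const uint8_t *interleavedBgr,
                        size_t image_size, size_t num_channels) {
    for (size_t pid = 0; pid < image_size; ++pid) {
        for (size_t ch = 0; ch < num_channels; ++ch) {
            const size_t reversedCh = num_channels - 1 - ch;  // B<->R swap, G unchanged
            dst[reversedCh * image_size + pid] = interleavedBgr[pid * num_channels + ch];
        }
    }
}
```

The sample's own copy loop in main.cpp below uses the same indexing without the `reversedCh` mirror.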
-
-## Running
-Running the application with the `-h` option yields the following usage message:
-```sh
-./classification_sample -h
-InferenceEngine:
-    API version ............ <version>
-    Build .................. <number>
-
-classification_sample [OPTION]
-Options:
-
-    -h                      Print a usage message.
-    -i "<path>" "<path>"    Required. Path to a folder with images or path to image files: a .ubyte file for LeNet
-                            and a .bmp file for the other networks.
-    -m "<path>"             Required. Path to an .xml file with a trained model.
-    -l "<absolute_path>"    Required for CPU custom layers. Absolute path to a shared library with the kernels implementations.
-          Or
-    -c "<absolute_path>"    Required for GPU custom kernels. Absolute path to the .xml file with the kernels descriptions.
-    -pp "<path>"            Path to a plugin folder.
-    -d "<device>"           Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is acceptable. Sample will look for a suitable plugin for device specified
-    -nt "<integer>"         Number of top results. Default value is 10
-    -ni "<integer>"         Number of iterations. Default value is 1
-    -pc                     Enables per-layer performance report
-    -p_msg                  Enables messages from a plugin
-
-```
-
-Running the application with the empty list of options yields the usage message given above.
-
-To run the sample, you can use AlexNet and GoogLeNet or other public or pre-trained image classification models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
-
-> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
-
-For example, to perform inference of an AlexNet model on CPU, use the following command:
-
-```sh
-./classification_sample -i <path_to_image>/cat.bmp -m <path_to_model>/alexnet_fp32.xml
-```
-
-## Demo Output
-
-By default the application outputs top-10 inference results.
-Add the `-nt` option to the previous command to modify the number of top output results.
-
-For example, to get the top-5 results on GPU, use the following command:
-```sh
-./classification_sample -i <path_to_image>/cat.bmp -m <path_to_model>/alexnet_fp32.xml -nt 5 -d GPU
-```
-
-## See Also
-* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
-* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
-* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader)
diff --git a/inference-engine/samples/classification_sample/classification_sample.h b/inference-engine/samples/classification_sample/classification_sample.h
deleted file mode 100644
index 7b84e6ac112857..00000000000000
--- a/inference-engine/samples/classification_sample/classification_sample.h
+++ /dev/null
@@ -1,112 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include
-#include
-#include
-#include
-
-#ifdef _WIN32
-#include <os/windows/w_dirent.h>
-#else
-#include <dirent.h>
-#endif
-
-/// @brief message for help argument
-static const char help_message[] = "Print a usage message.";
-
-/// @brief message for images argument
-static const char image_message[] = "Required. Path to a folder with images or path to image files: a .ubyte file for LeNet "\
-    "and a .bmp file for the other networks.";
-
-/// @brief message for plugin_path argument
-static const char plugin_path_message[] = "Path to a plugin folder.";
-
-/// @brief message for model argument
-static const char model_message[] = "Required. Path to an .xml file with a trained model.";
-
-/// @brief message for assigning cnn calculation to device
-static const char target_device_message[] = "Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is acceptable. " \
-    "Sample will look for a suitable plugin for device specified (CPU by default)";
-
-/// @brief message for performance counters
-static const char performance_counter_message[] = "Enables per-layer performance report";
-
-/// @brief message for top results number
-static const char ntop_message[] = "Number of top results. Default value is 10";
-
-/// @brief message for iterations count
-static const char iterations_count_message[] = "Number of iterations. Default value is 1";
-
-/// @brief message for clDNN custom kernels desc
-static const char custom_cldnn_message[] = "Required for GPU custom kernels. "\
-    "Absolute path to the .xml file with the kernels descriptions.";
-
-/// @brief message for user library argument
-static const char custom_cpu_library_message[] = "Required for CPU custom layers. " \
-    "Absolute path to a shared library with the kernels implementations.";
-
-/// @brief message for plugin messages
-static const char plugin_message[] = "Enables messages from a plugin";
-
-/// @brief Define flag for showing help message <br>
-DEFINE_bool(h, false, help_message);
-
-/// @brief Define parameter for setting an image file <br>
-/// It is a required parameter
-DEFINE_string(i, "", image_message);
-
-/// @brief Define parameter for setting a model file <br>
-/// It is a required parameter
-DEFINE_string(m, "", model_message);
-
-/// @brief Define parameter for setting the path to plugins <br>
-DEFINE_string(pp, "", plugin_path_message);
-
-/// @brief Define the target device to infer on <br>
-DEFINE_string(d, "CPU", target_device_message);
-
-/// @brief Top results number (default 10) <br>
-DEFINE_uint32(nt, 10, ntop_message);
-
-/// @brief Enable per-layer performance report
-DEFINE_bool(pc, false, performance_counter_message);
-
-/// @brief Define parameter for clDNN custom kernels path <br>
-/// Default is ./lib
-DEFINE_string(c, "", custom_cldnn_message);
-
-/// @brief Absolute path to CPU library with user layers <br>
-/// It is a optional parameter -DEFINE_string(l, "", custom_cpu_library_message); - -/// @brief Iterations count (default 1) -DEFINE_uint32(ni, 1, iterations_count_message); - -/// @brief Enable plugin messages -DEFINE_bool(p_msg, false, plugin_message); - -/** -* @brief This function show a help message -*/ -static void showUsage() { - std::cout << std::endl; - std::cout << "classification_sample [OPTION]" << std::endl; - std::cout << "Options:" << std::endl; - std::cout << std::endl; - std::cout << " -h " << help_message << std::endl; - std::cout << " -i \"\" " << image_message << std::endl; - std::cout << " -m \"\" " << model_message << std::endl; - std::cout << " -l \"\" " << custom_cpu_library_message << std::endl; - std::cout << " Or" << std::endl; - std::cout << " -c \"\" " << custom_cldnn_message << std::endl; - std::cout << " -pp \"\" " << plugin_path_message << std::endl; - std::cout << " -d \"\" " << target_device_message << std::endl; - std::cout << " -nt \"\" " << ntop_message << std::endl; - std::cout << " -ni \"\" " << iterations_count_message << std::endl; - std::cout << " -pc " << performance_counter_message << std::endl; - std::cout << " -p_msg " << plugin_message << std::endl; -} diff --git a/inference-engine/samples/classification_sample/main.cpp b/inference-engine/samples/classification_sample/main.cpp deleted file mode 100644 index 422e737a593d90..00000000000000 --- a/inference-engine/samples/classification_sample/main.cpp +++ /dev/null @@ -1,314 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include - -#include "classification_sample.h" - -using namespace InferenceEngine; - -ConsoleErrorListener error_listener; - -bool ParseAndCheckCommandLine(int argc, char *argv[]) { - // ---------------------------Parsing and validation of input args-------------------------------------- - gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); - if (FLAGS_h) { - showUsage(); - return false; - } - slog::info << "Parsing input parameters" << slog::endl; - - if (FLAGS_ni < 1) { - throw std::logic_error("Parameter -ni should be greater than zero (default 1)"); - } - - if (FLAGS_i.empty()) { - throw std::logic_error("Parameter -i is not set"); - } - - if (FLAGS_m.empty()) { - throw std::logic_error("Parameter -m is not set"); - } - - return true; -} - -/** -* @brief The entry point the Inference Engine sample application -* @file classification_sample/main.cpp -* @example classification_sample/main.cpp -*/ -int main(int argc, char *argv[]) { - try { - slog::info << "InferenceEngine: " << GetInferenceEngineVersion() << slog::endl; - - // ------------------------------ Parsing and validation of input args --------------------------------- - if (!ParseAndCheckCommandLine(argc, argv)) { - return 0; - } - - /** This vector stores paths to the processed images **/ - std::vector imageNames; - parseInputFilesArguments(imageNames); - if (imageNames.empty()) throw std::logic_error("No suitable images were found"); - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 1. 
Load Plugin for inference engine ------------------------------------- - slog::info << "Loading plugin" << slog::endl; - InferencePlugin plugin = PluginDispatcher({ FLAGS_pp }).getPluginByDevice(FLAGS_d); - if (FLAGS_p_msg) { - static_cast(plugin)->SetLogCallback(error_listener); - } - - /** Loading default extensions **/ - if (FLAGS_d.find("CPU") != std::string::npos) { - /** - * cpu_extensions library is compiled from "extension" folder containing - * custom MKLDNNPlugin layer implementations. These layers are not supported - * by mkldnn, but they can be useful for inferring custom topologies. - **/ - plugin.AddExtension(std::make_shared()); - } - - if (!FLAGS_l.empty()) { - // CPU(MKLDNN) extensions are loaded as a shared library and passed as a pointer to base extension - auto extension_ptr = make_so_pointer(FLAGS_l); - plugin.AddExtension(extension_ptr); - slog::info << "CPU Extension loaded: " << FLAGS_l << slog::endl; - } - if (!FLAGS_c.empty()) { - // clDNN Extensions are loaded from an .xml description and OpenCL kernel files - plugin.SetConfig({{PluginConfigParams::KEY_CONFIG_FILE, FLAGS_c}}); - slog::info << "GPU Extension loaded: " << FLAGS_c << slog::endl; - } - - /** Setting plugin parameter for collecting per layer metrics **/ - if (FLAGS_pc) { - plugin.SetConfig({ { PluginConfigParams::KEY_PERF_COUNT, PluginConfigParams::YES } }); - } - - /** Printing plugin version **/ - printPluginVersion(plugin, std::cout); - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------ - std::string binFileName = fileNameNoExt(FLAGS_m) + ".bin"; - slog::info << "Loading network files:" - "\n\t" << FLAGS_m << - "\n\t" << binFileName << - slog::endl; - - CNNNetReader networkReader; - /** Reading network model **/ - networkReader.ReadNetwork(FLAGS_m); - - /** Extracting model name and loading weights **/ - networkReader.ReadWeights(binFileName); - CNNNetwork network = networkReader.getNetwork(); - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 3. Configure input & output --------------------------------------------- - - // --------------------------- Prepare input blobs ----------------------------------------------------- - slog::info << "Preparing input blobs" << slog::endl; - - /** Taking information about all topology inputs **/ - InputsDataMap inputInfo = network.getInputsInfo(); - if (inputInfo.size() != 1) throw std::logic_error("Sample supports topologies only with 1 input"); - - auto inputInfoItem = *inputInfo.begin(); - - /** Specifying the precision and layout of input data provided by the user. - * This should be called before load of the network to the plugin **/ - inputInfoItem.second->setPrecision(Precision::U8); - inputInfoItem.second->setLayout(Layout::NCHW); - - std::vector> imagesData; - for (auto & i : imageNames) { - FormatReader::ReaderPtr reader(i.c_str()); - if (reader.get() == nullptr) { - slog::warn << "Image " + i + " cannot be read!" 
<< slog::endl; - continue; - } - /** Store image data **/ - std::shared_ptr data( - reader->getData(inputInfoItem.second->getTensorDesc().getDims()[3], - inputInfoItem.second->getTensorDesc().getDims()[2])); - if (data.get() != nullptr) { - imagesData.push_back(data); - } - } - if (imagesData.empty()) throw std::logic_error("Valid input images were not found!"); - - /** Setting batch size using image count **/ - network.setBatchSize(imagesData.size()); - size_t batchSize = network.getBatchSize(); - slog::info << "Batch size is " << std::to_string(batchSize) << slog::endl; - - // ------------------------------ Prepare output blobs ------------------------------------------------- - slog::info << "Preparing output blobs" << slog::endl; - - OutputsDataMap outputInfo(network.getOutputsInfo()); - // BlobMap outputBlobs; - std::string firstOutputName; - - for (auto & item : outputInfo) { - if (firstOutputName.empty()) { - firstOutputName = item.first; - } - DataPtr outputData = item.second; - if (!outputData) { - throw std::logic_error("output data pointer is not valid"); - } - - item.second->setPrecision(Precision::FP32); - } - - const SizeVector outputDims = outputInfo.begin()->second->getDims(); - - bool outputCorrect = false; - if (outputDims.size() == 2 /* NC */) { - outputCorrect = true; - } else if (outputDims.size() == 4 /* NCHW */) { - /* H = W = 1 */ - if (outputDims[2] == 1 && outputDims[3] == 1) outputCorrect = true; - } - - if (!outputCorrect) { - throw std::logic_error("Incorrect output dimensions for classification model"); - } - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 4. Loading model to the plugin ------------------------------------------ - slog::info << "Loading model to the plugin" << slog::endl; - - ExecutableNetwork executable_network = plugin.LoadNetwork(network, {}); - inputInfoItem.second = {}; - outputInfo = {}; - network = {}; - networkReader = {}; - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 5. Create infer request ------------------------------------------------- - InferRequest infer_request = executable_network.CreateInferRequest(); - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 6. Prepare input -------------------------------------------------------- - /** Iterate over all the input blobs **/ - for (const auto & item : inputInfo) { - /** Creating input blob **/ - Blob::Ptr input = infer_request.GetBlob(item.first); - - /** Filling input tensor with images. 
First b channel, then g and r channels **/ - size_t num_channels = input->getTensorDesc().getDims()[1]; - size_t image_size = input->getTensorDesc().getDims()[2] * input->getTensorDesc().getDims()[3]; - - auto data = input->buffer().as::value_type*>(); - - /** Iterate over all input images **/ - for (size_t image_id = 0; image_id < imagesData.size(); ++image_id) { - /** Iterate over all pixel in image (b,g,r) **/ - for (size_t pid = 0; pid < image_size; pid++) { - /** Iterate over all channels **/ - for (size_t ch = 0; ch < num_channels; ++ch) { - /** [images stride + channels stride + pixel id ] all in bytes **/ - data[image_id * image_size * num_channels + ch * image_size + pid ] = imagesData.at(image_id).get()[pid*num_channels + ch]; - } - } - } - } - inputInfo = {}; - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 7. Do inference --------------------------------------------------------- - slog::info << "Starting inference (" << FLAGS_ni << " iterations)" << slog::endl; - - typedef std::chrono::high_resolution_clock Time; - typedef std::chrono::duration> ms; - typedef std::chrono::duration fsec; - - double total = 0.0; - /** Start inference & calc performance **/ - for (size_t iter = 0; iter < FLAGS_ni; ++iter) { - auto t0 = Time::now(); - infer_request.Infer(); - auto t1 = Time::now(); - fsec fs = t1 - t0; - ms d = std::chrono::duration_cast(fs); - total += d.count(); - } - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 8. Process output ------------------------------------------------------- - slog::info << "Processing output blobs" << slog::endl; - - const Blob::Ptr output_blob = infer_request.GetBlob(firstOutputName); - - /** Validating -nt value **/ - const size_t resultsCnt = output_blob->size() / batchSize; - if (FLAGS_nt > resultsCnt || FLAGS_nt < 1) { - slog::warn << "-nt " << FLAGS_nt << " is not available for this network (-nt should be less than " \ - << resultsCnt+1 << " and more than 0)\n will be used maximal value : " << resultsCnt; - FLAGS_nt = resultsCnt; - } - - /** Read labels from file (e.x. AlexNet.labels) **/ - std::string labelFileName = fileNameNoExt(FLAGS_m) + ".labels"; - std::vector labels; - - std::ifstream inputFile; - inputFile.open(labelFileName, std::ios::in); - if (inputFile.is_open()) { - std::string strLine; - while (std::getline(inputFile, strLine)) { - trim(strLine); - labels.push_back(strLine); - } - } - - ClassificationResult classificationResult(output_blob, imageNames, - batchSize, FLAGS_nt, - labels); - classificationResult.print(); - - // ----------------------------------------------------------------------------------------------------- - if (std::fabs(total) < std::numeric_limits::epsilon()) { - throw std::logic_error("total can't be equal to zero"); - } - std::cout << std::endl << "total inference time: " << total << std::endl; - std::cout << "Average running time of one iteration: " << total / static_cast(FLAGS_ni) << " ms" << std::endl; - std::cout << std::endl << "Throughput: " << 1000 * static_cast(FLAGS_ni) * batchSize / total << " FPS" << std::endl; - std::cout << std::endl; - - /** Show performance results **/ - if (FLAGS_pc) { - printPerformanceCounts(infer_request, std::cout); - } - } - catch (const std::exception& error) { - slog::err << "" << error.what() << slog::endl; - return 1; - } - catch (...) 
{
-        slog::err << "Unknown/internal exception happened." << slog::endl;
-        return 1;
-    }
-
-    slog::info << "Execution successful" << slog::endl;
-    return 0;
-}
diff --git a/inference-engine/samples/classification_sample_async/CMakeLists.txt b/inference-engine/samples/classification_sample_async/CMakeLists.txt
index 9e37440baf5a6a..36f5d1a71dfc0f 100644
--- a/inference-engine/samples/classification_sample_async/CMakeLists.txt
+++ b/inference-engine/samples/classification_sample_async/CMakeLists.txt
@@ -2,27 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
-set (TARGET_NAME "classification_sample_async")
-
-file (GLOB SRC
-        ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
-        )
-
-# Create named folders for the sources within the .vcproj
-# Empty name lists them directly under the .vcproj
-source_group("src" FILES ${SRC})
-
-link_directories(${LIB_FOLDER})
-
-# Create library file from sources.
-add_executable(${TARGET_NAME} ${SRC})
-
-set_target_properties(${TARGET_NAME} PROPERTIES "CMAKE_CXX_FLAGS" "${CMAKE_CXX_FLAGS} -fPIE"
-COMPILE_PDB_NAME ${TARGET_NAME})
-
-
-target_link_libraries(${TARGET_NAME} ${InferenceEngine_LIBRARIES} IE::ie_cpu_extension format_reader gflags)
-
-if(UNIX)
-    target_link_libraries(${TARGET_NAME} ${LIB_DL} pthread)
-endif()
+ie_add_sample(NAME classification_sample_async
+              SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp"
+              HEADERS classification_sample_async.h
+              DEPENDENCIES format_reader)
diff --git a/inference-engine/samples/classification_sample_async/README.md b/inference-engine/samples/classification_sample_async/README.md
index 2afc8fd5ee3ebd..8fd87be306729c 100644
--- a/inference-engine/samples/classification_sample_async/README.md
+++ b/inference-engine/samples/classification_sample_async/README.md
@@ -1,31 +1,25 @@
 # Image Classification C++ Sample Async
 
-This sample demonstrates how to build and execute inference in pipelined mode on the example of classification networks.
+This sample demonstrates how to run the Image Classification sample application with inference executed in the asynchronous mode.
 
 > **NOTE:** This topic describes usage of the C++ implementation of the Image Classification Sample Async. For the Python* implementation, refer to [Image Classification Python* Sample Async](./inference-engine/ie_bridges/python/sample/classification_sample_async/README.md).
 
-The pipelined mode might increase the throughput of pictures. The latency of one inference will be the same as for synchronous execution.
+The sample demonstrates how to use the new Infer Request API of Inference Engine in applications.
+Refer to [Integrate the Inference Engine New Request API with Your Application](./docs/IE_DG/Integrate_with_customer_application_new_API.md) for details.
+The sample demonstrates how to build and execute an inference request 10 times in the asynchronous mode on the example of classification networks.
+The asynchronous mode might increase the throughput of pictures.
 
-The throughput increases due to the following reasons:
-* Some plugins have heterogeneity inside themselves: data transfer, execution on a remote device, and pre- and post-processing on the host
-* Use of the explicit heterogeneous plugin, with different parts of the network executed on different devices
-
-When two or more devices process one image, creating several infer requests and starting asynchronous inference allows for using the devices in the most efficient way.
-If two devices are involved in the execution, the optimal value for the `-nireq` option is 2.
-
-To process infer requests more efficiently, Classification Sample Async uses a round-robin algorithm: it starts execution of the current infer request and switches to waiting for the results of the previous one; when the wait finishes, it swaps the infer requests and repeats the procedure.
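Condensed, the pipelined scheme that this diff removes looks roughly like the sketch below (an editor's illustration distilled from the deleted main.cpp shown further down, for the `-nireq 2` case; `exeNetwork` is assumed to be an already loaded `ExecutableNetwork` with its input blobs filled, `niter` is the iteration count, and error handling is omitted):

```cpp
#include <utility>
#include <vector>

#include <inference_engine.hpp>

// Two infer requests alternate: while one executes asynchronously, the host
// waits on the request launched in the previous iteration.
void runPipelined(InferenceEngine::ExecutableNetwork &exeNetwork, size_t niter) {
    std::vector<InferenceEngine::InferRequest> requests = {
        exeNetwork.CreateInferRequest(), exeNetwork.CreateInferRequest()};
    size_t current = 0, previous = 1;
    for (size_t iter = 0; iter < niter + requests.size(); ++iter) {
        if (iter < niter) {
            requests[current].StartAsync();  // launch the current request
        }
        // Wait for the request launched one iteration earlier; the extra trailing
        // iterations drain the request that is still in flight at the end.
        requests[previous].Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
        std::swap(current, previous);
    }
}
```

For `-nireq` values above 2, the removed code generalizes the same rotation with modulo arithmetic over the request vector.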
-
-Another aspect required for good throughput is the number of iterations: only with a large number of iterations can you emulate real application work and get good performance.
-
-The batch mode is an attribute independent of the pipelined mode. Pipelined mode works efficiently with any batch size.
+The batch mode is an attribute independent of the asynchronous mode. Asynchronous mode works efficiently with any batch size.
 
 ## How It Works
 
-Upon the start-up, the sample application reads command line parameters and loads a network and an image to the Inference
-Engine plugin.
-Then the application creates several infer requests, as set by the `-nireq` parameter, and loads images for inference.
+Upon the start-up, the sample application reads command line parameters and loads the specified network and input images (or a
+folder with images) to the Inference Engine plugin. The batch size of the network is set according to the number of read images.
+
+Then, the sample creates an inference request object and assigns a completion callback for it. Within the completion callback
+handling, the inference request is executed again.
 
-Then in a loop it starts inference for the current infer request and switches to waiting for the previous one. When results are ready, it swaps the infer requests.
+After that, the application starts inference for the first infer request and waits for the 10th inference request execution to complete.
 
 When inference is done, the application outputs data to the standard output stream.
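The callback-driven loop described above appears in full in the new main.cpp later in this diff; condensed, it amounts to the following sketch (names follow the sample; `inferRequest` is assumed to be an already created `InferRequest` with its input blob filled, and blob setup and error handling are omitted):

```cpp
#include <condition_variable>
#include <mutex>

#include <inference_engine.hpp>

// A single infer request is re-submitted from its own completion callback until
// numIterations executions have finished; the main thread blocks on a condition
// variable until the callback signals the last completion.
void runAsyncLoop(InferenceEngine::InferRequest &inferRequest, size_t numIterations) {
    size_t curIteration = 0;
    std::condition_variable condVar;
    std::mutex mutex;

    inferRequest.SetCompletionCallback([&] {
        curIteration++;                  // one more asynchronous execution finished
        if (curIteration < numIterations) {
            inferRequest.StartAsync();   // reuse the same request for the next run
        } else {
            condVar.notify_one();        // unblock the waiting main thread
        }
    });

    inferRequest.StartAsync();           // first submission starts the chain
    std::unique_lock<std::mutex> lock(mutex);
    condVar.wait(lock, [&] { return curIteration == numIterations; });
}
```

Because exactly one request is in flight at any time, this pattern illustrates the API rather than maximizing throughput, which is why the reworked sample refers performance measurements to the dedicated benchmark_app tool.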
@@ -43,36 +37,16 @@ InferenceEngine:
 
 classification_sample_async [OPTION]
 Options:
 
-    -h
-                            Print a usage message.
-    -i "<path>" "<path>"
-                            Required. Path to a folder with images or path to image files: a .ubyte file for LeNet
-                            and a .bmp file for the other networks.
-    -m "<path>"
-                            Required. Path to an .xml file with a trained model.
-    -l "<absolute_path>"
-                            Required for CPU. Absolute path to a shared library with the kernel implementations
-        Or
-    -c "<absolute_path>"
-                            Required for GPU custom kernels. Absolute path to the .xml file with kernel descriptions
-    -pp "<path>"
-                            Optional. Path to a plugin folder.
-    -d "<device>"
-                            Optional. Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is acceptable. Sample will look for a suitable plugin for device specified. Default value is "CPU".
-    -nt "<integer>"
-                            Optional. Number of top results. Default value is 10.
-    -ni "<integer>"
-                            Optional. Number of iterations. Default value is 1.
-    -pc
-                            Optional. Enables per-layer performance report
-    -nireq "<integer>"
-                            Optional. Number of infer requests for pipelined mode. Default value is 1.
-    -p_msg
-                            Optional. Enables messages from a plugin
-    -nthreads "<integer>"
-                            Optional. Number of threads to use for inference on the CPU (including HETERO cases)
-    -pin "YES"/"NO"
-                            Optional. Enable ("YES", default) or disable ("NO") CPU threads pinning for CPU-involved inference
+    -h                      Print a usage message.
+    -i "<path>"             Required. Path to a folder with images or path to image files: a .ubyte file for LeNet and a .bmp file for the other networks.
+    -m "<path>"             Required. Path to an .xml file with a trained model.
+    -l "<absolute_path>"    Required for CPU custom layers. Absolute path to a shared library with the kernels implementation
+          Or
+    -c "<absolute_path>"    Required for GPU custom kernels. Absolute path to the .xml file with kernels description
+    -d "<device>"           Optional. Specify the target device to infer on (the list of available devices is shown below). Default value is CPU. Sample will look for a suitable plugin for device specified.
+    -nt "<integer>"         Optional. Number of top results. Default value is 10.
+    -p_msg                  Optional. Enables messages from a plugin
+
 ```
 
 Running the application with the empty list of options yields the usage message given above and an error message.
 
@@ -81,15 +55,14 @@ To run the sample, use AlexNet and GoogLeNet or other public or pre-trained imag
 
 > **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
 
-You can do inference on an image using a trained AlexNet network on FPGA with fallback to CPU using the following command:
+You can do inference of an image using a trained AlexNet network on FPGA with fallback to CPU using the following command:
 
 ```sh
-./classification_sample_async -i <path_to_image>/cat.bmp -m <path_to_model>/alexnet_fp32.xml -nt 5 -d HETERO:FPGA,CPU -nireq 2 -ni 200
+./classification_sample_async -i <path_to_image>/cat.bmp -m <path_to_model>/alexnet_fp32.xml -nt 5 -d HETERO:FPGA,CPU
```
 
 ## Sample Output
 
 By default the application outputs top-10 inference results for each infer request.
-In addition to this information, it provides a throughput value measured in frames per second.
 
 ## See Also
 * [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
diff --git a/inference-engine/samples/classification_sample_async/classification_sample_async.h b/inference-engine/samples/classification_sample_async/classification_sample_async.h
index 2a44ac39b4d57f..cb561db342fd8c 100644
--- a/inference-engine/samples/classification_sample_async/classification_sample_async.h
+++ b/inference-engine/samples/classification_sample_async/classification_sample_async.h
@@ -9,12 +9,6 @@
 #include
 #include
 
-#ifdef _WIN32
-#include <os/windows/w_dirent.h>
-#else
-#include <dirent.h>
-#endif
-
 /// @brief message for help argument
 static const char help_message[] = "Print a usage message.";
 
@@ -22,32 +16,16 @@ static const char help_message[] = "Print a usage message.";
 /// @brief message for images argument
 static const char image_message[] = "Required. Path to a folder with images or path to image files: a .ubyte file for LeNet "\
 "and a .bmp file for the other networks.";
 
-/// @brief message for plugin_path argument
-static const char plugin_path_message[] = "Optional. Path to a plugin folder.";
-
 /// @brief message for model argument
 static const char model_message[] = "Required. Path to an .xml file with a trained model.";
 
 /// @brief message for assigning cnn calculation to device
-static const char target_device_message[] = "Optional. Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is acceptable. " \
-    "Sample will look for a suitable plugin for device specified. Default value is CPU";
-
-/// @brief message for performance counters
-static const char performance_counter_message[] = "Optional. Enables per-layer performance report";
+static const char target_device_message[] = "Optional. Specify the target device to infer on (the list of available devices is shown below). " \
+    "Default value is CPU. Sample will look for a suitable plugin for device specified.";
 
 /// @brief message for top results number
 static const char ntop_message[] = "Optional. Number of top results. Default value is 10.";
 
-/// @brief message for iterations count
Default value is 1."; - -/// @brief message for iterations count -static const char ninfer_request_message[] = "Optional. Number of infer request for pipelined mode. Default value is 1."; - -/// @brief message for #threads for CPU inference -static const char infer_num_threads_message[] = "Optional. Number of threads to use for inference on the CPU " - "(including HETERO cases)."; - /// @brief message for clDNN custom kernels desc static const char custom_cldnn_message[] = "Required for GPU custom kernels."\ "Absolute path to the .xml file with kernels description"; @@ -56,10 +34,6 @@ static const char custom_cldnn_message[] = "Required for GPU custom kernels."\ static const char custom_cpu_library_message[] = "Required for CPU custom layers." \ "Absolute path to a shared library with the kernels implementation"; -// @brief message for CPU threads pinning option -static const char cpu_threads_pinning_message[] = "Optional. Enable (\"YES\", default) or disable (\"NO\")" \ - "CPU threads pinning for CPU-involved inference."; - /// @brief message for plugin messages static const char plugin_message[] = "Optional. Enables messages from a plugin"; @@ -75,18 +49,12 @@ DEFINE_string(i, "", image_message); /// It is a required parameter DEFINE_string(m, "", model_message); -/// @brief Define parameter for set path to plugins
-DEFINE_string(pp, "", plugin_path_message); - /// @brief device the target device to infer on
DEFINE_string(d, "CPU", target_device_message); /// @brief Top results number (default 10)
DEFINE_uint32(nt, 10, ntop_message); -/// @brief Enable per-layer performance report -DEFINE_bool(pc, false, performance_counter_message); - /// @brief Define parameter for clDNN custom kernels path
/// Default is ./lib DEFINE_string(c, "", custom_cldnn_message); @@ -95,22 +63,9 @@ DEFINE_string(c, "", custom_cldnn_message); /// It is a optional parameter DEFINE_string(l, "", custom_cpu_library_message); -/// @brief Iterations count (default 1) -DEFINE_uint32(ni, 1, iterations_count_message); - -/// @brief Number of infer requests -DEFINE_uint32(nireq, 1, ninfer_request_message); - /// @brief Enable plugin messages DEFINE_bool(p_msg, false, plugin_message); -/// @brief Enable plugin messages -DEFINE_string(pin, "YES", cpu_threads_pinning_message); - -/// @brief Number of threads to use for inference on the CPU (also affects Hetero cases) -DEFINE_int32(nthreads, 0, infer_num_threads_message); - - /** * @brief This function show a help message */ @@ -122,17 +77,10 @@ static void showUsage() { std::cout << " -h " << help_message << std::endl; std::cout << " -i \"\" " << image_message << std::endl; std::cout << " -m \"\" " << model_message << std::endl; - std::cout << " -l \"\" " << custom_cpu_library_message << std::endl; + std::cout << " -l \"\" " << custom_cpu_library_message << std::endl; std::cout << " Or" << std::endl; - std::cout << " -c \"\" " << custom_cldnn_message << std::endl; - std::cout << " -pp \"\" " << plugin_path_message << std::endl; + std::cout << " -c \"\" " << custom_cldnn_message << std::endl; std::cout << " -d \"\" " << target_device_message << std::endl; std::cout << " -nt \"\" " << ntop_message << std::endl; - std::cout << " -ni \"\" " << iterations_count_message << std::endl; - std::cout << " -pc " << performance_counter_message << std::endl; - std::cout << " -nireq \"\" " << ninfer_request_message << std::endl; std::cout << " -p_msg " << plugin_message << std::endl; - std::cout << " Some CPU-specific performance options" << std::endl; - std::cout << " -nthreads \"\" " << infer_num_threads_message << std::endl; - std::cout << " -pin \"YES\"/\"NO\" " << cpu_threads_pinning_message << std::endl; } diff --git a/inference-engine/samples/classification_sample_async/main.cpp b/inference-engine/samples/classification_sample_async/main.cpp index f73f12628512b1..d4e99eda4bf05f 100644 --- a/inference-engine/samples/classification_sample_async/main.cpp +++ b/inference-engine/samples/classification_sample_async/main.cpp @@ -4,17 +4,17 @@ /** * @brief The entry point the Inference Engine sample application -* @file classification_sample/main.cpp -* @example classification_sample/main.cpp +* @file classification_sample_async/main.cpp +* @example classification_sample_async/main.cpp */ #include -#include #include -#include #include #include #include +#include +#include #include @@ -41,18 +41,11 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) { gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); if (FLAGS_h) { showUsage(); + showAvailableDevices(); return false; } slog::info << "Parsing input parameters" << slog::endl; - if (FLAGS_ni < 1) { - throw std::logic_error("Parameter -ni must be more than 0 ! (default 1)"); - } - - if (FLAGS_nireq < 1) { - throw std::logic_error("Parameter -nireq must be more than 0 ! 
(default 1)"); - } - if (FLAGS_i.empty()) { throw std::logic_error("Parameter -i is not set"); } @@ -61,10 +54,6 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) { throw std::logic_error("Parameter -m is not set"); } - if (FLAGS_ni < FLAGS_nireq) { - throw std::logic_error("Number of iterations could not be less than requests quantity"); - } - return true; } @@ -83,11 +72,13 @@ int main(int argc, char *argv[]) { if (imageNames.empty()) throw std::logic_error("No suitable images were found"); // ----------------------------------------------------------------------------------------------------- - // --------------------------- 1. Load Plugin for inference engine ------------------------------------- - slog::info << "Loading plugin" << slog::endl; - InferencePlugin plugin = PluginDispatcher({ FLAGS_pp }).getPluginByDevice(FLAGS_d); + // --------------------------- 1. Load inference engine ------------------------------------- + slog::info << "Creating Inference Engine" << slog::endl; + + Core ie; + if (FLAGS_p_msg) { - static_cast(plugin)->SetLogCallback(error_listener); + ie.SetLogCallback(error_listener); } /** Loading default extensions **/ @@ -97,24 +88,23 @@ int main(int argc, char *argv[]) { * custom MKLDNNPlugin layer implementations. These layers are not supported * by mkldnn, but they can be useful for inferring custom topologies. **/ - plugin.AddExtension(std::make_shared()); + ie.AddExtension(std::make_shared(), "CPU"); } if (!FLAGS_l.empty()) { // CPU(MKLDNN) extensions are loaded as a shared library and passed as a pointer to base extension IExtensionPtr extension_ptr = make_so_pointer(FLAGS_l); - plugin.AddExtension(extension_ptr); + ie.AddExtension(extension_ptr, "CPU"); slog::info << "CPU Extension loaded: " << FLAGS_l << slog::endl; } if (!FLAGS_c.empty()) { // clDNN Extensions are loaded from an .xml description and OpenCL kernel files - plugin.SetConfig({{PluginConfigParams::KEY_CONFIG_FILE, FLAGS_c}}); + ie.SetConfig({{PluginConfigParams::KEY_CONFIG_FILE, FLAGS_c}}, "GPU"); slog::info << "GPU Extension loaded: " << FLAGS_c << slog::endl; } - ResponseDesc resp; - /** Printing plugin version **/ - printPluginVersion(plugin, std::cout); + /** Printing device version **/ + std::cout << ie.GetVersions(FLAGS_d) << std::endl; // ----------------------------------------------------------------------------------------------------- // --------------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------ @@ -138,17 +128,18 @@ int main(int argc, char *argv[]) { /** Taking information about all topology inputs **/ InputsDataMap inputInfo(network.getInputsInfo()); - if (inputInfo.size() != 1) throw std::logic_error("Sample supports topologies only with 1 input"); + if (inputInfo.size() != 1) throw std::logic_error("Sample supports topologies with 1 input only"); auto inputInfoItem = *inputInfo.begin(); /** Specifying the precision and layout of input data provided by the user. - * This should be called before load of the network to the plugin **/ + * This should be called before load of the network to the device **/ inputInfoItem.second->setPrecision(Precision::U8); inputInfoItem.second->setLayout(Layout::NCHW); - std::vector> imagesData; - for (auto & i : imageNames) { + std::vector> imagesData = {}; + std::vector validImageNames = {}; + for (const auto & i : imageNames) { FormatReader::ReaderPtr reader(i.c_str()); if (reader.get() == nullptr) { slog::warn << "Image " + i + " cannot be read!" 
<< slog::endl; @@ -158,8 +149,9 @@ int main(int argc, char *argv[]) { std::shared_ptr data( reader->getData(inputInfoItem.second->getTensorDesc().getDims()[3], inputInfoItem.second->getTensorDesc().getDims()[2])); - if (data.get() != nullptr) { + if (data != nullptr) { imagesData.push_back(data); + validImageNames.push_back(i); } } if (imagesData.empty()) throw std::logic_error("Valid input images were not found!"); @@ -169,56 +161,27 @@ int main(int argc, char *argv[]) { size_t batchSize = network.getBatchSize(); slog::info << "Batch size is " << std::to_string(batchSize) << slog::endl; - // ------------------------------ Prepare output blobs ------------------------------------------------- - slog::info << "Preparing output blobs" << slog::endl; - - OutputsDataMap outputInfo(network.getOutputsInfo()); - std::vector outputBlobs; - for (size_t i = 0; i < FLAGS_nireq; i++) { - auto outputBlob = make_shared_blob::value_type>(outputInfo.begin()->second->getTensorDesc()); - outputBlob->allocate(); - outputBlobs.push_back(outputBlob); - } // ----------------------------------------------------------------------------------------------------- - // --------------------------- 4. Loading model to the plugin ------------------------------------------ - slog::info << "Loading model to the plugin" << slog::endl; - - std::map config; - if (FLAGS_pc) - config[PluginConfigParams::KEY_PERF_COUNT] = PluginConfigParams::YES; - if (FLAGS_d.find("CPU") != std::string::npos) { // CPU supports few special performance-oriented keys - // limit threading for CPU portion of inference - config[PluginConfigParams::KEY_CPU_THREADS_NUM] = std::to_string(FLAGS_nthreads); - // pin threads for CPU portion of inference - config[PluginConfigParams::KEY_CPU_BIND_THREAD] = FLAGS_pin; - // for pure CPU execution, more throughput-oriented execution via streams - if (FLAGS_d == "CPU") - config[PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS] = std::to_string(FLAGS_nireq); - } - ExecutableNetwork executable_network = plugin.LoadNetwork(network, config); + // --------------------------- 4. Loading model to the device ------------------------------------------ + slog::info << "Loading model to the device" << slog::endl; + ExecutableNetwork executable_network = ie.LoadNetwork(network, FLAGS_d); // ----------------------------------------------------------------------------------------------------- // --------------------------- 5. Create infer request ------------------------------------------------- - std::vector inferRequests; - for (size_t i = 0; i < FLAGS_nireq; i++) { - InferRequest inferRequest = executable_network.CreateInferRequest(); - inferRequests.push_back(inferRequest); - } + slog::info << "Create infer request" << slog::endl; + InferRequest inferRequest = executable_network.CreateInferRequest(); // ----------------------------------------------------------------------------------------------------- // --------------------------- 6. Prepare input -------------------------------------------------------- - BlobMap inputBlobs; for (auto & item : inputInfo) { - auto input = make_shared_blob::value_type>(item.second->getTensorDesc()); - input->allocate(); - inputBlobs[item.first] = input; - - auto dims = input->getTensorDesc().getDims(); + Blob::Ptr inputBlob = inferRequest.GetBlob(item.first); + SizeVector dims = inputBlob->getTensorDesc().getDims(); /** Fill input tensor with images. 
First b channel, then g and r channels **/ size_t num_channels = dims[1]; size_t image_size = dims[3] * dims[2]; + auto data = inputBlob->buffer().as::value_type *>(); /** Iterate over all input images **/ for (size_t image_id = 0; image_id < imagesData.size(); ++image_id) { /** Iterate over all pixel in image (b,g,r) **/ @@ -226,99 +189,77 @@ int main(int argc, char *argv[]) { /** Iterate over all channels **/ for (size_t ch = 0; ch < num_channels; ++ch) { /** [images stride + channels stride + pixel id ] all in bytes **/ - input->data()[image_id * image_size * num_channels + ch * image_size + pid] = imagesData.at(image_id).get()[pid*num_channels + ch]; + data[image_id * image_size * num_channels + ch * image_size + pid] = imagesData.at(image_id).get()[pid*num_channels + ch]; } } } } - for (size_t i = 0; i < FLAGS_nireq; i++) { - inferRequests[i].SetBlob(inputBlobs.begin()->first, inputBlobs.begin()->second); - inferRequests[i].SetBlob(outputInfo.begin()->first, outputBlobs[i]); - } // ----------------------------------------------------------------------------------------------------- // --------------------------- 7. Do inference --------------------------------------------------------- - slog::info << "Start inference (" << FLAGS_ni << " iterations)" << slog::endl; - - typedef std::chrono::high_resolution_clock Time; - typedef std::chrono::duration> ms; - typedef std::chrono::duration fsec; - - // warming up - inferRequests[0].StartAsync(); - inferRequests[0].Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY); - double total = 0.0; - /** Start inference & calc performance **/ - auto t0 = Time::now(); + size_t numIterations = 10; + size_t curIteration = 0; + std::condition_variable condVar; + + inferRequest.SetCompletionCallback( + [&] { + curIteration++; + slog::info << "Completed " << curIteration << " async request execution" << slog::endl; + if (curIteration < numIterations) { + /* here a user can read output containing inference results and put new input + to repeat async request again */ + inferRequest.StartAsync(); + } else { + /* continue sample execution after last Asynchronous inference request execution */ + condVar.notify_one(); + } + }); - size_t currentInfer = 0; - size_t prevInfer = (FLAGS_nireq > 1) ? 1 : 0; + /* Start async request for the first time */ + slog::info << "Start inference (" << numIterations << " asynchronous executions)" << slog::endl; + inferRequest.StartAsync(); - for (size_t iter = 0; iter < FLAGS_ni + FLAGS_nireq; ++iter) { - if (iter < FLAGS_ni) { - inferRequests[currentInfer].StartAsync(); - } - inferRequests[prevInfer].Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY); + /* Wait all repetitions of the async request */ + std::mutex mutex; + std::unique_lock lock(mutex); + condVar.wait(lock, [&]{ return curIteration == numIterations; }); - currentInfer++; - if (currentInfer >= FLAGS_nireq) { - currentInfer = 0; - } - prevInfer++; - if (prevInfer >= FLAGS_nireq) { - prevInfer = 0; - } - } - auto t1 = Time::now(); - fsec fs = t1 - t0; - ms d = std::chrono::duration_cast(fs); - total = d.count(); // ----------------------------------------------------------------------------------------------------- // --------------------------- 8. 
// --------------------------- 8. Process output ------------------------------------------------------- slog::info << "Processing output blobs" << slog::endl; + OutputsDataMap outputInfo(network.getOutputsInfo()); + if (outputInfo.size() != 1) throw std::logic_error("Sample supports topologies with 1 output only"); + Blob::Ptr outputBlob = inferRequest.GetBlob(outputInfo.begin()->first); + + /** Validating -nt value **/ + const size_t resultsCnt = outputBlob->size() / batchSize; + if (FLAGS_nt > resultsCnt || FLAGS_nt < 1) { + slog::warn << "-nt " << FLAGS_nt << " is not available for this network (-nt should be less than " \ + << resultsCnt+1 << " and more than 0)\n maximal value will be used: " << resultsCnt << slog::endl; + FLAGS_nt = resultsCnt; + } - for (size_t i = 0; i < FLAGS_nireq; i++) { - /** Validating -nt value **/ - const size_t resultsCnt = outputBlobs[i]->size() / batchSize; - if (FLAGS_nt > resultsCnt || FLAGS_nt < 1) { - slog::warn << "-nt " << FLAGS_nt << " is not available for this network (-nt should be less than " \ - << resultsCnt+1 << " and more than 0)\n will be used maximal value : " << resultsCnt << slog::endl; - FLAGS_nt = resultsCnt; - } - - /** Read labels from file (e.x. AlexNet.labels) **/ - std::string labelFileName = fileNameNoExt(FLAGS_m) + ".labels"; - std::vector labels; - - std::ifstream inputFile; - inputFile.open(labelFileName, std::ios::in); - if (inputFile.is_open()) { - std::string strLine; - while (std::getline(inputFile, strLine)) { - trim(strLine); - labels.push_back(strLine); - } + /** Read labels from file (e.g. AlexNet.labels) **/ + std::string labelFileName = fileNameNoExt(FLAGS_m) + ".labels"; + std::vector<std::string> labels; + + std::ifstream inputFile; + inputFile.open(labelFileName, std::ios::in); + if (inputFile.is_open()) { + std::string strLine; + while (std::getline(inputFile, strLine)) { + trim(strLine); + labels.push_back(strLine); } - - ClassificationResult classificationResult(outputBlobs[i], imageNames, - batchSize, FLAGS_nt, - labels); - classificationResult.print(); } + + ClassificationResult classificationResult(outputBlob, validImageNames, + batchSize, FLAGS_nt, + labels); + classificationResult.print(); // ----------------------------------------------------------------------------------------------------- - std::cout << std::endl << "total inference time: " << total << std::endl; - std::cout << std::endl << "Throughput: " << 1000 * static_cast(FLAGS_ni) * batchSize / total << " FPS" << std::endl; - std::cout << std::endl; - - /** Show performance results **/ - std::map performanceMap; - if (FLAGS_pc) { - for (size_t nireq = 0; nireq < FLAGS_nireq; nireq++) { - printPerformanceCounts(inferRequests[nireq], std::cout); - } - } } catch (const std::exception& error) { slog::err << error.what() << slog::endl; @@ -330,5 +271,7 @@ int main(int argc, char *argv[]) { } slog::info << "Execution successful" << slog::endl; + slog::info << slog::endl << "This sample is an API example, for any performance measurements " + "please use the dedicated benchmark_app tool" << slog::endl; return 0; } diff --git a/inference-engine/samples/common/format_reader/CMakeLists.txt b/inference-engine/samples/common/format_reader/CMakeLists.txt index e3ecd5850b0dc9..c4011c48a7034e 100644 --- a/inference-engine/samples/common/format_reader/CMakeLists.txt +++ b/inference-engine/samples/common/format_reader/CMakeLists.txt @@ -13,20 +13,11 @@ file (GLOB LIBRARY_HEADERS ) # Find OpenCV components if exist -find_package(OpenCV COMPONENTS imgcodecs videoio imgproc QUIET) +find_package(OpenCV COMPONENTS
imgcodecs videoio imgproc QUIET) if(NOT(OpenCV_FOUND)) message(WARNING "OPENCV is disabled or not found, " ${TARGET_NAME} " is built without OPENCV support") -else() - add_definitions(-DUSE_OPENCV) endif() -add_definitions(-DIMPLEMENT_FORMAT_READER) - -if(UNIX) - list(REMOVE_ITEM MAIN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/dllmain.cpp) -endif() -add_definitions(-DIMPLEMENT_INFERENCE_ENGINE_API) - # Create named folders for the sources within the .vcproj # Empty name lists them directly under the .vcproj source_group("src" FILES ${LIBRARY_SRC}) @@ -35,12 +26,20 @@ source_group("include" FILES ${LIBRARY_HEADERS}) # Create library file from sources. add_library(${TARGET_NAME} SHARED ${MAIN_SRC} ${LIBRARY_HEADERS}) -target_link_libraries(${TARGET_NAME} ${OpenCV_LIBRARIES}) -if(CMAKE_VERSION VERSION_LESS "2.8.11") - include_directories (${CMAKE_CURRENT_SOURCE_DIR}) -else() - target_include_directories(${TARGET_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +if(OpenCV_FOUND) + target_link_libraries(${TARGET_NAME} PRIVATE ${OpenCV_LIBRARIES}) + target_compile_definitions(${TARGET_NAME} PRIVATE USE_OPENCV) endif() +target_compile_definitions(${TARGET_NAME} PRIVATE IMPLEMENT_FORMAT_READER) + +target_include_directories(${TARGET_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}" + "${CMAKE_CURRENT_SOURCE_DIR}/..") + set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}) + +# developer package + +export(TARGETS ${TARGET_NAME} NAMESPACE IE:: + APPEND FILE "${CMAKE_BINARY_DIR}/targets_developer.cmake") diff --git a/inference-engine/samples/common/samples/args_helper.hpp b/inference-engine/samples/common/samples/args_helper.hpp index a38570b9033da6..9f41f216126af8 100644 --- a/inference-engine/samples/common/samples/args_helper.hpp +++ b/inference-engine/samples/common/samples/args_helper.hpp @@ -15,6 +15,8 @@ #include #include +#include + #ifdef _WIN32 #include #else diff --git a/inference-engine/samples/common/samples/common.hpp b/inference-engine/samples/common/samples/common.hpp index 44bcca3f3e087c..fbcd249f1c2e34 100644 --- a/inference-engine/samples/common/samples/common.hpp +++ b/inference-engine/samples/common/samples/common.hpp @@ -14,23 +14,16 @@ #include #include #include -#include -#include #include -#include -#include #include #include #include - #include -#include +#include -#include -#include -#include +#include +#include #include -#include #include #ifndef UNUSED @@ -51,7 +44,7 @@ class ConsoleErrorListener : public InferenceEngine::IErrorListener { * @param msg Error message */ void onError(const char *msg) noexcept override { - std::clog << "Plugin message: " << msg << std::endl; + std::clog << "Device message: " << msg << std::endl; } }; @@ -66,48 +59,6 @@ inline std::string &trim(std::string &s) { return s; } -/** -* @brief Converts string to TargetDevice -* @param deviceName - string value representing device -* @return TargetDevice value that corresponds to input string. 
-* eDefault in case no corresponding value was found -*/ -static InferenceEngine::TargetDevice getDeviceFromStr(const std::string &deviceName) { - return InferenceEngine::TargetDeviceInfo::fromStr(deviceName); -} - -/** -* @brief Loads plugin from directories -* @param pluginDirs - plugin paths -* @param plugin - plugin name -* @param device - device to infer on -* @return Plugin pointer -*/ -static InferenceEngine::InferenceEnginePluginPtr selectPlugin(const std::vector &pluginDirs, - const file_name_t &plugin, - InferenceEngine::TargetDevice device) { - InferenceEngine::PluginDispatcher dispatcher(pluginDirs); - - if (!plugin.empty()) { - return dispatcher.getPluginByName(plugin); - } else { - return dispatcher.getSuitablePlugin(device); - } -} - -/** - * @brief Loads plugin from directories - * @param pluginDirs - plugin paths - * @param plugin - plugin name - * @param device - string representation of device to infer on - * @return Plugin pointer - */ -static UNUSED InferenceEngine::InferenceEnginePluginPtr selectPlugin(const std::vector &pluginDirs, - const file_name_t &plugin, - const std::string &device) { - return selectPlugin(pluginDirs, plugin, getDeviceFromStr(device)); -} - /** * @brief Gets filename without extension * @param filepath - full file name @@ -146,64 +97,47 @@ static UNUSED std::ostream &operator<<(std::ostream &os, const InferenceEngine:: return os; } -/** - * @class PluginVersion - * @brief A PluginVersion class stores plugin version and initialization status - */ -struct PluginVersion : public InferenceEngine::Version { - bool initialized = false; +inline std::ostream &operator<<(std::ostream &os, const InferenceEngine::Version &version) { + os << "\t" << version.description << " version ......... "; + os << version.apiVersion.major << "." << version.apiVersion.minor; - explicit PluginVersion(const InferenceEngine::Version *ver) { - if (nullptr == ver) { - return; - } - InferenceEngine::Version::operator=(*ver); - initialized = true; - } - - operator bool() const noexcept { - return initialized; - } -}; + os << "\n\tBuild ........... "; + os << version.buildNumber; -static UNUSED std::ostream &operator<<(std::ostream &os, const PluginVersion &version) { - os << "\tPlugin version ......... "; - if (!version) { - os << "UNKNOWN"; - } else { - os << version.apiVersion.major << "." << version.apiVersion.minor; - } - - os << "\n\tPlugin name ............ "; - if (!version || version.description == nullptr) { - os << "UNKNOWN"; - } else { - os << version.description; - } + return os; +} - os << "\n\tPlugin build ........... 
"; - if (!version || version.buildNumber == nullptr) { - os << "UNKNOWN"; - } else { - os << version.buildNumber; +inline std::ostream &operator<<(std::ostream &os, const std::map &versions) { + for (auto && version : versions) { + os << "\t" << version.first << std::endl; + os << version.second << std::endl; } return os; } -inline void printPluginVersion(InferenceEngine::InferenceEnginePluginPtr ptr, std::ostream& stream) { - const InferenceEngine::Version *pluginVersion = nullptr; - ptr->GetVersion(pluginVersion); - stream << pluginVersion << std::endl; -} - static UNUSED std::vector> blobToImageOutputArray(InferenceEngine::TBlob::Ptr output, size_t *pWidth, size_t *pHeight, size_t *pChannels) { std::vector> outArray; - size_t W = output->dims().at(0); - size_t H = output->dims().at(1); - size_t C = output->dims().at(2); + size_t W = 0, C = 0, H = 0; + + auto outputDims = output->getTensorDesc().getDims(); + if (outputDims.size() == 3) { + C = outputDims.at(0); + H = outputDims.at(1); + W = outputDims.at(2); + } else if (outputDims.size() == 4) { + C = outputDims.at(1); + H = outputDims.at(2); + W = outputDims.at(3); + } else if (outputDims.size() == 5) { + C = outputDims.at(1); + H = outputDims.at(3); + W = outputDims.at(4); + } else { + THROW_IE_EXCEPTION << "Output blob has unsupported layout " << output->getTensorDesc().getLayout(); + } // Get classes const float *outData = output->data(); @@ -632,14 +566,6 @@ static UNUSED bool writeOutputBmp(unsigned char *data, size_t height, size_t wid return true; } -inline double getDurationOf(std::function func) { - auto t0 = std::chrono::high_resolution_clock::now(); - func(); - auto t1 = std::chrono::high_resolution_clock::now(); - std::chrono::duration fs = t1 - t0; - return std::chrono::duration_cast>>(fs).count(); -} - static std::vector> perfCountersSorted(std::map perfMap) { using perfItem = std::pair; @@ -655,7 +581,7 @@ perfCountersSorted(std::map& performanceMap, - std::ostream &stream, + std::ostream &stream, std::string deviceName, bool bshowHeader = true) { long long totalTime = 0; // Print performance counts @@ -689,27 +615,57 @@ static UNUSED void printPerformanceCounts(const std::map 0) { totalTime += it.second.realTime_uSec; } } stream << std::setw(20) << std::left << "Total time: " + std::to_string(totalTime) << " microseconds" << std::endl; + std::cout << std::endl; + std::cout << "Full device name: " << deviceName << std::endl; + std::cout << std::endl; } -static UNUSED void printPerformanceCounts(InferenceEngine::InferRequest request, std::ostream &stream) { +static UNUSED void printPerformanceCounts(InferenceEngine::InferRequest request, std::ostream &stream, std::string deviceName, bool bshowHeader = true) { auto performanceMap = request.GetPerformanceCounts(); - printPerformanceCounts(performanceMap, stream); + printPerformanceCounts(performanceMap, stream, deviceName, bshowHeader); } -/** - * @deprecated - */ -static UNUSED void printPerformanceCountsPlugin(InferenceEngine::InferenceEnginePluginPtr plugin, std::ostream &stream) { - std::map performanceMap; - plugin->GetPerformanceCounts(performanceMap, nullptr); - printPerformanceCounts(performanceMap, stream); +inline std::map getMapFullDevicesNames(InferenceEngine::Core& ie, std::vector devices) { + std::map devicesMap; + InferenceEngine::Parameter p; + for (std::string& deviceName : devices) { + if (deviceName != "") { + try { + p = ie.GetMetric(deviceName, METRIC_KEY(FULL_DEVICE_NAME)); + devicesMap.insert(std::pair(deviceName, p.as())); + } + catch 
(InferenceEngine::details::InferenceEngineException &) { + } + } + } + return devicesMap; +} + +inline std::string getFullDeviceName(std::map& devicesMap, std::string device) { + std::map::iterator it = devicesMap.find(device); + if (it != devicesMap.end()) { + return it->second; + } else { + return ""; + } +} + +inline std::string getFullDeviceName(InferenceEngine::Core& ie, std::string device) { + InferenceEngine::Parameter p; + try { + p = ie.GetMetric(device, METRIC_KEY(FULL_DEVICE_NAME)); + return p.as(); + } + catch (InferenceEngine::details::InferenceEngineException &) { + return ""; + } } /** @@ -1058,3 +1014,111 @@ static UNUSED void addRectangles(unsigned char *data, size_t height, size_t widt } } } + +inline std::size_t getTensorWidth(const InferenceEngine::TensorDesc& desc) { + const auto& layout = desc.getLayout(); + const auto& dims = desc.getDims(); + const auto& size = dims.size(); + if ((size >= 2) && + (layout == InferenceEngine::Layout::NCHW || + layout == InferenceEngine::Layout::NHWC || + layout == InferenceEngine::Layout::NCDHW || + layout == InferenceEngine::Layout::NDHWC || + layout == InferenceEngine::Layout::OIHW || + layout == InferenceEngine::Layout::CHW || + layout == InferenceEngine::Layout::HW)) { + // Regardless of layout, dimensions are stored in fixed order + return dims.back(); + } else { + THROW_IE_EXCEPTION << "Tensor does not have width dimension"; + } + return 0; +} + +inline std::size_t getTensorHeight(const InferenceEngine::TensorDesc& desc) { + const auto& layout = desc.getLayout(); + const auto& dims = desc.getDims(); + const auto& size = dims.size(); + if ((size >= 2) && + (layout == InferenceEngine::Layout::NCHW || + layout == InferenceEngine::Layout::NHWC || + layout == InferenceEngine::Layout::NCDHW || + layout == InferenceEngine::Layout::NDHWC || + layout == InferenceEngine::Layout::OIHW || + layout == InferenceEngine::Layout::CHW || + layout == InferenceEngine::Layout::HW)) { + // Regardless of layout, dimensions are stored in fixed order + return dims.at(size - 2); + } else { + THROW_IE_EXCEPTION << "Tensor does not have height dimension"; + } + return 0; +} + +inline std::size_t getTensorChannels(const InferenceEngine::TensorDesc& desc) { + const auto& layout = desc.getLayout(); + if (layout == InferenceEngine::Layout::NCHW || + layout == InferenceEngine::Layout::NHWC || + layout == InferenceEngine::Layout::NCDHW || + layout == InferenceEngine::Layout::NDHWC || + layout == InferenceEngine::Layout::C || + layout == InferenceEngine::Layout::CHW || + layout == InferenceEngine::Layout::NC || + layout == InferenceEngine::Layout::CN) { + // Regardless of layout, dimensions are stored in fixed order + const auto& dims = desc.getDims(); + switch (desc.getLayoutByDims(dims)) { + case InferenceEngine::Layout::C: return dims.at(0); + case InferenceEngine::Layout::NC: return dims.at(1); + case InferenceEngine::Layout::CHW: return dims.at(0); + case InferenceEngine::Layout::NCHW: return dims.at(1); + case InferenceEngine::Layout::NCDHW: return dims.at(1); + case InferenceEngine::Layout::SCALAR: // [[fallthrough]] + case InferenceEngine::Layout::BLOCKED: // [[fallthrough]] + default: + THROW_IE_EXCEPTION << "Tensor does not have channels dimension"; + } + } else { + THROW_IE_EXCEPTION << "Tensor does not have channels dimension"; + } + return 0; +} + +inline std::size_t getTensorBatch(const InferenceEngine::TensorDesc& desc) { + const auto& layout = desc.getLayout(); + if (layout == InferenceEngine::Layout::NCHW || + layout == 
InferenceEngine::Layout::NHWC || + layout == InferenceEngine::Layout::NCDHW || + layout == InferenceEngine::Layout::NDHWC || + layout == InferenceEngine::Layout::NC || + layout == InferenceEngine::Layout::CN) { + // Regardless of layout, dimensions are stored in fixed order + const auto& dims = desc.getDims(); + switch (desc.getLayoutByDims(dims)) { + case InferenceEngine::Layout::NC: return dims.at(0); + case InferenceEngine::Layout::NCHW: return dims.at(0); + case InferenceEngine::Layout::NCDHW: return dims.at(0); + case InferenceEngine::Layout::CHW: // [[fallthrough]] + case InferenceEngine::Layout::C: // [[fallthrough]] + case InferenceEngine::Layout::SCALAR: // [[fallthrough]] + case InferenceEngine::Layout::BLOCKED: // [[fallthrough]] + default: + THROW_IE_EXCEPTION << "Tensor does not have batch dimension"; + } + } else { + THROW_IE_EXCEPTION << "Tensor does not have batch dimension"; + } + return 0; +} + +inline void showAvailableDevices() { + InferenceEngine::Core ie; + std::vector<std::string> devices = ie.GetAvailableDevices(); + + std::cout << std::endl; + std::cout << "Available target devices:"; + for (const auto& device : devices) { + std::cout << " " << device; + } + std::cout << " HDDL" << std::endl; +} diff --git a/inference-engine/samples/common/samples/console_progress.hpp b/inference-engine/samples/common/samples/console_progress.hpp index 89b0d74f42e6ee..5edfea80fbc8c7 100644 --- a/inference-engine/samples/common/samples/console_progress.hpp +++ b/inference-engine/samples/common/samples/console_progress.hpp @@ -31,6 +31,7 @@ class ConsoleProgress { if (total == 0) { total = 1; } + std::cout << std::unitbuf; } /** @@ -38,19 +39,23 @@ class ConsoleProgress { * @return */ void showProgress() const { - std::cout << "\rProgress: ["; + std::stringstream strm; + if (!stream_output) { + strm << '\r'; + } + strm << "Progress: ["; size_t i = 0; for (; i < detalization * current / total; i++) { - std::cout << "."; + strm << "."; } for (; i < detalization; i++) { - std::cout << " "; + strm << " "; } - std::cout << "] " << std::fixed << std::setprecision(2) << 100 * static_cast(current) / total << "% done"; + strm << "] " << std::fixed << std::setprecision(2) << 100 * static_cast<float>(current) / total << "% done"; if (stream_output) { - std::cout << std::endl; + std::cout << strm.str() << std::endl; } else { - std::flush(std::cout); + std::cout << strm.str() << std::flush; } } @@ -80,6 +85,6 @@ class ConsoleProgress { * @return */ void finish() { - std::cout << "\n"; + std::cerr << std::nounitbuf << "\n"; } }; diff --git a/inference-engine/samples/common/samples/slog.hpp b/inference-engine/samples/common/samples/slog.hpp index c50b4c94a04c30..186c2cf0a6d410 100644 --- a/inference-engine/samples/common/samples/slog.hpp +++ b/inference-engine/samples/common/samples/slog.hpp @@ -22,6 +22,15 @@ class LogStreamEndLine { }; static constexpr LogStreamEndLine endl; +/** + * @class LogStreamBoolAlpha + * @brief The LogStreamBoolAlpha class implements bool printing for a log stream + */ +class LogStreamBoolAlpha { }; + +static constexpr LogStreamBoolAlpha boolalpha; + + /** * @class LogStream * @brief The LogStream class implements a stream for sample logging @@ -63,6 +72,12 @@ class LogStream { (*_log_stream) << std::endl; return *this; } + + // Specializing for LogStreamBoolAlpha to support slog::boolalpha + LogStream& operator<< (const LogStreamBoolAlpha &/*arg*/) { + (*_log_stream) << std::boolalpha; + return *this; + } }; diff --git a/inference-engine/samples/common/vpu/vpu_tools_common.hpp
b/inference-engine/samples/common/vpu/vpu_tools_common.hpp new file mode 100644 index 00000000000000..e2185a8b955976 --- /dev/null +++ b/inference-engine/samples/common/vpu/vpu_tools_common.hpp @@ -0,0 +1,27 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +static std::map parseConfig(const std::string &configName, char comment = '#') { + std::map config = {}; + + std::ifstream file(configName); + if (!file.is_open()) { + return config; + } + + std::string key, value; + while (file >> key >> value) { + if (key.empty() || key[0] == comment) { + continue; + } + config[key] = value; + } + + return config; +} diff --git a/inference-engine/samples/hello_autoresize_classification/CMakeLists.txt b/inference-engine/samples/hello_autoresize_classification/CMakeLists.txt deleted file mode 100644 index 01deda64929e51..00000000000000 --- a/inference-engine/samples/hello_autoresize_classification/CMakeLists.txt +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (C) 2018-2019 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# - -set (TARGET_NAME "hello_autoresize_classification") - -file (GLOB SRC - ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp - ) - -# Find OpenCV components if exist -find_package(OpenCV COMPONENTS imgcodecs QUIET) -if(NOT(OpenCV_FOUND)) - message(WARNING "OPENCV is disabled or not found, " ${TARGET_NAME} " skipped") - return() -endif() - -source_group("src" FILES ${SRC}) - -link_directories(${LIB_FOLDER}) - -# Create library file from sources. -add_executable(${TARGET_NAME} ${SRC}) - -set_target_properties(${TARGET_NAME} PROPERTIES "CMAKE_CXX_FLAGS" "${CMAKE_CXX_FLAGS} -fPIE" - COMPILE_PDB_NAME ${TARGET_NAME}) - - -target_link_libraries(${TARGET_NAME} ${InferenceEngine_LIBRARIES} ${OpenCV_LIBRARIES}) - -if(UNIX) - target_link_libraries(${TARGET_NAME} ${LIB_DL}) -endif() diff --git a/inference-engine/samples/hello_autoresize_classification/README.md b/inference-engine/samples/hello_autoresize_classification/README.md deleted file mode 100644 index ae841f6202c072..00000000000000 --- a/inference-engine/samples/hello_autoresize_classification/README.md +++ /dev/null @@ -1,33 +0,0 @@ -# Hello Autoresize Classification C++ Sample - -This topic describes how to run the Hello Autoresize Classification sample application. -The sample is simplified version of [Image Classification Sample](./inference-engine/samples/classification_sample/README.md). -It demonstrates how to use the new input autoresize API of Inference Engine in applications. Refer to -[Integrate the Inference Engine New Request API with Your Application](./docs/IE_DG/Integrate_with_customer_application_new_API.md) for details. - -There is also new API introduced to crop a ROI object and set it as input without additional memory re-allocation. -To properly demonstrate this new API, it is required to run several networks in pipeline which is out of scope of this sample. -Please refer to [Object Detection for SSD Demo](./inference-engine/samples/object_detection_demo_ssd_async/README.md), -[Security Barrier Camera Demo](./inference-engine/samples/security_barrier_camera_demo/README.md), or -[Crossroad Camera Demo](./inference-engine/samples/crossroad_camera_demo/README.md) with an example of using of new crop ROI API. - -> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. 
If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md). - -## Running - -To run the sample, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/). - -> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). - -You can do inference on an image using a trained AlexNet network on CPU using the following command: -```sh -./hello_autoresize_classification /alexnet_fp32.xml /cat.bmp CPU -``` - -## Sample Output - -The application outputs top-10 inference results. - -## See Also -* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md) -* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) diff --git a/inference-engine/samples/hello_autoresize_classification/main.cpp b/inference-engine/samples/hello_autoresize_classification/main.cpp deleted file mode 100644 index 9700416cdf4a41..00000000000000 --- a/inference-engine/samples/hello_autoresize_classification/main.cpp +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include -#include -#include - -#include -#include -#include - -using namespace InferenceEngine; - -int main(int argc, char *argv[]) { - try { - // ------------------------------ Parsing and validation of input args --------------------------------- - if (argc != 4) { - std::cout << "Usage : ./hello_autoresize_classification " - << std::endl; - return EXIT_FAILURE; - } - - const std::string input_model{argv[1]}; - const std::string input_image_path{argv[2]}; - const std::string device_name{argv[3]}; - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 1. Load Plugin for inference engine ------------------------------------- - InferencePlugin plugin = PluginDispatcher().getPluginByDevice(device_name); - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------ - size_t batchSize = 1; - CNNNetReader network_reader; - network_reader.ReadNetwork(input_model); - network_reader.ReadWeights(input_model.substr(0, input_model.size() - 4) + ".bin"); - network_reader.getNetwork().setBatchSize(batchSize); - CNNNetwork network = network_reader.getNetwork(); - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 3. 
Configure input & output --------------------------------------------- - // --------------------------- Prepare input blobs ----------------------------------------------------- - InputInfo::Ptr input_info = network.getInputsInfo().begin()->second; - std::string input_name = network.getInputsInfo().begin()->first; - - /* Mark input as resizable by setting of a resize algorithm. - * In this case we will be able to set an input blob of any shape to an infer request. - * Resize and layout conversions are executed automatically during inference */ - input_info->getPreProcess().setResizeAlgorithm(RESIZE_BILINEAR); - input_info->setLayout(Layout::NHWC); - input_info->setPrecision(Precision::U8); - - // --------------------------- Prepare output blobs ---------------------------------------------------- - DataPtr output_info = network.getOutputsInfo().begin()->second; - std::string output_name = network.getOutputsInfo().begin()->first; - - output_info->setPrecision(Precision::FP32); - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 4. Loading model to the plugin ------------------------------------------ - ExecutableNetwork executable_network = plugin.LoadNetwork(network, {}); - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 5. Create infer request ------------------------------------------------- - InferRequest infer_request = executable_network.CreateInferRequest(); - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 6. Prepare input -------------------------------------------------------- - /* Read input image to a blob and set it to an infer request without resize and layout conversions. */ - cv::Mat image = cv::imread(input_image_path); - Blob::Ptr imgBlob = wrapMat2Blob(image); // just wrap Mat data by Blob::Ptr without allocating of new memory - infer_request.SetBlob(input_name, imgBlob); // infer_request accepts input blob of any size - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 7. Do inference -------------------------------------------------------- - typedef std::chrono::high_resolution_clock Time; - typedef std::chrono::duration> ms; - - double total = 0.0; - - /* Running the request synchronously */ - auto t0 = Time::now(); - infer_request.Infer(); // input pre-processing is invoked on this step with resize and layout conversion - auto t1 = Time::now(); - ms d = std::chrono::duration_cast(t1 - t0); - total += d.count(); - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 8. 
Process output ------------------------------------------------------ - Blob::Ptr output = infer_request.GetBlob(output_name); - // Print classification results - ClassificationResult classificationResult(output, {input_image_path}); - classificationResult.print(); - // ----------------------------------------------------------------------------------------------------- - - std::cout << std::endl << "total inference time: " << total << std::endl; - std::cout << std::endl << "Throughput: " << 1000 * batchSize / total << " FPS" << std::endl; - std::cout << std::endl; - } catch (const std::exception & ex) { - std::cerr << ex.what() << std::endl; - return EXIT_FAILURE; - } - return EXIT_SUCCESS; -} diff --git a/inference-engine/samples/hello_classification/CMakeLists.txt b/inference-engine/samples/hello_classification/CMakeLists.txt index 845f7e9d70f12b..bc9b34f2fa4fa2 100644 --- a/inference-engine/samples/hello_classification/CMakeLists.txt +++ b/inference-engine/samples/hello_classification/CMakeLists.txt @@ -2,39 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 # -set (TARGET_NAME "hello_classification") - -file (GLOB SRC - ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp - ) - -# Find OpenCV components if exist -find_package(OpenCV COMPONENTS imgcodecs QUIET) -if(NOT(OpenCV_FOUND)) - message(WARNING "OPENCV is disabled or not found, " ${TARGET_NAME} " skipped") - return() -endif() - -# Create named folders for the sources within the .vcproj -# Empty name lists them directly under the .vcproj -source_group("src" FILES ${SRC}) - -link_directories(${LIB_FOLDER}) - -# Create library file from sources. -add_executable(${TARGET_NAME} ${SRC}) - -if(WIN32) - # This target supports UNICODE on Windows - set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_FLAGS "/D_UNICODE /DUNICODE") -endif() - -set_target_properties(${TARGET_NAME} PROPERTIES "CMAKE_CXX_FLAGS" "${CMAKE_CXX_FLAGS} -fPIE" -COMPILE_PDB_NAME ${TARGET_NAME}) - -target_link_libraries(${TARGET_NAME} ${InferenceEngine_LIBRARIES} ${OpenCV_LIBRARIES}) - - -if(UNIX) - target_link_libraries(${TARGET_NAME} ${LIB_DL} pthread) -endif() +ie_add_sample(NAME hello_classification + SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp" + OPENCV_DEPENDENCIES imgcodecs) diff --git a/inference-engine/samples/hello_classification/README.md b/inference-engine/samples/hello_classification/README.md new file mode 100644 index 00000000000000..0d7db2eba30756 --- /dev/null +++ b/inference-engine/samples/hello_classification/README.md @@ -0,0 +1,36 @@ +# Hello Classification C++ Sample + +This topic describes how to run the Hello Infer Classification sample application. +The sample is a simplified version of [Image Classification Sample Async](./inference-engine/samples/classification_sample_async/README.md) +and is developed with UNICODE support. +It demonstrates how to use the following Inference Engine API in applications: +* Synchronous Infer Request API +* Input auto-resize API. It allows setting an image of the original size as input for a network with a different input size. + Resize will be performed automatically by the corresponding plugin just before inference. + +There is also an API introduced to crop a ROI object and set it as input without additional memory re-allocation. +To properly demonstrate this API, it is required to run several networks in a pipeline, which is out of scope of this sample; a minimal sketch of the crop call itself is shown below.
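+A minimal sketch of the crop API (an editor's illustration, not part of the sample: it assumes the `InferenceEngine::ROI` structure and the `make_shared_blob(blob, roi)` overload shipped with this release, and `originalBlob`/`inferRequest` are placeholder names):
+```cpp
+// Describe the region to crop: {frame id, posX, posY, sizeX, sizeY}
+InferenceEngine::ROI cropRegion = {0, 32, 32, 128, 128};
+// The ROI blob shares memory with originalBlob, so no pixel data is copied
+InferenceEngine::Blob::Ptr roiBlob = InferenceEngine::make_shared_blob(originalBlob, cropRegion);
+inferRequest.SetBlob("input", roiBlob);
+```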
+Please refer to [Security Barrier Camera Demo](./inference-engine/samples/security_barrier_camera_demo/README.md), or +[Crossroad Camera Demo](./inference-engine/samples/crossroad_camera_demo/README.md) for an example of using the crop ROI API. + +Refer to [Integrate the Inference Engine New Request API with Your Application](./docs/IE_DG/Integrate_with_customer_application_new_API.md) for details. + +> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md). + +## Running + +To run the sample, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/). + +> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). + +You can do inference on an image using a trained AlexNet network on a GPU using the following command: +```sh +./hello_classification <path_to_model>/alexnet_fp32.xml <path_to_image>/cat.bmp GPU +``` + +## Sample Output + +The application outputs top-10 inference results. + +## See Also +* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md) diff --git a/inference-engine/samples/hello_classification/main.cpp b/inference-engine/samples/hello_classification/main.cpp index b3b51584da1668..ad6b187e83999b 100644 --- a/inference-engine/samples/hello_classification/main.cpp +++ b/inference-engine/samples/hello_classification/main.cpp @@ -2,18 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include #include #include -#include +#include #ifdef UNICODE #include #endif -#include #include +#include #include using namespace InferenceEngine; @@ -32,17 +31,19 @@ int wmain(int argc, wchar_t *argv[]) { #endif try { // ------------------------------ Parsing and validation of input args --------------------------------- - if (argc != 3) { - tcout << _T("Usage : ./hello_classification ") << std::endl; + if (argc != 4) { + tcout << _T("Usage : ./hello_classification <path_to_model> <path_to_image> <device_name>") << std::endl; return EXIT_FAILURE; } const file_name_t input_model{argv[1]}; const file_name_t input_image_path{argv[2]}; + const std::string device_name{argv[3]}; + // ----------------------------------------------------------------------------------------------------- - // --------------------------- 1. Load Plugin for inference engine ------------------------------------- - InferencePlugin plugin(PluginDispatcher().getSuitablePlugin(TargetDevice::eCPU)); + // --------------------------- 1. Load inference engine instance ------------------------------------- + Core ie; // -----------------------------------------------------------------------------------------------------
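+        /* Editor's note, a sketch rather than part of the sample: the single Core
+           object created above replaces the former PluginDispatcher/InferencePlugin
+           pair, and can also enumerate the devices it may target (the same call is
+           used by the hello_query_device sample added later in this diff): */
+        // std::vector<std::string> devices = ie.GetAvailableDevices();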
// --------------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------ @@ -58,7 +59,11 @@ int wmain(int argc, wchar_t *argv[]) { InputInfo::Ptr input_info = network.getInputsInfo().begin()->second; std::string input_name = network.getInputsInfo().begin()->first; - input_info->setLayout(Layout::NCHW); + /* Mark input as resizable by setting a resize algorithm. + * In this case we will be able to set an input blob of any shape to an infer request. + * Resize and layout conversions are executed automatically during inference */ + input_info->getPreProcess().setResizeAlgorithm(RESIZE_BILINEAR); + input_info->setLayout(Layout::NHWC); input_info->setPrecision(Precision::U8); // --------------------------- Prepare output blobs ---------------------------------------------------- @@ -68,8 +73,8 @@ int wmain(int argc, wchar_t *argv[]) { output_info->setPrecision(Precision::FP32); // ----------------------------------------------------------------------------------------------------- - // --------------------------- 4. Loading model to the plugin ------------------------------------------ - ExecutableNetwork executable_network = plugin.LoadNetwork(network, {}); + // --------------------------- 4. Loading model to the device ------------------------------------------ + ExecutableNetwork executable_network = ie.LoadNetwork(network, device_name); // ----------------------------------------------------------------------------------------------------- // --------------------------- 5. Create infer request ------------------------------------------------- @@ -77,23 +82,10 @@ int wmain(int argc, wchar_t *argv[]) { // ----------------------------------------------------------------------------------------------------- // --------------------------- 6. Prepare input -------------------------------------------------------- - - cv::Mat image = cv::imread(fileNameToString(input_image_path)); - - /* Resize manually and copy data from the image to the input blob */ - Blob::Ptr input = infer_request.GetBlob(input_name); - auto input_data = input->buffer().as::value_type *>(); - - cv::resize(image, image, cv::Size(input_info->getTensorDesc().getDims()[3], input_info->getTensorDesc().getDims()[2])); - - size_t channels_number = input->getTensorDesc().getDims()[1]; - size_t image_size = input->getTensorDesc().getDims()[3] * input->getTensorDesc().getDims()[2]; - - for (size_t pid = 0; pid < image_size; ++pid) { - for (size_t ch = 0; ch < channels_number; ++ch) { - input_data[ch * image_size + pid] = image.at(pid)[ch]; - } - } + /* Read input image to a blob and set it to an infer request without resize and layout conversions. */ + cv::Mat image = cv::imread(input_image_path); + Blob::Ptr imgBlob = wrapMat2Blob(image); // just wrap Mat data by Blob::Ptr without allocating new memory + infer_request.SetBlob(input_name, imgBlob); // infer_request accepts input blob of any size + // ----------------------------------------------------------------------------------------------------- // --------------------------- 7.
Do inference -------------------------------------------------------- @@ -106,11 +98,12 @@ int wmain(int argc, wchar_t *argv[]) { // Print classification results ClassificationResult classificationResult(output, {fileNameToString(input_image_path)}); classificationResult.print(); - // ----------------------------------------------------------------------------------------------------- } catch (const std::exception & ex) { std::cerr << ex.what() << std::endl; return EXIT_FAILURE; } + std::cout << "This sample is an API example, for any performance measurements " + "please use the dedicated benchmark_app tool" << std::endl; return EXIT_SUCCESS; } diff --git a/inference-engine/samples/hello_nv12_input_classification/CMakeLists.txt b/inference-engine/samples/hello_nv12_input_classification/CMakeLists.txt new file mode 100644 index 00000000000000..b629c23bae5057 --- /dev/null +++ b/inference-engine/samples/hello_nv12_input_classification/CMakeLists.txt @@ -0,0 +1,6 @@ +# Copyright (C) 2019 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +ie_add_sample(NAME hello_nv12_input_classification + SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp") diff --git a/inference-engine/samples/hello_nv12_input_classification/README.md b/inference-engine/samples/hello_nv12_input_classification/README.md new file mode 100644 index 00000000000000..b22dc2e077b219 --- /dev/null +++ b/inference-engine/samples/hello_nv12_input_classification/README.md @@ -0,0 +1,55 @@ +# Hello NV12 Input Classification C++ Sample + +This topic describes how to run the Hello NV12 Input Classification sample application. +The sample is a simplified version of the [Image Classification Sample Async](./inference-engine/samples/classification_sample_async/README.md). +It demonstrates how to use the new NV12 automatic input pre-processing API of the Inference Engine in your applications. +Refer to [Integrate the Inference Engine New Request API with Your Application](./docs/IE_DG/Integrate_with_customer_application_new_API.md) for details. + +## How It Works + +Upon the start-up, the sample application reads command-line parameters, loads a network and sets an +image in the NV12 color format to an Inference Engine plugin. When inference is done, the +application outputs data to the standard output stream. + +The sample accepts an uncompressed image in the NV12 color format. To run the sample, you need to +convert your BGR/RGB image to NV12. To do this, you can use one of the widely available tools such +as FFmpeg\* or GStreamer\*. The following command shows how to convert an ordinary image into an +uncompressed NV12 image using FFmpeg: +```sh +ffmpeg -i cat.jpg -pix_fmt nv12 cat.yuv +``` + +> **NOTE**: +> +> * Because the sample reads raw image files, you should provide a correct image size along with the +> image path. The sample expects the logical size of the image, not the buffer size. For example, +> for 640x480 BGR/RGB image the corresponding NV12 logical image size is also 640x480, whereas the +> buffer size is 640x720. +> * The sample uses input autoresize API of the Inference Engine to simplify user-side +> pre-processing. +> * By default, this sample expects that network input has BGR channels order. If you trained your +> model to work with RGB order, you need to reconvert your model using the Model Optimizer tool +> with `--reverse_input_channels` argument specified. 
For more information about the argument, +> refer to **When to Reverse Input Channels** section of +> [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md). + +## Running + +To run the sample, you can use public or pre-trained models. To download pre-trained models, use +the OpenVINO™ [Model Downloader](https://github.com/opencv/open_model_zoo/tree/master/model_downloader) +or go to [https://download.01.org/opencv/](https://download.01.org/opencv/). + +> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the +> Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). + +You can perform inference on an NV12 image using a trained AlexNet network on CPU with the following command: +```sh +./hello_nv12_input_classification /alexnet_fp32.xml /cat.yuv 640x480 CPU +``` + +## Sample Output + +The application outputs top-10 inference results. + +## See Also +* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md) diff --git a/inference-engine/samples/hello_nv12_input_classification/main.cpp b/inference-engine/samples/hello_nv12_input_classification/main.cpp new file mode 100644 index 00000000000000..c34622d8122a39 --- /dev/null +++ b/inference-engine/samples/hello_nv12_input_classification/main.cpp @@ -0,0 +1,205 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +using namespace InferenceEngine; + +/** + * \brief Parse image size provided as string in format WIDTHxHEIGHT + * @return parsed width and height + */ +std::pair parseImageSize(const std::string& size_string) { + auto delimiter_pos = size_string.find("x"); + if (delimiter_pos == std::string::npos + || delimiter_pos >= size_string.size() - 1 + || delimiter_pos == 0) { + std::stringstream err; + err << "Incorrect format of image size parameter, expected WIDTHxHEIGHT, " + "actual: " << size_string; + throw std::runtime_error(err.str()); + } + + size_t width = static_cast( + std::stoull(size_string.substr(0, delimiter_pos))); + size_t height = static_cast( + std::stoull(size_string.substr(delimiter_pos + 1, size_string.size()))); + + if (width == 0 || height == 0) { + throw std::runtime_error( + "Incorrect format of image size parameter, width and height must not be equal to 0"); + } + + if (width % 2 != 0 || height % 2 != 0) { + throw std::runtime_error("Unsupported image size, width and height must be even numbers"); + } + + return {width, height}; +} + +/** + * \brief Read image data from file + * @return buffer containing the image data + */ +std::unique_ptr readImageDataFromFile(const std::string& image_path, size_t size) { + std::ifstream file(image_path, std::ios_base::ate | std::ios_base::binary); + if (!file.good() || !file.is_open()) { + std::stringstream err; + err << "Cannot access input image file. File path: " << image_path; + throw std::runtime_error(err.str()); + } + + const size_t file_size = file.tellg(); + if (file_size < size) { + std::stringstream err; + err << "Invalid read size provided. 
File size: " << file_size << ", to read: " << size; + throw std::runtime_error(err.str()); + } + file.seekg(0); + + std::unique_ptr<unsigned char[]> data(new unsigned char[size]); + file.read(reinterpret_cast<char *>(data.get()), size); + return data; +} + +/** + * \brief Sets batch size of the network to the specified value + */ +void setBatchSize(CNNNetwork& network, size_t batch) { + ICNNNetwork::InputShapes inputShapes = network.getInputShapes(); + for (auto& shape : inputShapes) { + auto& dims = shape.second; + if (dims.empty()) { + throw std::runtime_error("Network's input shapes have empty dimensions"); + } + dims[0] = batch; + } + network.reshape(inputShapes); +} + +/** +* @brief The entry point of the Inference Engine sample application +*/ +int main(int argc, char *argv[]) { + try { + // ------------------------------ Parsing and validating input arguments------------------------------ + if (argc != 5) { + std::cout << "Usage : ./hello_nv12_input_classification <path_to_model> <path_to_image> <image_size> <device_name>" + << std::endl; + return EXIT_FAILURE; + } + + const std::string input_model{argv[1]}; + const std::string input_image_path{argv[2]}; + size_t input_width = 0, input_height = 0; + std::tie(input_width, input_height) = parseImageSize(argv[3]); + const std::string device_name{argv[4]}; + // ----------------------------------------------------------------------------------------------------- + + // --------------------------- 1. Load inference engine ------------------------------------------------ + Core ie; + // ----------------------------------------------------------------------------------------------------- + + // -------------------------- 2. Read the IR generated by the Model Optimizer (.xml and .bin files) ---- + CNNNetReader network_reader; + network_reader.ReadNetwork(input_model); + network_reader.ReadWeights(input_model.substr(0, input_model.size() - 4) + ".bin"); + CNNNetwork network = network_reader.getNetwork(); + setBatchSize(network, 1); + // ----------------------------------------------------------------------------------------------------- + + // --------------------------- 3. Configure input and output ------------------------------------------- + // --------------------------- Prepare input blobs ----------------------------------------------------- + if (network.getInputsInfo().empty()) { + std::cerr << "Network inputs info is empty" << std::endl; + return EXIT_FAILURE; + } + InputInfo::Ptr input_info = network.getInputsInfo().begin()->second; + std::string input_name = network.getInputsInfo().begin()->first; + + input_info->setLayout(Layout::NCHW); + input_info->setPrecision(Precision::U8); + // set input resize algorithm to enable input autoresize + input_info->getPreProcess().setResizeAlgorithm(ResizeAlgorithm::RESIZE_BILINEAR); + // set input color format to ColorFormat::NV12 to enable automatic input color format + // pre-processing + input_info->getPreProcess().setColorFormat(ColorFormat::NV12); + + // --------------------------- Prepare output blobs ---------------------------------------------------- + if (network.getOutputsInfo().empty()) { + std::cerr << "Network outputs info is empty" << std::endl; + return EXIT_FAILURE; + } + DataPtr output_info = network.getOutputsInfo().begin()->second; + std::string output_name = network.getOutputsInfo().begin()->first; + + output_info->setPrecision(Precision::FP32); + // ----------------------------------------------------------------------------------------------------- +
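+        /* Editor's note (sketch of the size arithmetic used in step 6 below,
+           assuming 8-bit planes): an NV12 frame of logical size WxH stores a
+           WxH Y plane plus a Wx(H/2) interleaved UV plane, so the raw buffer
+           holds W * H * 3 / 2 bytes, e.g. 640 * 480 * 3 / 2 = 460800 bytes. */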
+        // --------------------------- 4. Loading a model to the device ---------------------------------------- + ExecutableNetwork executable_network = ie.LoadNetwork(network, device_name); + // ----------------------------------------------------------------------------------------------------- + + // --------------------------- 5. Create an infer request ---------------------------------------------- + InferRequest infer_request = executable_network.CreateInferRequest(); + // ----------------------------------------------------------------------------------------------------- + + // --------------------------- 6. Prepare input -------------------------------------------------------- + // read image with size converted to NV12 data size: height(NV12) = 3 / 2 * logical height + auto image_buf = readImageDataFromFile(input_image_path, input_width * (input_height * 3 / 2)); + + // --------------------------- Create a blob to hold the NV12 input data ------------------------------- + // Create tensor descriptors for Y and UV blobs + InferenceEngine::TensorDesc y_plane_desc(InferenceEngine::Precision::U8, + {1, 1, input_height, input_width}, InferenceEngine::Layout::NHWC); + InferenceEngine::TensorDesc uv_plane_desc(InferenceEngine::Precision::U8, + {1, 2, input_height / 2, input_width / 2}, InferenceEngine::Layout::NHWC); + const size_t offset = input_width * input_height; + + // Create blob for Y plane from raw data + Blob::Ptr y_blob = make_shared_blob<uint8_t>(y_plane_desc, image_buf.get()); + // Create blob for UV plane from raw data + Blob::Ptr uv_blob = make_shared_blob<uint8_t>(uv_plane_desc, image_buf.get() + offset); + // Create NV12Blob from Y and UV blobs + Blob::Ptr input = make_shared_blob<NV12Blob>(y_blob, uv_blob); + + // --------------------------- Set the input blob to the InferRequest ---------------------------------- + infer_request.SetBlob(input_name, input); + // ----------------------------------------------------------------------------------------------------- + + // --------------------------- 7. Do inference --------------------------------------------------------- + /* Running the request synchronously */ + infer_request.Infer(); + // ----------------------------------------------------------------------------------------------------- + + // --------------------------- 8.
Process output ------------------------------------------------------- + Blob::Ptr output = infer_request.GetBlob(output_name); + + // Print classification results + ClassificationResult classificationResult(output, {input_image_path}); + + std::vector<unsigned> results; + TopResults(10, *output, results); + classificationResult.print(); + // ----------------------------------------------------------------------------------------------------- + } catch (const std::exception & ex) { + std::cerr << ex.what() << std::endl; + return EXIT_FAILURE; + } + std::cout << "This sample is an API example, for any performance measurements " + "please use the dedicated benchmark_app tool" << std::endl; + return EXIT_SUCCESS; +} diff --git a/inference-engine/samples/hello_query_device/CMakeLists.txt b/inference-engine/samples/hello_query_device/CMakeLists.txt new file mode 100644 index 00000000000000..b948ab44d8cd9b --- /dev/null +++ b/inference-engine/samples/hello_query_device/CMakeLists.txt @@ -0,0 +1,6 @@ +# Copyright (C) 2019 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +ie_add_sample(NAME hello_query_device + SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp") diff --git a/inference-engine/samples/hello_query_device/README.md b/inference-engine/samples/hello_query_device/README.md new file mode 100644 index 00000000000000..9d32bd118efefe --- /dev/null +++ b/inference-engine/samples/hello_query_device/README.md @@ -0,0 +1,56 @@ +# Hello Query Device C++ Sample + +This topic demonstrates how to run the Hello Query Device sample application, which queries Inference Engine devices and prints their metrics and default configuration values. The sample shows how to use [Query Device API feature](./docs/IE_DG/QueryDeviceAPI.md). +> **NOTE:** This topic describes usage of the C++ implementation of the Query Device Sample. +> For the Python* implementation, refer to [Hello Query Device Python* Sample](./inference-engine/ie_bridges/python/sample/hello_query_device/README.md) +## Running + +To see the queried information, run the following: +```sh +./hello_query_device +``` + +## Sample Output + +The application prints all available devices with their supported metrics and default values for configuration parameters: + +``` +Available devices: + Device: CPU + Metrics: + AVAILABLE_DEVICES : [ 0 ] + SUPPORTED_METRICS : [ AVAILABLE_DEVICES SUPPORTED_METRICS FULL_DEVICE_NAME OPTIMIZATION_CAPABILITIES SUPPORTED_CONFIG_KEYS RANGE_FOR_ASYNC_INFER_REQUESTS RANGE_FOR_STREAMS ] + FULL_DEVICE_NAME : Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz + OPTIMIZATION_CAPABILITIES : [ WINOGRAD FP32 INT8 BIN ] + SUPPORTED_CONFIG_KEYS : [ CPU_BIND_THREAD CPU_THREADS_NUM CPU_THROUGHPUT_STREAMS DUMP_EXEC_GRAPH_AS_DOT DYN_BATCH_ENABLED DYN_BATCH_LIMIT EXCLUSIVE_ASYNC_REQUESTS PERF_COUNT ] + ...
+ Default values for device configuration keys: + CPU_BIND_THREAD : YES + CPU_THREADS_NUM : 0 + CPU_THROUGHPUT_STREAMS : 1 + DUMP_EXEC_GRAPH_AS_DOT : "" + DYN_BATCH_ENABLED : NO + DYN_BATCH_LIMIT : 0 + EXCLUSIVE_ASYNC_REQUESTS : NO + PERF_COUNT : NO + + Device: FPGA + Metrics: + AVAILABLE_DEVICES : [ 0 ] + SUPPORTED_METRICS : [ AVAILABLE_DEVICES SUPPORTED_METRICS SUPPORTED_CONFIG_KEYS FULL_DEVICE_NAME OPTIMIZATION_CAPABILITIES RANGE_FOR_ASYNC_INFER_REQUESTS ] + SUPPORTED_CONFIG_KEYS : [ DEVICE_ID PERF_COUNT EXCLUSIVE_ASYNC_REQUESTS DLIA_IO_TRANSFORMATIONS_NATIVE DLIA_ARCH_ROOT_DIR DLIA_PERF_ESTIMATION ] + FULL_DEVICE_NAME : a10gx_2ddr : Intel Vision Accelerator Design with Intel Arria 10 FPGA (acla10_1150_sg10) + OPTIMIZATION_CAPABILITIES : [ FP16 ] + RANGE_FOR_ASYNC_INFER_REQUESTS : { 2, 5, 1 } + Default values for device configuration keys: + DEVICE_ID : [ 0 ] + PERF_COUNT : true + EXCLUSIVE_ASYNC_REQUESTS : false + DLIA_IO_TRANSFORMATIONS_NATIVE : false + DLIA_PERF_ESTIMATION : true +``` + +## See Also +* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md) +* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) +* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) diff --git a/inference-engine/samples/hello_query_device/main.cpp b/inference-engine/samples/hello_query_device/main.cpp new file mode 100644 index 00000000000000..2d43e5f02bf2ab --- /dev/null +++ b/inference-engine/samples/hello_query_device/main.cpp @@ -0,0 +1,113 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include +#include + +#include + +#include + +using namespace InferenceEngine; + +namespace { + +template +std::ostream & operator << (std::ostream & stream, const std::vector & v) { + stream << "[ "; + for (auto && value : v) + stream << value << " "; + return stream << "]"; +} + +void printParameterValue(const Parameter & value) { + if (value.is()) { + std::cout << std::boolalpha << value.as() << std::noboolalpha << std::endl; + } else if (value.is()) { + std::cout << value.as() << std::endl; + } else if (value.is()) { + std::cout << value.as() << std::endl; + } else if (value.is()) { + std::cout << value.as() << std::endl; + } else if (value.is()) { + std::string stringValue = value.as(); + std::cout << (stringValue.empty() ? 
"\"\"" : stringValue) << std::endl; + } else if (value.is >()) { + std::cout << value.as >() << std::endl; + } else if (value.is >()) { + std::cout << value.as >() << std::endl; + } else if (value.is >()) { + std::cout << value.as >() << std::endl; + } else if (value.is >()) { + std::cout << value.as >() << std::endl; + } else if (value.is >()) { + auto values = value.as >(); + std::cout << "{ "; + std::cout << std::get<0>(values) << ", "; + std::cout << std::get<1>(values) << ", "; + std::cout << std::get<2>(values); + std::cout << " }"; + std::cout << std::endl; + } else if (value.is >()) { + auto values = value.as >(); + std::cout << "{ "; + std::cout << std::get<0>(values) << ", "; + std::cout << std::get<1>(values); + std::cout << " }"; + std::cout << std::endl; + } else { + std::cout << "UNSUPPORTED TYPE" << std::endl; + } +} + +} // namespace + +int main(int argc, char *argv[]) { + try { + // ------------------------------ Parsing and validation of input args --------------------------------- + if (argc != 1) { + std::cout << "Usage : ./hello_query_device" << std::endl; + return EXIT_FAILURE; + } + + // --------------------------- 1. Load Inference engine instance ------------------------------------- + + Core ie; + + // --------------------------- 2. Get list of available devices ------------------------------------- + + std::vector availableDevices = ie.GetAvailableDevices(); + + // --------------------------- 3. Query and print supported metrics and config keys-------------------- + + std::cout << "Available devices: " << std::endl; + for (auto && device : availableDevices) { + std::cout << "\tDevice: " << device << std::endl; + + std::cout << "\tMetrics: " << std::endl; + std::vector supportedMetrics = ie.GetMetric(device, METRIC_KEY(SUPPORTED_METRICS)); + for (auto && metricName : supportedMetrics) { + std::cout << "\t\t" << metricName << " : " << std::flush; + printParameterValue(ie.GetMetric(device, metricName)); + } + + std::cout << "\tDefault values for device configuration keys: " << std::endl; + std::vector supportedConfigKeys = ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS)); + for (auto && configKey : supportedConfigKeys) { + std::cout << "\t\t" << configKey << " : " << std::flush; + printParameterValue(ie.GetConfig(device, configKey)); + } + + std::cout << std::endl; + } + } catch (const std::exception & ex) { + std::cerr << ex.what() << std::endl; + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} diff --git a/inference-engine/samples/hello_request_classification/CMakeLists.txt b/inference-engine/samples/hello_request_classification/CMakeLists.txt deleted file mode 100644 index c7dbb1e5c9c3e2..00000000000000 --- a/inference-engine/samples/hello_request_classification/CMakeLists.txt +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (C) 2018-2019 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# - -set (TARGET_NAME "hello_request_classification") - -file (GLOB SRC - ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp - ) - -# Find OpenCV components if exist -find_package(OpenCV COMPONENTS imgcodecs QUIET) -if(NOT(OpenCV_FOUND)) - message(WARNING "OPENCV is disabled or not found, " ${TARGET_NAME} " skipped") - return() -endif() - -# Create named folders for the sources within the .vcproj -# Empty name lists them directly under the .vcproj -source_group("src" FILES ${SRC}) - -link_directories(${LIB_FOLDER}) - -# Create library file from sources. 
-add_executable(${TARGET_NAME} ${SRC})
-
-set_target_properties(${TARGET_NAME} PROPERTIES "CMAKE_CXX_FLAGS" "${CMAKE_CXX_FLAGS} -fPIE"
-COMPILE_PDB_NAME ${TARGET_NAME})
-
-
-target_link_libraries(${TARGET_NAME} ${InferenceEngine_LIBRARIES} ${OpenCV_LIBRARIES})
-
-
-if(UNIX)
-    target_link_libraries(${TARGET_NAME} ${LIB_DL} pthread)
-endif()
diff --git a/inference-engine/samples/hello_request_classification/README.md b/inference-engine/samples/hello_request_classification/README.md
deleted file mode 100644
index 261b883753b807..00000000000000
--- a/inference-engine/samples/hello_request_classification/README.md
+++ /dev/null
@@ -1,26 +0,0 @@
-# Hello Infer Request Classification C++ Sample
-
-This topic describes how to run the Hello Infer Classification sample application.
-The sample is a simplified version of [Image Classification Sample](./inference-engine/samples/classification_sample/README.md).
-It demonstrates how to use the new Infer Request API of Inference Engine in applications. Refer to
-[Integrate the Inference Engine New Request API with Your Application](./docs/IE_DG/Integrate_with_customer_application_new_API.md) for details.
-
-> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
-
-## Running
-
-To run the sample, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
-
-> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
-
-You can do inference on an image using a trained AlexNet network on CPU using the following command:
-```sh
-./hello_autoresize_classification <path_to_model>/alexnet_fp32.xml <path_to_image>/cat.bmp CPU
-```
-
-## Sample Output
-
-The application outputs top-10 inference results.
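For readers tracking this removal: the asynchronous pattern the deleted sample demonstrated carries over to the `Core` API introduced in this release. A minimal sketch, assuming a converted model at the hypothetical paths `model.xml`/`model.bin` and inference on CPU (illustrative code, not taken from this repository):

```cpp
#include <inference_engine.hpp>

#include <iostream>

using namespace InferenceEngine;

int main() {
    // Read the IR and load it on a device through the 2019 R2 Core API.
    Core ie;
    CNNNetReader reader;
    reader.ReadNetwork("model.xml");  // hypothetical model path
    reader.ReadWeights("model.bin");
    ExecutableNetwork exec = ie.LoadNetwork(reader.getNetwork(), "CPU");

    // The async flow the removed sample demonstrated: register a callback,
    // kick off the request, then block until the result is ready.
    InferRequest request = exec.CreateInferRequest();
    request.SetCompletionCallback([] {
        std::cout << "Async inference completed" << std::endl;
    });
    request.StartAsync();
    request.Wait(IInferRequest::WaitMode::RESULT_READY);
    return 0;
}
```

The same `StartAsync`/`Wait` pair can also be re-armed from inside the callback, which is how the deleted `main.cpp` below loops over ten requests.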
- -## See Also -* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md) diff --git a/inference-engine/samples/hello_request_classification/main.cpp b/inference-engine/samples/hello_request_classification/main.cpp deleted file mode 100644 index e03142b8f7699a..00000000000000 --- a/inference-engine/samples/hello_request_classification/main.cpp +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include -#include -#include - -#include -#include -#include - -using namespace InferenceEngine; - -int main(int argc, char *argv[]) { - try { - // ------------------------------ Parsing and validation of input args --------------------------------- - if (argc != 4) { - std::cout << "Usage : ./hello_request_classification " - << std::endl; - return EXIT_FAILURE; - } - - const std::string input_model{argv[1]}; - const std::string input_image_path{argv[2]}; - const std::string device_name{argv[3]}; - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 1. Load Plugin for inference engine ------------------------------------- - InferencePlugin plugin = PluginDispatcher().getPluginByDevice(device_name); - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------ - CNNNetReader network_reader; - network_reader.ReadNetwork(input_model); - network_reader.ReadWeights(input_model.substr(0, input_model.size() - 4) + ".bin"); - network_reader.getNetwork().setBatchSize(1); - CNNNetwork network = network_reader.getNetwork(); - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 3. Configure input & output --------------------------------------------- - - // --------------------------- Prepare input blobs ----------------------------------------------------- - /** Taking information about all topology inputs **/ - InputsDataMap input_info(network.getInputsInfo()); - /** Iterating over all input info**/ - for (auto &item : input_info) { - InputInfo::Ptr input_data = item.second; - input_data->setPrecision(Precision::U8); - input_data->setLayout(Layout::NCHW); - } - - // ------------------------------ Prepare output blobs ------------------------------------------------- - /** Taking information about all topology outputs **/ - OutputsDataMap output_info(network.getOutputsInfo()); - /** Iterating over all output info**/ - for (auto &item : output_info) { - DataPtr output_data = item.second; - if (!output_data) { - throw std::runtime_error("Output data pointer is invalid"); - } - output_data->setPrecision(Precision::FP32); - } - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 4. Loading model to the plugin ------------------------------------------ - ExecutableNetwork executable_network = plugin.LoadNetwork(network, {}); - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 5. 
Create infer request ------------------------------------------------- - InferRequest async_infer_request = executable_network.CreateInferRequest(); - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 6. Prepare input -------------------------------------------------------- - for (auto &item : input_info) { - cv::Mat image = cv::imread(input_image_path); - - auto input_name = item.first; - InputInfo::Ptr input_data = item.second; - - /** Getting input blob **/ - Blob::Ptr input = async_infer_request.GetBlob(input_name); - auto input_buffer = input->buffer().as::value_type *>(); - - /** Fill input tensor with planes. First b channel, then g and r channels **/ - if (image.empty()) throw std::logic_error("Invalid image at path: " + input_image_path); - - /* Resize and copy data from the image to the input blob */ - cv::resize(image, image, cv::Size(input_data->getTensorDesc().getDims()[3], input_data->getTensorDesc().getDims()[2])); - auto dims = input->getTensorDesc().getDims(); - size_t channels_number = dims[1]; - size_t image_size = dims[3] * dims[2]; - for (size_t pid = 0; pid < image_size; ++pid) { - for (size_t ch = 0; ch < channels_number; ++ch) { - input_buffer[ch * image_size + pid] = image.at(pid)[ch]; - } - } - } - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 7. Do inference --------------------------------------------------------- - const int max_number_of_iterations = 10; - int iterations = max_number_of_iterations; - /** Set callback function for calling on completion of async request **/ - async_infer_request.SetCompletionCallback( - [&] { - std::cout << "Completed " << max_number_of_iterations - iterations + 1 << " async request" - << std::endl; - if (--iterations) { - /** Start async request (max_number_of_iterations - 1) more times **/ - async_infer_request.StartAsync(); - } - }); - /** Start async request for the first time **/ - async_infer_request.StartAsync(); - /** Wait all repetition of async requests **/ - for (int i = 0; i < max_number_of_iterations; i++) { - async_infer_request.Wait(IInferRequest::WaitMode::RESULT_READY); - } - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 8. Process output ------------------------------------------------------- - for (auto &item : output_info) { - auto output_name = item.first; - Blob::Ptr output = async_infer_request.GetBlob(output_name);; - // Print classification results - ClassificationResult classificationResult(output, {input_image_path}); - classificationResult.print(); - } - // ----------------------------------------------------------------------------------------------------- - } catch (const std::exception & ex) { - std::cerr << ex.what() << std::endl; - return EXIT_FAILURE; - } - return EXIT_SUCCESS; -} diff --git a/inference-engine/samples/hello_reshape_ssd/CMakeLists.txt b/inference-engine/samples/hello_reshape_ssd/CMakeLists.txt new file mode 100644 index 00000000000000..a7e5ec487376ab --- /dev/null +++ b/inference-engine/samples/hello_reshape_ssd/CMakeLists.txt @@ -0,0 +1,18 @@ +# Copyright (C) 2018-2019 Intel Corporation + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+
+#      http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ie_add_sample(NAME hello_reshape_ssd
+              SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp"
+              HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/reshape_ssd_extension.hpp"
+              OPENCV_DEPENDENCIES imgcodecs)
diff --git a/inference-engine/samples/hello_reshape_ssd/README.md b/inference-engine/samples/hello_reshape_ssd/README.md
new file mode 100644
index 00000000000000..c33c97db67a2ae
--- /dev/null
+++ b/inference-engine/samples/hello_reshape_ssd/README.md
@@ -0,0 +1,28 @@
+# Hello Reshape SSD C++ Sample
+
+This topic demonstrates how to run the Hello Reshape SSD application, which does inference using object detection
+networks like SSD-VGG. The sample shows how to use the [Shape Inference feature](./docs/IE_DG/ShapeInference.md).
+
+> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
+
+## Running
+
+To run the sample, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
+
+> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+
+You can use the following command to do inference of an image on CPU using a trained SSD network:
+```sh
+./hello_reshape_ssd <path_to_model>/ssd_300.xml <path_to_image>/500x500.bmp CPU 3
+```
+
+## Sample Output
+
+The application renders an image with detected objects enclosed in rectangles. It outputs the list of classes
+of the detected objects along with the respective confidence values and the coordinates of the
+rectangles to the standard output stream.
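The heart of this sample is the reshape step. As a minimal sketch of the Shape Inference API it exercises (the `resizeToImage` helper and the concrete sizes are illustrative, not part of the sample):

```cpp
#include <inference_engine.hpp>

#include <string>
#include <tuple>

using namespace InferenceEngine;

// Resize a single-input NCHW network to a new batch and spatial size.
// Must run before LoadNetwork; a sketch assuming one input, as in this sample.
void resizeToImage(CNNNetwork &network, size_t batch, size_t height, size_t width) {
    auto input_shapes = network.getInputShapes();  // std::map<std::string, SizeVector>
    std::string input_name;
    SizeVector input_shape;
    std::tie(input_name, input_shape) = *input_shapes.begin();
    input_shape[0] = batch;   // N
    input_shape[2] = height;  // H
    input_shape[3] = width;   // W
    input_shapes[input_name] = input_shape;
    network.reshape(input_shapes);  // propagates the new shapes through the graph
}
```

The full `main.cpp` below follows exactly this sequence, taking the target height and width from the input image itself.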
+
+## See Also
+* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
+* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader)
+* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
diff --git a/inference-engine/samples/hello_reshape_ssd/main.cpp b/inference-engine/samples/hello_reshape_ssd/main.cpp
new file mode 100644
index 00000000000000..96a64c437c6b3f
--- /dev/null
+++ b/inference-engine/samples/hello_reshape_ssd/main.cpp
@@ -0,0 +1,173 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <inference_engine.hpp>
+#include <opencv2/opencv.hpp>
+#include <samples/ocv_common.hpp>
+
+#include "reshape_ssd_extension.hpp"
+
+using namespace InferenceEngine;
+
+int main(int argc, char* argv[]) {
+    try {
+        // ------------------------------ Parsing and validation of input args ---------------------------------
+        if (argc != 5) {
+            std::cout << "Usage : ./hello_reshape_ssd <path_to_model> <path_to_image> <device> <batch>"
+                      << std::endl;
+            return EXIT_FAILURE;
+        }
+        const std::string input_model{argv[1]};
+        const std::string input_image_path{argv[2]};
+        const std::string device_name{argv[3]};
+        const size_t batch_size{std::stoul(argv[4])};
+        // -----------------------------------------------------------------------------------------------------
+
+        // --------------------------- 1. Load inference engine -------------------------------------
+        Core ie;
+
+        IExtensionPtr cpuExtension, inPlaceExtension;
+        if (device_name.find("CPU") != std::string::npos) {
+            cpuExtension = std::make_shared<Extensions::Cpu::CpuExtensions>();
+            inPlaceExtension = std::make_shared<InPlaceExtension>();
+            ie.AddExtension(cpuExtension, "CPU");
+            // register sample's custom kernel (CustomReLU)
+            ie.AddExtension(inPlaceExtension, "CPU");
+        }
+        // -----------------------------------------------------------------------------------------------------
+
+        // --------------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
+        CNNNetReader network_reader;
+        network_reader.ReadNetwork(input_model);
+        network_reader.ReadWeights(input_model.substr(0, input_model.size() - 4) + ".bin");
+        CNNNetwork network = network_reader.getNetwork();
+
+        OutputsDataMap outputs_info(network.getOutputsInfo());
+        InputsDataMap inputs_info(network.getInputsInfo());
+        if (inputs_info.size() != 1 || outputs_info.size() != 1)
+            throw std::logic_error("Sample supports clean SSD network with one input and one output");
+
+        // --------------------------- Resize network to match image sizes and given batch----------------------
+        if (device_name.find("CPU") != std::string::npos) {
+            network.AddExtension(cpuExtension);
+            // register sample's custom shape inference (CustomReLU)
+            network.AddExtension(inPlaceExtension);
+        }
+        auto input_shapes = network.getInputShapes();
+        std::string input_name;
+        SizeVector input_shape;
+        std::tie(input_name, input_shape) = *input_shapes.begin();
+        cv::Mat image = cv::imread(input_image_path);
+        input_shape[0] = batch_size;
+        input_shape[2] = image.rows;
+        input_shape[3] = image.cols;
+        input_shapes[input_name] = input_shape;
+        std::cout << "Resizing network to the image size = [" << image.rows << "x" << image.cols << "] "
+                  << "with batch = " << batch_size << std::endl;
+        network.reshape(input_shapes);
+        // -----------------------------------------------------------------------------------------------------
+
+        // --------------------------- 3. Configure input & output ---------------------------------------------
+        // --------------------------- Prepare input blobs -----------------------------------------------------
+        InputInfo::Ptr input_info;
+        std::tie(input_name, input_info) = *inputs_info.begin();
+        input_info->setLayout(Layout::NCHW);
+        input_info->setPrecision(Precision::U8);
+        // --------------------------- Prepare output blobs ----------------------------------------------------
+        DataPtr output_info;
+        std::string output_name;
+        std::tie(output_name, output_info) = *outputs_info.begin();
+        if (output_info == nullptr) {
+            THROW_IE_EXCEPTION << "[SAMPLES] internal error - output information is empty";
+        }
+        if (output_info->getCreatorLayer().lock()->type != "DetectionOutput")
+            throw std::logic_error("Can't find a DetectionOutput layer in the topology");
+        const SizeVector output_shape = output_info->getTensorDesc().getDims();
+        if (output_shape.size() != 4) {
+            throw std::logic_error("Incorrect output dimensions for SSD model");
+        }
+        const int max_proposal_count = output_shape[2];
+        const int object_size = output_shape[3];
+        if (object_size != 7) {
+            throw std::logic_error("Output item should have 7 as a last dimension");
+        }
+
+        output_info->setPrecision(Precision::FP32);
+
+        auto dumpVec = [](const SizeVector& vec) -> std::string {
+            if (vec.empty()) return "[]";
+            std::stringstream oss;
+            oss << "[" << vec[0];
+            for (size_t i = 1; i < vec.size(); i++) oss << "," << vec[i];
+            oss << "]";
+            return oss.str();
+        };
+        std::cout << "Resulting input shape = " << dumpVec(input_shape) << std::endl;
+        std::cout << "Resulting output shape = " << dumpVec(output_shape) << std::endl;
+        // -----------------------------------------------------------------------------------------------------
+
+        // --------------------------- 4. Loading model to the device ------------------------------------------
+        ExecutableNetwork executable_network = ie.LoadNetwork(network, device_name);
+        // -----------------------------------------------------------------------------------------------------
+
+        // --------------------------- 5. Create infer request -------------------------------------------------
+        InferRequest infer_request = executable_network.CreateInferRequest();
+        // -----------------------------------------------------------------------------------------------------
+
+        // --------------------------- 6. Prepare input --------------------------------------------------------
+        Blob::Ptr input = infer_request.GetBlob(input_name);
+        for (size_t b = 0; b < batch_size; b++) {
+            matU8ToBlob<uint8_t>(image, input, b);
+        }
+        // -----------------------------------------------------------------------------------------------------
+
+        // --------------------------- 7. Do inference --------------------------------------------------------
+        infer_request.Infer();
+        // -----------------------------------------------------------------------------------------------------
+
+        // --------------------------- 8. Process output ------------------------------------------------------
+        Blob::Ptr output = infer_request.GetBlob(output_name);
+        const float* detection = output->buffer().as<PrecisionTrait<Precision::FP32>::value_type*>();
+
+        /* Each detection has image_id that denotes processed image */
+        for (int cur_proposal = 0; cur_proposal < max_proposal_count; cur_proposal++) {
+            float image_id = detection[cur_proposal * object_size + 0];
+            float label = detection[cur_proposal * object_size + 1];
+            float confidence = detection[cur_proposal * object_size + 2];
+            /* CPU and GPU devices have difference in DetectionOutput layer, so we need both checks */
+            if (image_id < 0 || confidence == 0) {
+                continue;
+            }
+
+            float xmin = detection[cur_proposal * object_size + 3] * image.cols;
+            float ymin = detection[cur_proposal * object_size + 4] * image.rows;
+            float xmax = detection[cur_proposal * object_size + 5] * image.cols;
+            float ymax = detection[cur_proposal * object_size + 6] * image.rows;
+
+            if (confidence > 0.5) {
+                /** Drawing only objects with >50% probability **/
+                std::ostringstream conf;
+                conf << ":" << std::fixed << std::setprecision(3) << confidence;
+                cv::rectangle(image, cv::Point2f(xmin, ymin), cv::Point2f(xmax, ymax), cv::Scalar(0, 0, 255));
+                std::cout << "[" << cur_proposal << "," << label << "] element, prob = " << confidence <<
+                          ", bbox = (" << xmin << "," << ymin << ")-(" << xmax << "," << ymax << ")" << ", batch id = "
+                          << image_id << std::endl;
+            }
+        }
+
+        cv::imwrite("hello_reshape_ssd_output.jpg", image);
+        std::cout << "The resulting image was saved in the file: hello_reshape_ssd_output.jpg" << std::endl;
+        // -----------------------------------------------------------------------------------------------------
+    } catch (const std::exception& ex) {
+        std::cerr << ex.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+    std::cout << std::endl << "This sample is an API example, for any performance measurements "
+                              "please use the dedicated benchmark_app tool" << std::endl;
+    return EXIT_SUCCESS;
+}
diff --git a/inference-engine/samples/hello_shape_infer_ssd/shape_infer_extension.hpp b/inference-engine/samples/hello_reshape_ssd/reshape_ssd_extension.hpp
similarity index 100%
rename from inference-engine/samples/hello_shape_infer_ssd/shape_infer_extension.hpp
rename to inference-engine/samples/hello_reshape_ssd/reshape_ssd_extension.hpp
diff --git a/inference-engine/samples/hello_shape_infer_ssd/CMakeLists.txt b/inference-engine/samples/hello_shape_infer_ssd/CMakeLists.txt
deleted file mode 100644
index b0ef62b8b6cd02..00000000000000
--- a/inference-engine/samples/hello_shape_infer_ssd/CMakeLists.txt
+++ /dev/null
@@ -1,48 +0,0 @@
-# Copyright (C) 2018-2019 Intel Corporation
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-
-#      http://www.apache.org/licenses/LICENSE-2.0
-
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set(TARGET_NAME "hello_shape_infer_ssd")
-
-file(GLOB SRC
-        ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
-        )
-
-file(GLOB HEADERS
-        ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp
-        )
-
-# Find OpenCV components if exist
-find_package(OpenCV COMPONENTS imgcodecs QUIET)
-if(NOT(OpenCV_FOUND))
-    message(WARNING "OPENCV is disabled or not found, " ${TARGET_NAME} " skipped")
-    return()
-endif()
-
-# Create named folders for the sources within the .vcproj
-# Empty name lists them directly under the .vcproj
-source_group("src" FILES ${SRC})
-source_group("headers" FILES ${HEADERS})
-
-link_directories(${LIB_FOLDER})
-
-# Create library file from sources.
-add_executable(${TARGET_NAME} ${SRC} ${HEADERS})
-
-set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME})
-
-target_link_libraries(${TARGET_NAME} ${InferenceEngine_LIBRARIES} IE::ie_cpu_extension ${OpenCV_LIBRARIES})
-
-if (UNIX)
-    target_link_libraries(${TARGET_NAME} ${LIB_DL})
-endif ()
diff --git a/inference-engine/samples/hello_shape_infer_ssd/README.md b/inference-engine/samples/hello_shape_infer_ssd/README.md
deleted file mode 100644
index e45be9d7e4707b..00000000000000
--- a/inference-engine/samples/hello_shape_infer_ssd/README.md
+++ /dev/null
@@ -1,28 +0,0 @@
-# Hello Shape Infer C++ Sample
-
-This topic demonstrates how to run the Hello Shape Infer SSD application, which does inference using object detection
-networks like SSD-VGG. The sample shows how to use [Shape Inference feature](./docs/IE_DG/ShapeInference.md).
-
-> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
-
-## Running
-
-To run the sample, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
-
-> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
-
-You can use the following command to do inference on CPU on an image using a trained SSD network:
-```sh
-./hello_shape_infer_ssd <path_to_model>/ssd_300.xml <path_to_image>/500x500.bmp CPU 3
-```
-
-## Sample Output
-
-The application renders an image with detected objects enclosed in rectangles. It outputs the list of classes
-of the detected objects along with the respective confidence values and the coordinates of the
-rectangles to the standard output stream.
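Both this removed sample and its `hello_reshape_ssd` replacement decode the same `DetectionOutput` convention: a `[1, 1, N, 7]` blob whose 7-float records are `[image_id, label, confidence, xmin, ymin, xmax, ymax]`, with box coordinates normalized to `[0, 1]`. A minimal decoding sketch, assuming `detection` points at the FP32 output buffer and `max_proposal_count`/`object_size` come from the output dims as in the sample code (`image_width`/`image_height` are illustrative):

```cpp
// Walk the [N, 7] DetectionOutput records and keep confident boxes.
for (int i = 0; i < max_proposal_count; i++) {
    const float *rec = detection + i * object_size;  // object_size == 7
    const float image_id   = rec[0];  // batch index of the source image
    const float label      = rec[1];
    const float confidence = rec[2];
    if (image_id < 0) break;          // a negative id marks the end of detections
    if (confidence < 0.5f) continue;  // same 50% threshold the sample uses
    // Coordinates are normalized; scale back to pixels.
    const float xmin = rec[3] * image_width;
    const float ymin = rec[4] * image_height;
    const float xmax = rec[5] * image_width;
    const float ymax = rec[6] * image_height;
    // ... draw the box or report (label, confidence, xmin, ymin, xmax, ymax) ...
}
```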
- -## See Also -* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md) -* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) -* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) diff --git a/inference-engine/samples/hello_shape_infer_ssd/main.cpp b/inference-engine/samples/hello_shape_infer_ssd/main.cpp deleted file mode 100644 index ee691e50e2a97f..00000000000000 --- a/inference-engine/samples/hello_shape_infer_ssd/main.cpp +++ /dev/null @@ -1,170 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include - -#include -#include -#include - -#include "shape_infer_extension.hpp" - -using namespace InferenceEngine; - -int main(int argc, char* argv[]) { - try { - // ------------------------------ Parsing and validation of input args --------------------------------- - if (argc != 5) { - std::cout << "Usage : ./hello_shape_infer_ssd " - << std::endl; - return EXIT_FAILURE; - } - const std::string input_model{argv[1]}; - const std::string input_image_path{argv[2]}; - const std::string device_name{argv[3]}; - const size_t batch_size{std::stoul(argv[4])}; - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 1. Load Plugin for inference engine ------------------------------------- - InferencePlugin plugin = PluginDispatcher().getPluginByDevice(device_name); - IExtensionPtr cpuExtension, inPlaceExtension; - if (device_name == "CPU") { - cpuExtension = std::make_shared(); - inPlaceExtension = std::make_shared(); - plugin.AddExtension(cpuExtension); - // register sample's custom kernel (CustomReLU) - plugin.AddExtension(inPlaceExtension); - } - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------ - CNNNetReader network_reader; - network_reader.ReadNetwork(input_model); - network_reader.ReadWeights(input_model.substr(0, input_model.size() - 4) + ".bin"); - CNNNetwork network = network_reader.getNetwork(); - - OutputsDataMap outputs_info(network.getOutputsInfo()); - InputsDataMap inputs_info(network.getInputsInfo()); - if (inputs_info.size() != 1 && outputs_info.size() != 1) - throw std::logic_error("Sample supports clean SSD network with one input and one output"); - - // --------------------------- Resize network to match image sizes and given batch---------------------- - if (device_name == "CPU") { - network.AddExtension(cpuExtension); - // register sample's custom shape inference (CustomReLU) - network.AddExtension(inPlaceExtension); - } - auto input_shapes = network.getInputShapes(); - std::string input_name; - SizeVector input_shape; - std::tie(input_name, input_shape) = *input_shapes.begin(); - cv::Mat image = cv::imread(input_image_path); - input_shape[0] = batch_size; - input_shape[2] = image.rows; - input_shape[3] = image.cols; - input_shapes[input_name] = input_shape; - std::cout << "Resizing network to the image size = [" << image.rows << "x" << image.cols << "] " - << "with batch = " << batch_size << std::endl; - network.reshape(input_shapes); - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 3. 
Configure input & output --------------------------------------------- - // --------------------------- Prepare input blobs ----------------------------------------------------- - InputInfo::Ptr input_info; - std::tie(input_name, input_info) = *inputs_info.begin(); - input_info->setLayout(Layout::NCHW); - input_info->setPrecision(Precision::U8); - // --------------------------- Prepare output blobs ---------------------------------------------------- - DataPtr output_info; - std::string output_name; - std::tie(output_name, output_info) = *outputs_info.begin(); - if (output_info->creatorLayer.lock()->type != "DetectionOutput") - throw std::logic_error("Can't find a DetectionOutput layer in the topology"); - const SizeVector output_shape = output_info->getTensorDesc().getDims(); - const int max_proposal_count = output_shape[2]; - const int object_size = output_shape[3]; - if (object_size != 7) { - throw std::logic_error("Output item should have 7 as a last dimension"); - } - if (output_shape.size() != 4) { - throw std::logic_error("Incorrect output dimensions for SSD model"); - } - if (output_info == nullptr) { - THROW_IE_EXCEPTION << "[SAMPLES] internal error - output information is empty"; - } - - output_info->setPrecision(Precision::FP32); - - auto dumpVec = [](const SizeVector& vec) -> std::string { - if (vec.empty()) return "[]"; - std::stringstream oss; - oss << "[" << vec[0]; - for (size_t i = 1; i < vec.size(); i++) oss << "," << vec[i]; - oss << "]"; - return oss.str(); - }; - std::cout << "Resulting input shape = " << dumpVec(input_shape) << std::endl; - std::cout << "Resulting output shape = " << dumpVec(output_shape) << std::endl; - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 4. Loading model to the plugin ------------------------------------------ - ExecutableNetwork executable_network = plugin.LoadNetwork(network, {}); - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 5. Create infer request ------------------------------------------------- - InferRequest infer_request = executable_network.CreateInferRequest(); - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 6. Prepare input -------------------------------------------------------- - Blob::Ptr input = infer_request.GetBlob(input_name); - for (size_t b = 0; b < batch_size; b++) { - matU8ToBlob(image, input, b); - } - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 7. Do inference -------------------------------------------------------- - infer_request.Infer(); - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 8. 
Process output ------------------------------------------------------ - Blob::Ptr output = infer_request.GetBlob(output_name); - const float* detection = output->buffer().as::value_type*>(); - - /* Each detection has image_id that denotes processed image */ - for (int cur_proposal = 0; cur_proposal < max_proposal_count; cur_proposal++) { - float image_id = detection[cur_proposal * object_size + 0]; - float label = detection[cur_proposal * object_size + 1]; - float confidence = detection[cur_proposal * object_size + 2]; - /* CPU and GPU plugins have difference in DetectionOutput layer, so we need both checks */ - if (image_id < 0 || confidence == 0) { - continue; - } - - float xmin = detection[cur_proposal * object_size + 3] * image.cols; - float ymin = detection[cur_proposal * object_size + 4] * image.rows; - float xmax = detection[cur_proposal * object_size + 5] * image.cols; - float ymax = detection[cur_proposal * object_size + 6] * image.rows; - - if (confidence > 0.5) { - /** Drawing only objects with >50% probability **/ - std::ostringstream conf; - conf << ":" << std::fixed << std::setprecision(3) << confidence; - cv::rectangle(image, cv::Point2f(xmin, ymin), cv::Point2f(xmax, ymax), cv::Scalar(0, 0, 255)); - std::cout << "[" << cur_proposal << "," << label << "] element, prob = " << confidence << - ", bbox = (" << xmin << "," << ymin << ")-(" << xmax << "," << ymax << ")" << ", batch id = " - << image_id << std::endl; - } - } - - cv::imwrite("hello_shape_infer_ssd_output.jpg", image); - std::cout << "The resulting image was saved in the file: hello_shape_infer_ssd_output.jpg" << std::endl; - // ----------------------------------------------------------------------------------------------------- - } catch (const std::exception& ex) { - std::cerr << ex.what() << std::endl; - return EXIT_FAILURE; - } - return EXIT_SUCCESS; -} diff --git a/inference-engine/samples/lenet_network_graph_builder/CMakeLists.txt b/inference-engine/samples/lenet_network_graph_builder/CMakeLists.txt deleted file mode 100644 index f8960bd2ab97f8..00000000000000 --- a/inference-engine/samples/lenet_network_graph_builder/CMakeLists.txt +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (C) 2018-2019 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# - -set (TARGET_NAME "lenet_network_graph_builder") - -file (GLOB MAIN_SRC - ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp - ) - -file (GLOB MAIN_HEADERS - ${CMAKE_CURRENT_SOURCE_DIR}/*.h - ) - -# Create named folders for the sources within the .vcproj -# Empty name lists them directly under the .vcproj -source_group("src" FILES ${MAIN_SRC}) -source_group("include" FILES ${MAIN_HEADERS}) - - -link_directories(${LIB_FOLDER}) - -# Create library file from sources. 
-add_executable(${TARGET_NAME} ${MAIN_SRC} ${MAIN_HEADERS}) - -add_dependencies(${TARGET_NAME} gflags) - -set_target_properties(${TARGET_NAME} PROPERTIES "CMAKE_CXX_FLAGS" "${CMAKE_CXX_FLAGS} -fPIE" - COMPILE_PDB_NAME ${TARGET_NAME}) - -target_link_libraries(${TARGET_NAME} ${InferenceEngine_LIBRARIES} gflags format_reader) - -if(UNIX) - target_link_libraries( ${TARGET_NAME} ${LIB_DL} pthread) -endif() diff --git a/inference-engine/samples/lenet_network_graph_builder/LeNet.bin b/inference-engine/samples/lenet_network_graph_builder/LeNet.bin deleted file mode 100644 index 7ce66d0ba63b7a..00000000000000 Binary files a/inference-engine/samples/lenet_network_graph_builder/LeNet.bin and /dev/null differ diff --git a/inference-engine/samples/lenet_network_graph_builder/README.md b/inference-engine/samples/lenet_network_graph_builder/README.md deleted file mode 100644 index 1cb1961df84df9..00000000000000 --- a/inference-engine/samples/lenet_network_graph_builder/README.md +++ /dev/null @@ -1,54 +0,0 @@ -# LeNet Number Classifications Network Using Graph Builder API - -This sample demonstrates how to execute inference using Inference Engine Graph Builder API to build a network on example of the LeNet classifications network. - -XML file is not required for network building now. Inference Engine Graph Builder API allows building of a network "on the fly" from source code. The sample uses one-channel `ubyte` pictures as input. - -## How It Works - -Upon the start-up the sample reads command line parameters and builds a network using Graph Builder API and passed weights file. -Then, the application loads built network and an image to the Inference Engine plugin. - -When inference is done, the application outputs inference results to the standard output stream. - -> **NOTE**: This sample is implemented to support models with FP32 weights only. - -> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md). - -## Running - -Running the application with the `-h` option yields the following usage message: -```sh -./lenet_network_graph_builder -h -InferenceEngine: - API version ............ - Build .................. - -lenet_network_graph_builder [OPTION] -Options: - - -h Print a usage message. - -m "" Path to a .bin file with weights for trained model - -i "" Required. Path to image or folder with images - -d "" Specify the target device to infer on this. Sample will look for a suitable plugin for device specified. Default value is CPU - -pp "" Path to a plugin folder - -pc Enables per-layer performance report - -nt "" Number of top results. Default value is 10 - -ni "" Number of iterations. Default value is 1 - -``` - -Running the application with empty list of options yields the usage message given above. - -For example, to do inference of an ubyte image on a GPU run the following command: -```sh -./lenet_network_graph_builder -i -m -d GPU -``` - -## Sample Output - -By default the application outputs top-10 inference results for each infer request. 
-In addition to this information it will provide throughput value measured in frames per second.
-
-## See Also
-* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
diff --git a/inference-engine/samples/lenet_network_graph_builder/lenet_network_graph_builder.hpp b/inference-engine/samples/lenet_network_graph_builder/lenet_network_graph_builder.hpp
deleted file mode 100644
index 47c62772523420..00000000000000
--- a/inference-engine/samples/lenet_network_graph_builder/lenet_network_graph_builder.hpp
+++ /dev/null
@@ -1,87 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <string>
-#include <vector>
-#include <gflags/gflags.h>
-#include <iostream>
-
-#ifdef _WIN32
-#include <os/windows/w_dirent.h>
-#else
-#include <dirent.h>
-#endif
-
-/// @brief message for help argument
-static const char help_message[] = "Print a usage message";
-
-/// @brief message for images argument
-static const char input_message[] = "Required. Path to image or folder with images";
-
-/// @brief message for model argument
-static const char model_message[] = "Path to an .bin file with weights for trained model";
-
-/// @brief message for assigning cnn calculation to device
-static const char target_device_message[] = "Specify the target device to infer on this. " \
-                                            "Sample will look for a suitable plugin for device specified. " \
-                                            "Default value is CPU";
-
-/// @brief message for plugin_path argument
-static const char plugin_path_message[] = "Path to a plugin folder";
-
-/// @brief message for performance counters
-static const char performance_counter_message[] = "Enables per-layer performance report";
-
-/// @brief message for top results number
-static const char ntop_message[] = "Number of top results. Default 10";
-
-/// @brief message for iterations count
-static const char iterations_count_message[] = "Number of iterations. Default value is 1";
-
-/// \brief Define flag for showing help message <br>
-DEFINE_bool(h, false, help_message);
-
-/// \brief Define parameter for set weight file <br>
-/// It is a parameter
-DEFINE_string(m, "", model_message);
-
-/// \brief Define parameter for set image file <br>
-/// It is a required parameter
-DEFINE_string(i, "", input_message);
-
-/// \brief device the target device to infer on <br>
-DEFINE_string(d, "CPU", target_device_message);
-
-/// \brief Define parameter for set path to plugins <br>
-/// Default is ./lib
-DEFINE_string(pp, "", plugin_path_message);
-
-/// @brief Enable per-layer performance report
-DEFINE_bool(pc, false, performance_counter_message);
-
-/// @brief Top results number (default 10) <br>
-DEFINE_uint32(nt, 10, ntop_message); - -/// @brief Iterations count (default 1) -DEFINE_uint32(ni, 1, iterations_count_message); - -/** - * \brief This function show a help message - */ -static void showUsage() { - std::cout << std::endl; - std::cout << "lenet_network_graph_builder [OPTION]" << std::endl; - std::cout << "Options:" << std::endl; - std::cout << std::endl; - std::cout << " -h " << help_message << std::endl; - std::cout << " -m \"\" " << model_message << std::endl; - std::cout << " -i \"\" " << input_message << std::endl; - std::cout << " -d \"\" " << target_device_message << std::endl; - std::cout << " -pp \"\" " << plugin_path_message << std::endl; - std::cout << " -pc " << performance_counter_message << std::endl; - std::cout << " -nt \"\" " << ntop_message << std::endl; - std::cout << " -ni \"\" " << iterations_count_message << std::endl; -} diff --git a/inference-engine/samples/lenet_network_graph_builder/main.cpp b/inference-engine/samples/lenet_network_graph_builder/main.cpp deleted file mode 100644 index ab63bab4c22240..00000000000000 --- a/inference-engine/samples/lenet_network_graph_builder/main.cpp +++ /dev/null @@ -1,344 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include - -#include -#include "lenet_network_graph_builder.hpp" - -using namespace InferenceEngine; - -bool ParseAndCheckCommandLine(int argc, char *argv[]) { - slog::info << "Parsing input parameters" << slog::endl; - - gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); - if (FLAGS_h) { - showUsage(); - return false; - } - - if (FLAGS_ni <= 0) { - throw std::logic_error("Incorrect value for ni argument. It should be more than 0"); - } - - if (FLAGS_nt <= 0 || FLAGS_nt > 10) { - throw std::logic_error("Incorrect value for nt argument. It should be more than 0 and less than 10"); - } - - return true; -} - -void readFile(const std::string &file_name, void *buffer, size_t maxSize) { - std::ifstream inputFile; - - inputFile.open(file_name, std::ios::binary | std::ios::in); - if (!inputFile.is_open()) { - throw std::logic_error("cannot open file weight file"); - } - if (!inputFile.read(reinterpret_cast(buffer), maxSize)) { - inputFile.close(); - throw std::logic_error("cannot read bytes from weight file"); - } - - inputFile.close(); -} - -TBlob::CPtr ReadWeights(std::string filepath) { - std::ifstream weightFile(filepath, std::ifstream::ate | std::ifstream::binary); - int64_t fileSize = weightFile.tellg(); - - if (fileSize < 0) { - throw std::logic_error("Incorrect weight file"); - } - - size_t ulFileSize = static_cast(fileSize); - - TBlob::Ptr weightsPtr(new TBlob(Precision::FP32, C, {ulFileSize})); - weightsPtr->allocate(); - readFile(filepath, weightsPtr->buffer(), ulFileSize); - - return weightsPtr; -} - -/** - * @brief The entry point for inference engine automatic squeezenet networt builder sample - * @file squeezenet networt builder/main.cpp - * @example squeezenet networt builder/main.cpp - */ -int main(int argc, char *argv[]) { - try { - slog::info << "InferenceEngine: " << GetInferenceEngineVersion() << slog::endl; - - if (!ParseAndCheckCommandLine(argc, argv)) { - return 0; - } - - /** This vector stores paths to the processed images **/ - std::vector images; - parseInputFilesArguments(images); - if (images.empty()) { - throw std::logic_error("No suitable images were found"); - } - - // --------------------------- 1. 
Load Plugin for inference engine ------------------------------------- - slog::info << "Loading plugin" << slog::endl; - InferencePlugin plugin = PluginDispatcher({ FLAGS_pp }).getPluginByDevice(FLAGS_d); - printPluginVersion(plugin, std::cout); - - /** Per layer metrics **/ - if (FLAGS_pc) { - plugin.SetConfig({ { PluginConfigParams::KEY_PERF_COUNT, PluginConfigParams::YES } }); - } - // ----------------------------------------------------------------------------------------------------- - - //--------------------------- 2. Create network using graph builder ------------------------------------ - TBlob::CPtr weightsPtr = ReadWeights(FLAGS_m); - - Builder::Network builder("LeNet"); - idx_t layerId = builder.addLayer(Builder::InputLayer("data").setPort(Port({1, 1, 28, 28}))); - auto ptrWeights = make_shared_blob(TensorDesc(Precision::FP32, {500}, Layout::C), - weightsPtr->cbuffer().as()); - auto ptrBiases = make_shared_blob(TensorDesc(Precision::FP32, {20}, Layout::C), - weightsPtr->cbuffer().as() + 500); - idx_t weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(ptrWeights)); - idx_t biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(ptrBiases)); - layerId = builder.addLayer({{layerId}, {weightsId}, {biasesId}}, Builder::ConvolutionLayer("conv1") - .setKernel({5, 5}).setDilation({1, 1}).setGroup(1).setStrides({1, 1}).setOutDepth(20) - .setPaddingsBegin({0, 0}).setPaddingsEnd({0, 0})); - layerId = builder.addLayer({{layerId}}, Builder::PoolingLayer("pool1").setExcludePad(true).setKernel({2, 2}) - .setPaddingsBegin({0, 0}).setPaddingsEnd({0, 0}) - .setPoolingType(Builder::PoolingLayer::PoolingType::MAX) - .setRoundingType(Builder::PoolingLayer::RoundingType::CEIL).setStrides({2, 2})); - ptrWeights = make_shared_blob(TensorDesc(Precision::FP32, {25000}, Layout::C), - weightsPtr->cbuffer().as() + 520); - ptrBiases = make_shared_blob(TensorDesc(Precision::FP32, {50}, Layout::C), - weightsPtr->cbuffer().as() + 25520); - weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(ptrWeights)); - biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(ptrBiases)); - layerId = builder.addLayer({{layerId}, {weightsId}, {biasesId}}, Builder::ConvolutionLayer("conv2") - .setDilation({1, 1}).setGroup(1).setKernel({5, 5}).setOutDepth(50).setPaddingsBegin({0, 0}) - .setPaddingsEnd({0, 0}).setStrides({1, 1})); - layerId = builder.addLayer({{layerId}}, Builder::PoolingLayer("pool2").setExcludePad(true).setKernel({2, 2}) - .setPaddingsBegin({0, 0}).setPaddingsEnd({0, 0}).setPoolingType(Builder::PoolingLayer::PoolingType::MAX) - .setRoundingType(Builder::PoolingLayer::RoundingType::CEIL).setStrides({2, 2})); - ptrWeights = make_shared_blob(TensorDesc(Precision::FP32, {400000}, Layout::C), - weightsPtr->cbuffer().as() + 102280 / 4); - ptrBiases = make_shared_blob(TensorDesc(Precision::FP32, {500}, Layout::C), - weightsPtr->cbuffer().as() + 1702280 / 4); - weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(ptrWeights)); - biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(ptrBiases)); - layerId = builder.addLayer({{layerId}, {weightsId}, {biasesId}}, Builder::FullyConnectedLayer("ip1") - .setOutputNum(500)); - layerId = builder.addLayer({{layerId}}, Builder::ReLULayer("relu1").setNegativeSlope(0.0f)); - ptrWeights = make_shared_blob(TensorDesc(Precision::FP32, {5000}, Layout::C), - weightsPtr->cbuffer().as() + 1704280 / 4); - ptrBiases = make_shared_blob(TensorDesc(Precision::FP32, {10}, Layout::C), - weightsPtr->cbuffer().as() + 
1724280 / 4); - weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(ptrWeights)); - biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(ptrBiases)); - layerId = builder.addLayer({{layerId}, {weightsId}, {biasesId}}, Builder::FullyConnectedLayer("ip2") - .setOutputNum(10)); - layerId = builder.addLayer({{layerId}}, Builder::SoftMaxLayer("prob").setAxis(1)); - builder.addLayer({PortInfo(layerId)}, Builder::OutputLayer("sf_out")); - - CNNNetwork network{Builder::convertToICNNNetwork(builder.build())}; - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 3. Configure input & output --------------------------------------------- - // --------------------------- Prepare input blobs ----------------------------------------------------- - slog::info << "Preparing input blobs" << slog::endl; - - InputsDataMap inputInfo = network.getInputsInfo(); - if (inputInfo.size() != 1) { - throw std::logic_error("Sample supports topologies only with 1 input"); - } - - auto inputInfoItem = *inputInfo.begin(); - - /** Specifying the precision and layout of input data provided by the user. - * This should be called before load of the network to the plugin **/ - inputInfoItem.second->setPrecision(Precision::FP32); - inputInfoItem.second->setLayout(Layout::NCHW); - - std::vector> imagesData; - for (auto & i : images) { - FormatReader::ReaderPtr reader(i.c_str()); - if (reader.get() == nullptr) { - slog::warn << "Image " + i + " cannot be read!" << slog::endl; - continue; - } - /** Store image data **/ - std::shared_ptr data( - reader->getData(inputInfoItem.second->getTensorDesc().getDims()[3], - inputInfoItem.second->getTensorDesc().getDims()[2])); - if (data.get() != nullptr) { - imagesData.push_back(data); - } - } - - if (imagesData.empty()) { - throw std::logic_error("Valid input images were not found!"); - } - - /** Setting batch size using image count **/ - network.setBatchSize(imagesData.size()); - size_t batchSize = network.getBatchSize(); - slog::info << "Batch size is " << std::to_string(batchSize) << slog::endl; - - // --------------------------- Prepare output blobs ----------------------------------------------------- - slog::info << "Checking that the outputs are as the demo expects" << slog::endl; - OutputsDataMap outputInfo(network.getOutputsInfo()); - std::string firstOutputName; - - for (auto & item : outputInfo) { - if (firstOutputName.empty()) { - firstOutputName = item.first; - } - DataPtr outputData = item.second; - if (!outputData) { - throw std::logic_error("output data pointer is not valid"); - } - - item.second->setPrecision(Precision::FP32); - } - - if (outputInfo.size() != 1) { - throw std::logic_error("This demo accepts networks having only one output"); - } - - DataPtr& output = outputInfo.begin()->second; - auto outputName = outputInfo.begin()->first; - - const SizeVector outputDims = output->getTensorDesc().getDims(); - const int classCount = outputDims[1]; - - if (classCount > 10) { - throw std::logic_error("Incorrect number of output classes for LeNet network"); - } - - if (outputDims.size() != 2) { - throw std::logic_error("Incorrect output dimensions for LeNet"); - } - output->setPrecision(Precision::FP32); - output->setLayout(Layout::NC); - - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 4. 
Loading model to the plugin ------------------------------------------ - slog::info << "Loading model to the plugin" << slog::endl; - ExecutableNetwork exeNetwork = plugin.LoadNetwork(network, {}); - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 5. Create infer request ------------------------------------------------- - InferRequest infer_request = exeNetwork.CreateInferRequest(); - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 6. Prepare input -------------------------------------------------------- - /** Iterate over all the input blobs **/ - for (const auto & item : inputInfo) { - /** Creating input blob **/ - Blob::Ptr input = infer_request.GetBlob(item.first); - - /** Filling input tensor with images. First b channel, then g and r channels **/ - size_t num_channels = input->getTensorDesc().getDims()[1]; - size_t image_size = input->getTensorDesc().getDims()[2] * input->getTensorDesc().getDims()[3]; - - auto data = input->buffer().as::value_type*>(); - - /** Iterate over all input images **/ - for (size_t image_id = 0; image_id < imagesData.size(); ++image_id) { - /** Iterate over all pixel in image (b,g,r) **/ - for (size_t pid = 0; pid < image_size; pid++) { - /** Iterate over all channels **/ - for (size_t ch = 0; ch < num_channels; ++ch) { - /** [images stride + channels stride + pixel id ] all in bytes **/ - data[image_id * image_size * num_channels + ch * image_size + pid ] = imagesData.at(image_id).get()[pid*num_channels + ch]; - } - } - } - } - inputInfo = {}; - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 7. Do inference --------------------------------------------------------- - typedef std::chrono::high_resolution_clock Time; - typedef std::chrono::duration> ms; - typedef std::chrono::duration fsec; - - double total = 0.0; - /** Start inference & calc performance **/ - for (size_t iter = 0; iter < FLAGS_ni; ++iter) { - auto t0 = Time::now(); - infer_request.Infer(); - auto t1 = Time::now(); - fsec fs = t1 - t0; - ms d = std::chrono::duration_cast(fs); - total += d.count(); - } - // ----------------------------------------------------------------------------------------------------- - - // --------------------------- 8. 
Process output ------------------------------------------------------- - slog::info << "Processing output blobs" << slog::endl; - - const Blob::Ptr outputBlob = infer_request.GetBlob(firstOutputName); - auto outputData = outputBlob->buffer().as::value_type*>(); - - /** Validating -nt value **/ - const size_t resultsCnt = outputBlob->size() / batchSize; - if (FLAGS_nt > resultsCnt || FLAGS_nt < 1) { - slog::warn << "-nt " << FLAGS_nt << " is not available for this network (-nt should be less than " \ - << resultsCnt+1 << " and more than 0)\n will be used maximal value : " << resultsCnt; - FLAGS_nt = resultsCnt; - } - - /** This vector stores id's of top N results **/ - std::vector results; - TopResults(FLAGS_nt, *outputBlob, results); - - std::cout << std::endl << "Top " << FLAGS_nt << " results:" << std::endl << std::endl; - - /** Print the result iterating over each batch **/ - for (size_t image_id = 0; image_id < batchSize; ++image_id) { - std::cout << "Image " << images[image_id] << std::endl << std::endl; - for (size_t id = image_id * FLAGS_nt, cnt = 0; cnt < FLAGS_nt; ++cnt, ++id) { - std::cout.precision(7); - /** Getting probability for resulting class **/ - const auto result = outputData[results[id] + image_id*(outputBlob->size() / batchSize)]; - std::cout << std::left << std::fixed << "Number: " << results[id] << "; Probability: " << result << std::endl; - } - std::cout << std::endl; - } - if (std::fabs(total) < std::numeric_limits::epsilon()) { - throw std::logic_error("total can't be equal to zero"); - } - // ----------------------------------------------------------------------------------------------------- - std::cout << std::endl << "total inference time: " << total << std::endl; - std::cout << "Average running time of one iteration: " << total / static_cast(FLAGS_ni) << " ms" << std::endl; - std::cout << std::endl << "Throughput: " << 1000 * static_cast(FLAGS_ni) * batchSize / total << " FPS" << std::endl; - std::cout << std::endl; - // ----------------------------------------------------------------------------------------------------- - - /** Show performance results **/ - if (FLAGS_pc) { - printPerformanceCounts(infer_request, std::cout); - } - } catch (const std::exception &ex) { - slog::err << ex.what() << slog::endl; - return 3; - } - return 0; -} \ No newline at end of file diff --git a/inference-engine/samples/object_detection_sample_ssd/CMakeLists.txt b/inference-engine/samples/object_detection_sample_ssd/CMakeLists.txt index 436edc277e1d68..5ff0036334390f 100644 --- a/inference-engine/samples/object_detection_sample_ssd/CMakeLists.txt +++ b/inference-engine/samples/object_detection_sample_ssd/CMakeLists.txt @@ -2,33 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # -set (TARGET_NAME "object_detection_sample_ssd") - -file (GLOB MAIN_SRC - ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp - ) - -file (GLOB MAIN_HEADERS - ${CMAKE_CURRENT_SOURCE_DIR}/*.h - ) - -# Create named folders for the sources within the .vcproj -# Empty name lists them directly under the .vcproj -source_group("src" FILES ${MAIN_SRC}) -source_group("include" FILES ${MAIN_HEADERS}) - -link_directories(${LIB_FOLDER}) - -# Create library file from sources. 
-add_executable(${TARGET_NAME} ${MAIN_SRC} ${MAIN_HEADERS}) - -add_dependencies(${TARGET_NAME} gflags) - -set_target_properties(${TARGET_NAME} PROPERTIES "CMAKE_CXX_FLAGS" "${CMAKE_CXX_FLAGS} -fPIE" -COMPILE_PDB_NAME ${TARGET_NAME}) - -target_link_libraries(${TARGET_NAME} format_reader IE::ie_cpu_extension ${InferenceEngine_LIBRARIES} gflags) - -if(UNIX) - target_link_libraries( ${TARGET_NAME} ${LIB_DL} pthread) -endif() +ie_add_sample(NAME object_detection_sample_ssd + SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp" + HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/object_detection_sample_ssd.h" + DEPENDENCIES format_reader) \ No newline at end of file diff --git a/inference-engine/samples/object_detection_sample_ssd/README.md b/inference-engine/samples/object_detection_sample_ssd/README.md index bf8b964e099e10..237067075be818 100644 --- a/inference-engine/samples/object_detection_sample_ssd/README.md +++ b/inference-engine/samples/object_detection_sample_ssd/README.md @@ -6,7 +6,7 @@ networks like SSD-VGG on Intel® Processors and Intel® HD Graphics. ## How It Works Upon the start-up the sample application reads command line parameters and loads a network and an image to the Inference -Engine plugin. When inference is done, the application creates an +Engine device. When inference is done, the application creates an output image and outputs data to the standard output stream. > **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md). @@ -26,13 +26,10 @@ Options: -h Print a usage message. -i "" Required. Path to an .bmp image. -m "" Required. Path to an .xml file with a trained model. - -l "" Required for CPU custom layers. Absolute path to a shared library with the kernel implementations. + -l "" Required for CPU custom layers. Absolute path to a shared library with the kernels implementations. Or - -c "" Required for GPU custom kernels. Absolute path to the .xml file with the kernel descriptions. - -pp "" Optional. Path to a plugin folder. - -d "" Optional. Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is acceptable. Sample will look for a suitable plugin for device specified - -pc Optional. Enables per-layer performance report - -ni "" Optional. Number of iterations. Default value is 1 + -c "" Required for GPU custom kernels. Absolute path to the .xml file with the kernels descriptions. + -d "" Optional. Specify the target device to infer on (the list of available devices is shown below). Default value is CPU. Use "-d HETERO:" format to specify HETERO plugin. Sample will look for a suitable plugin for device specified -p_msg Optional. 
diff --git a/inference-engine/samples/object_detection_sample_ssd/main.cpp b/inference-engine/samples/object_detection_sample_ssd/main.cpp
index 32e41e78c1e0b6..0bbf4ee39f0f4d 100644
--- a/inference-engine/samples/object_detection_sample_ssd/main.cpp
+++ b/inference-engine/samples/object_detection_sample_ssd/main.cpp
@@ -3,17 +3,12 @@
 //
 
 #include 
-#include 
 #include 
-#include 
-#include 
 #include 
 #include 
 #include 
-#include 
-#include 
-#include 
 #include 
+#include 
 #include 
 #include 
@@ -22,6 +17,10 @@
 #include 
 #include 
 #include 
+
+#include 
+#include 
+
 #include "object_detection_sample_ssd.h"
 
 using namespace InferenceEngine;
@@ -33,15 +32,12 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) {
     gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
     if (FLAGS_h) {
         showUsage();
+        showAvailableDevices();
         return false;
     }
     slog::info << "Parsing input parameters" << slog::endl;
 
-    if (FLAGS_ni < 1) {
-        throw std::logic_error("Parameter -ni should be greater than 0 (default: 1)");
-    }
-
     if (FLAGS_i.empty()) {
         throw std::logic_error("Parameter -i is not set");
     }
@@ -53,6 +49,12 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) {
     return true;
 }
 
+static std::map<std::string, std::string> configure(const std::string& confFileName) {
+    auto config = parseConfig(confFileName);
+
+    return config;
+}
+
 /**
 * \brief The entry point for the Inference Engine object_detection sample application
 * \file object_detection_sample_ssd/main.cpp
@@ -76,11 +78,15 @@ int main(int argc, char *argv[]) {
         if (images.empty()) throw std::logic_error("No suitable images were found");
         // -----------------------------------------------------------------------------------------------------
 
-        // --------------------------- 3. Load Plugin for inference engine -------------------------------------
-        slog::info << "Loading plugin" << slog::endl;
-        InferencePlugin plugin = PluginDispatcher({ FLAGS_pp }).getPluginByDevice(FLAGS_d);
+        // --------------------------- 3. Load inference engine -------------------------------------
+        slog::info << "Loading Inference Engine" << slog::endl;
+        Core ie;
+
+        slog::info << "Device info: " << slog::endl;
+        std::cout << ie.GetVersions(FLAGS_d);
+
         if (FLAGS_p_msg) {
-            static_cast<InferenceEngine::InferenceEnginePluginPtr>(plugin)->SetLogCallback(error_listener);
+            ie.SetLogCallback(error_listener);
         }
 
         /*If CPU device, load default library with extensions that come with the product*/
@@ -90,29 +96,21 @@ int main(int argc, char *argv[]) {
         * custom MKLDNNPlugin layer implementations. These layers are not supported
         * by mkldnn, but they can be useful for inferring custom topologies.
        **/
-            plugin.AddExtension(std::make_shared<Extensions::Cpu::CpuExtensions>());
+            ie.AddExtension(std::make_shared<Extensions::Cpu::CpuExtensions>(), "CPU");
         }
 
         if (!FLAGS_l.empty()) {
             // CPU(MKLDNN) extensions are loaded as a shared library and passed as a pointer to base extension
             IExtensionPtr extension_ptr = make_so_pointer<IExtension>(FLAGS_l);
-            plugin.AddExtension(extension_ptr);
+            ie.AddExtension(extension_ptr, "CPU");
             slog::info << "CPU Extension loaded: " << FLAGS_l << slog::endl;
         }
 
         if (!FLAGS_c.empty()) {
             // clDNN Extensions are loaded from an .xml description and OpenCL kernel files
-            plugin.SetConfig({ { PluginConfigParams::KEY_CONFIG_FILE, FLAGS_c } });
+            ie.SetConfig({ { PluginConfigParams::KEY_CONFIG_FILE, FLAGS_c } }, "GPU");
             slog::info << "GPU Extension loaded: " << FLAGS_c << slog::endl;
         }
-
-        /** Setting plugin parameter for per layer metrics **/
-        if (FLAGS_pc) {
-            plugin.SetConfig({ { PluginConfigParams::KEY_PERF_COUNT, PluginConfigParams::YES } });
-        }
-
-        /** Printing plugin version **/
-        printPluginVersion(plugin, std::cout);
         // -----------------------------------------------------------------------------------------------------
 
         // --------------------------- 4. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
@@ -191,7 +189,7 @@ int main(int argc, char *argv[]) {
         std::string outputName;
         DataPtr outputInfo;
         for (const auto& out : outputsInfo) {
-            if (out.second->creatorLayer.lock()->type == "DetectionOutput") {
+            if (out.second->getCreatorLayer().lock()->type == "DetectionOutput") {
                 outputName = out.first;
                 outputInfo = out.second;
             }
@@ -214,17 +212,18 @@ int main(int argc, char *argv[]) {
             throw std::logic_error("Incorrect output dimensions for SSD model");
         }
 
-        /** Set the precision of output data provided by the user, should be called before load of the network to the plugin **/
+        /** Set the precision of output data provided by the user, should be called before load of the network to the device **/
         outputInfo->setPrecision(Precision::FP32);
         // -----------------------------------------------------------------------------------------------------
 
-        // --------------------------- 7. Loading model to the plugin ------------------------------------------
-        slog::info << "Loading model to the plugin" << slog::endl;
+        // --------------------------- 7. Loading model to the device ------------------------------------------
+        slog::info << "Loading model to the device" << slog::endl;
 
-        ExecutableNetwork executable_network = plugin.LoadNetwork(network, {});
+        ExecutableNetwork executable_network = ie.LoadNetwork(network, FLAGS_d, configure(FLAGS_config));
         // -----------------------------------------------------------------------------------------------------
 
         // --------------------------- 8. Create infer request -------------------------------------------------
+        slog::info << "Create infer request" << slog::endl;
         InferRequest infer_request = executable_network.CreateInferRequest();
         // -----------------------------------------------------------------------------------------------------
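Taken together, the hunks above replace the old `PluginDispatcher`/`InferencePlugin` pair with the `Core` object. A condensed sketch of the resulting 2019 R2-style flow (paths and device name are placeholders, error handling omitted, not the full sample):

```cpp
#include <string>
#include <inference_engine.hpp>

int runOnce(const std::string &xmlPath, const std::string &binPath, const std::string &device) {
    using namespace InferenceEngine;

    Core ie;                  // replaces PluginDispatcher/InferencePlugin
    ie.GetVersions(device);   // per-device version info, printed by the sample

    CNNNetReader reader;      // IR reader used by the 2019 R2 samples
    reader.ReadNetwork(xmlPath);
    reader.ReadWeights(binPath);
    CNNNetwork network = reader.getNetwork();

    // The device is now chosen at load time instead of by picking a plugin up front.
    ExecutableNetwork exec = ie.LoadNetwork(network, device, {});
    InferRequest request = exec.CreateInferRequest();
    request.Infer();          // single synchronous run
    return 0;
}
```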
@@ -298,22 +297,8 @@ int main(int argc, char *argv[]) {
        // --------------------------- 10. Do inference ---------------------------------------------------------
-        slog::info << "Start inference (" << FLAGS_ni << " iterations)" << slog::endl;
-
-        typedef std::chrono::high_resolution_clock Time;
-        typedef std::chrono::duration<double, std::ratio<1, 1000>> ms;
-        typedef std::chrono::duration<float> fsec;
-
-        double total = 0.0;
-        /** Start inference & calc performance **/
-        for (size_t iter = 0; iter < FLAGS_ni; ++iter) {
-            auto t0 = Time::now();
-            infer_request.Infer();
-            auto t1 = Time::now();
-            fsec fs = t1 - t0;
-            ms d = std::chrono::duration_cast<ms>(fs);
-            total += d.count();
-        }
+        slog::info << "Start inference" << slog::endl;
+        infer_request.Infer();
         // -----------------------------------------------------------------------------------------------------
 
         // --------------------------- 11. Process output -------------------------------------------------------
@@ -365,15 +350,6 @@ int main(int argc, char *argv[]) {
             }
         }
         // -----------------------------------------------------------------------------------------------------
-        std::cout << std::endl << "total inference time: " << total << std::endl;
-        std::cout << "Average running time of one iteration: " << total / static_cast<double>(FLAGS_ni) << " ms" << std::endl;
-        std::cout << std::endl << "Throughput: " << 1000 * static_cast<double>(FLAGS_ni) * batchSize / total << " FPS" << std::endl;
-        std::cout << std::endl;
-
-        /** Show performance results **/
-        if (FLAGS_pc) {
-            printPerformanceCounts(infer_request, std::cout);
-        }
     } catch (const std::exception& error) {
         slog::err << error.what() << slog::endl;
@@ -385,5 +361,7 @@ int main(int argc, char *argv[]) {
     }
     slog::info << "Execution successful" << slog::endl;
+    slog::info << slog::endl << "This sample is an API example, for any performance measurements "
+        "please use the dedicated benchmark_app tool" << slog::endl;
     return 0;
 }
diff --git a/inference-engine/samples/object_detection_sample_ssd/object_detection_sample_ssd.h b/inference-engine/samples/object_detection_sample_ssd/object_detection_sample_ssd.h
index 540ed59cef6656..8a5a4ab9716a2c 100644
--- a/inference-engine/samples/object_detection_sample_ssd/object_detection_sample_ssd.h
+++ b/inference-engine/samples/object_detection_sample_ssd/object_detection_sample_ssd.h
@@ -9,12 +9,6 @@
 #include 
 #include 
 
-#ifdef _WIN32
-#include 
-#else
-#include 
-#endif
-
 /* thickness of a line (in pixels) to be used for bounding boxes */
 #define BBOX_THICKNESS 2
@@ -24,9 +18,6 @@
 static const char help_message[] = "Print a usage message.";
 
 /// @brief message for images argument
 static const char image_message[] = "Required. Path to a .bmp image.";
 
-/// @brief message for plugin_path argument
-static const char plugin_path_message[] = "Optional. Path to a plugin folder.";
-
 /// @brief message for model argument
 static const char model_message[] = "Required. Path to an .xml file with a trained model.";
 
@@ -35,15 +26,10 @@ static const char plugin_message[] = "Plugin name. For example MKLDNNPlugin. If this parameter is set, " \
 "the sample will look for this plugin only";
 
 /// @brief message for assigning cnn calculation to device
-static const char target_device_message[] = "Optional. Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is acceptable. " \
+static const char target_device_message[] = "Optional. Specify the target device to infer on (the list of available devices is shown below). " \
+"Default value is CPU. Use \"-d HETERO:<comma-separated_devices_list>\" format to specify HETERO plugin. 
" \ "Sample will look for a suitable plugin for device specified"; -/// @brief message for performance counters -static const char performance_counter_message[] = "Optional. Enables per-layer performance report"; - -/// @brief message for iterations count -static const char iterations_count_message[] = "Optional. Number of iterations. Default value is 1"; - /// @brief message for clDNN custom kernels desc static const char custom_cldnn_message[] = "Required for GPU custom kernels. "\ "Absolute path to the .xml file with the kernels descriptions."; @@ -55,6 +41,9 @@ static const char custom_cpu_library_message[] = "Required for CPU custom layers /// @brief message for plugin messages static const char plugin_err_message[] = "Optional. Enables messages from a plugin"; +/// @brief message for config argument +static constexpr char config_message[] = "Path to the configuration file. Default value: \"config\"."; + /// \brief Define flag for showing help message
 DEFINE_bool(h, false, help_message);
@@ -66,16 +55,9 @@ DEFINE_string(i, "", image_message);
 /// It is a required parameter
 DEFINE_string(m, "", model_message);
 
-/// \brief Define parameter to set path to plugins
-/// Default is ./lib
-DEFINE_string(pp, "", plugin_path_message);
-
 /// \brief Define the target device to infer on
DEFINE_string(d, "CPU", target_device_message); -/// \brief Enable per-layer performance report -DEFINE_bool(pc, false, performance_counter_message); - /// @brief Define parameter for clDNN custom kernels path
 /// Default is ./lib
 DEFINE_string(c, "", custom_cldnn_message);
@@ -84,12 +66,12 @@ DEFINE_string(c, "", custom_cldnn_message);
 /// It is an optional parameter
 DEFINE_string(l, "", custom_cpu_library_message);
 
-/// @brief Iterations count (default 1)
-DEFINE_uint32(ni, 1, iterations_count_message);
-
 /// @brief Enable plugin messages
 DEFINE_bool(p_msg, false, plugin_err_message);
 
+/// @brief Define path to plugin config
+DEFINE_string(config, "", config_message);
+
 /**
 * \brief This function shows a help message
 */
@@ -101,12 +83,9 @@ static void showUsage() {
     std::cout << "    -h " << help_message << std::endl;
     std::cout << "    -i \"\" " << image_message << std::endl;
     std::cout << "    -m \"\" " << model_message << std::endl;
-    std::cout << "      -l \"\" " << custom_cpu_library_message << std::endl;
+    std::cout << "      -l \"\" " << custom_cpu_library_message << std::endl;
     std::cout << "          Or" << std::endl;
-    std::cout << "      -c \"\" " << custom_cldnn_message << std::endl;
-    std::cout << "    -pp \"\" " << plugin_path_message << std::endl;
+    std::cout << "      -c \"\" " << custom_cldnn_message << std::endl;
     std::cout << "    -d \"\" " << target_device_message << std::endl;
-    std::cout << "    -pc " << performance_counter_message << std::endl;
-    std::cout << "    -ni \"\" " << iterations_count_message << std::endl;
     std::cout << "    -p_msg " << plugin_err_message << std::endl;
 }
diff --git a/inference-engine/samples/perfcheck/CMakeLists.txt b/inference-engine/samples/perfcheck/CMakeLists.txt
deleted file mode 100644
index 4a68a8bfa47b2e..00000000000000
--- a/inference-engine/samples/perfcheck/CMakeLists.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright (C) 2018-2019 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-#
-
-set(TARGET_NAME perfcheck)
-
-find_package(OpenCV COMPONENTS imgproc QUIET)
-if(NOT(OpenCV_FOUND))
-    message(WARNING "OPENCV is disabled or not found, " ${TARGET_NAME} " skipped")
-    return()
-endif()
-
-file(GLOB SOURCES *.cpp)
-
-add_executable(${TARGET_NAME} ${SOURCES})
-
-if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-    target_compile_options(${TARGET_NAME}
-        PRIVATE "-Wall"
-        PRIVATE "-Wextra"
-        PRIVATE "-Wformat"
-        PRIVATE "-Wno-missing-field-initializers"
-    )
-endif()
-
-if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
-    target_compile_options(${TARGET_NAME}
-        PRIVATE "-Wno-c++98-compat"
-        PRIVATE "-Wno-global-constructors"
-        PRIVATE "-Wno-missing-variable-declarations"
-        PRIVATE "-Wno-exit-time-destructors"
-        PRIVATE "-Wno-undefined-func-template"
-    )
-endif()
-
-target_include_directories(${TARGET_NAME} SYSTEM PRIVATE
-    "${IE_MAIN_SOURCE_DIR}/include"
-    "${IE_MAIN_SOURCE_DIR}/samples/common"
-    "${IE_MAIN_SOURCE_DIR}/samples/common/format_reader"
-    "${IE_MAIN_SOURCE_DIR}/samples/common/os/windows"
-    "${CMAKE_SOURCE_DIR}/src/vpu/graph_transformer/include"
-    ${OpenCV_INCLUDE_DIRS}
-)
-
-add_dependencies(${TARGET_NAME} gflags IE::ie_cpu_extension)
-target_link_libraries(${TARGET_NAME} PRIVATE
-    ${InferenceEngine_LIBRARIES} format_reader
-    IE::ie_cpu_extension
-    ${OpenCV_LIBS}
-    gflags
-    ${CMAKE_DL_LIBS}
-)
-
-set_target_properties(${TARGET_NAME} PROPERTIES "CMAKE_CXX_FILES" "${CMAKE_CXX_FILES} -fPIE")
diff --git a/inference-engine/samples/perfcheck/README.md b/inference-engine/samples/perfcheck/README.md
deleted file mode 100644
index e38bd29e685e79..00000000000000
--- a/inference-engine/samples/perfcheck/README.md
+++ /dev/null
@@ -1,77 +0,0 @@
-# Perfcheck Sample
-
-This topic demonstrates how to build and run the Perfcheck sample application, which estimates performance by calculating minimum, average, and
maximum FPS.
-
-## How It Works
-
-Upon start-up, the sample application reads command line parameters and loads a network and its inputs from a given directory to the Inference Engine plugin.
-Then the application starts infer requests in asynchronous mode until the specified number of iterations is finished.
-After the inference stage, the Perfcheck sample computes the total execution time, divides it into 10 intervals, and evaluates the minimum, average, and maximum FPS among these intervals.
-
-## Running
-
-Running the application with the `-h` option yields the following usage message:
-
-```sh
-./perfcheck -h
-[ INFO ] Inference Engine:
-        API version ............
-        Build ..................
-
-perfcheck [OPTIONS]
-[OPTIONS]:
-	-m                       Required. Path to an .xml file with a trained model.
-	-h                       Optional. Print a usage message.
-	-d                       Optional. Specify the target device to infer on. The sample will look for a suitable plugin for the device specified. Default value: CPU.
-	-pp                      Optional. Path to a plugin folder.
-	-l                       Optional. Required for CPU custom layers. Absolute path to a shared library with the kernel implementations.
-	-c                       Optional. Required for GPU custom kernels. Absolute path to an .xml file with the kernel descriptions.
-	-inputs_dir              Optional. Path to a folder with images and binaries for inputs. Default value: ".".
-	-config                  Optional. Path to a configuration file.
-	-num_iterations          Optional. Specify number of iterations. Default value: 1000. Must be greater than or equal to 1000.
-	-batch                   Optional. Specify batch. Default value: 1.
-	-num_networks            Optional. Specify number of networks. Default value: 1. Must be less than or equal to 16.
-	-num_requests            Optional. Specify number of infer requests. Default value depends on specified device.
-	-num_fpga_devices        Optional. Specify number of FPGA devices. Default value: 1.
-```
-
-Running the application with the empty list of options yields an error message.
-
-To run the sample, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
-
-> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
-
-You can use the following command to do inference on CPU on images from a folder using a trained Faster R-CNN network:
-
-```sh
-./perfcheck -m /faster_rcnn.xml -inputs_dir -d CPU
-```
-
-## Sample Output
-
-The application outputs performance statistics that show: total execution time (in milliseconds), number of iterations, batch size, and minimum, average, and maximum FPS.
-Example of sample output:
-
-```sh
-[ INFO ] Inference Engine:
-        API version ............
-        Build ..................
-[ INFO ] Loading network files:
-[ INFO ] 
-[ INFO ] 
-[ INFO ] Loading network 0
-[ INFO ] All networks are loaded
-
-Total time: 8954.61 ms
-Num iterations: 1000
-Batch: 1
-Min FPS: 110.558
-Avg FPS: 111.674
-Max FPS: 112.791
-```
-
-## See Also
-
-* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
-* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
-* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader)
diff --git a/inference-engine/samples/perfcheck/main.cpp b/inference-engine/samples/perfcheck/main.cpp
deleted file mode 100644
index f32ed55d043eb3..00000000000000
--- a/inference-engine/samples/perfcheck/main.cpp
+++ /dev/null
@@ -1,551 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#if defined(_WIN32)
-#include 
-#else
-#include 
-#include 
-#endif
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include 
-#include 
-
-#include "inference_engine.hpp"
-#include "ext_list.hpp"
-
-#include "vpu/vpu_plugin_config.hpp"
-#include "samples/common.hpp"
-#include "samples/slog.hpp"
-
-#include "perfcheck.h"
-
-
-static bool parseCommandLine(int *argc, char ***argv) {
-    gflags::ParseCommandLineNonHelpFlags(argc, argv, true);
-
-    if (FLAGS_h) {
-        showUsage();
-        return false;
-    }
-
-    if (FLAGS_m.empty()) {
-        throw std::invalid_argument("Path to model xml file is required");
-    }
-
-    if (FLAGS_num_iterations < MIN_ITERATIONS) {
-        throw std::invalid_argument("Number of iterations must be not smaller than 1000. "
-                                    "Got " + std::to_string(FLAGS_num_iterations));
-    }
-
-    if (MAX_NETWORKS < FLAGS_num_networks) {
-        throw std::invalid_argument("Only number of networks not greater than " + std::to_string(MAX_NETWORKS) + " "
-                                    "is supported. Got " + std::to_string(FLAGS_num_networks));
-    }
-
-    if (FLAGS_d.empty()) {
-        throw std::invalid_argument("Plugin name is required");
-    }
-
-    if (1 < *argc) {
-        std::stringstream message;
-        message << "Unknown arguments: ";
-        for (auto arg = 1; arg < *argc; arg++) {
-            message << argv[arg];
-            if (arg < *argc) {
-                message << " ";
-            }
-        }
-        throw std::invalid_argument(message.str());
-    }
-
-    return true;
-}
-
-static std::map<std::string, std::string> parseConfig(const std::string &configName, char comment = '#') {
-    std::map<std::string, std::string> config = {};
-
-    std::ifstream file(configName);
-    if (!file.is_open()) {
-        return config;
-    }
-
-    std::string key, value;
-    while (file >> key >> value) {
-        if (key.empty() || key[0] == comment) {
-            continue;
-        }
-        config[key] = value;
-    }
-
-    return config;
-}
-
-static std::size_t getNumberRequests(const std::string &plugin) {
-    static const std::unordered_map<std::string, std::size_t> supported_plugins = {
-        { "MYRIAD", 4 },
-        { "FPGA", 3 },
-    };
-
-    auto device = plugin;
-    if (plugin.find("HETERO:") == 0) {
-        auto separator = plugin.find(",");
-        auto deviceBegin = std::string("HETERO:").size();
-        auto deviceEnd = separator == std::string::npos ? plugin.size() : separator;
-        device = plugin.substr(deviceBegin, deviceEnd - deviceBegin);
-    }
-
-    auto num_requests = supported_plugins.find(device);
-    return num_requests == supported_plugins.end() ? 1 : num_requests->second;
-}
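For reference, `parseConfig` above consumes whitespace-separated `KEY VALUE` token pairs and drops pairs whose key starts with the comment character. A standalone sketch of the same loop against a hypothetical config file (note the parser reads token pairs, so in this sketch comment lines are kept to exactly two tokens):

```cpp
#include <fstream>
#include <iostream>
#include <map>
#include <string>

// Same rule as parseConfig: read KEY VALUE token pairs, skip pairs whose key starts with '#'.
static std::map<std::string, std::string> readConfig(const std::string &fileName) {
    std::map<std::string, std::string> config;
    std::ifstream file(fileName);
    std::string key, value;
    while (file >> key >> value) {
        if (key.empty() || key[0] == '#') {
            continue;
        }
        config[key] = value;
    }
    return config;
}

int main() {
    {   // a hypothetical two-entry configuration file
        std::ofstream out("sample.config");
        out << "# comment\n"
            << "KEY_DEVICE_ID 0\n"
            << "PERF_COUNT NO\n";
    }
    for (const auto &kv : readConfig("sample.config")) {
        std::cout << kv.first << " = " << kv.second << std::endl;
    }
    return 0;  // prints KEY_DEVICE_ID = 0 and PERF_COUNT = NO
}
```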
1 : num_requests->second; -} - -#if defined(WIN32) || defined(__APPLE__) -typedef std::chrono::time_point time_point; -#else -typedef std::chrono::time_point time_point; -#endif - -static void printFPS(std::size_t num_requests, std::size_t num_intervals, const std::vector &points) { - std::size_t num_exclude = 2 * num_requests; - /* evaluate from the end of previous */ - std::size_t first_point = num_exclude - 1; - std::size_t last_point = points.size() - num_exclude; - auto begin = points[first_point]; - auto end = points[last_point - 1]; - - using ms = std::chrono::duration>; - - auto num_iterations = last_point - first_point - 1; - auto total = std::chrono::duration_cast(end - begin).count(); - auto avg_fps = static_cast(num_iterations) * 1000.0 * FLAGS_batch / total; - - auto min_fps = std::numeric_limits::max(); - auto max_fps = std::numeric_limits::min(); - double step = total / num_intervals; - std::size_t first_point_in_interval = first_point + 1; - auto first_time_in_interval = std::chrono::time_point_cast(begin); - for (std::size_t interval = 0; interval < num_intervals; interval++) { - std::size_t num_points_in_interval = 0; - auto last_time_in_interval = first_time_in_interval + ms(step); - if (interval == num_intervals - 1) { - last_time_in_interval = end; - } - - while (first_point_in_interval + num_points_in_interval < last_point && - points[first_point_in_interval + num_points_in_interval] <= last_time_in_interval) { - num_points_in_interval++; - } - - double fps = num_points_in_interval * FLAGS_batch / step * 1000; - min_fps = std::min(min_fps, fps); - max_fps = std::max(max_fps, fps); - - first_point_in_interval += num_points_in_interval; - first_time_in_interval = last_time_in_interval; - } - - std::cout << std::endl; - std::cout << "Total time: " << total << " ms"; - std::cout << std::endl; - - std::cout << "Num iterations: " << num_iterations << std::endl; - std::cout << "Batch: " << FLAGS_batch << std::endl; - - std::cout << "Min FPS: " << min_fps << std::endl; - std::cout << "Avg FPS: " << avg_fps << std::endl; - std::cout << "Max FPS: " << max_fps << std::endl; -} - -template -static bool isImage(const T &blob) { - auto descriptor = blob->getTensorDesc(); - if (descriptor.getLayout() != InferenceEngine::NCHW) { - return false; - } - - auto channels = descriptor.getDims()[1]; - return channels == 3; -} - -static std::vector extractFilesByExtension(const std::string &directory, const std::string &extension) { - std::vector files; - - DIR *dir = opendir(directory.c_str()); - if (!dir) { - throw std::invalid_argument("Can not open " + directory); - } - - auto getExtension = [](const std::string &name) { - auto extensionPosition = name.rfind('.', name.size()); - return extensionPosition == std::string::npos ? 
"" : name.substr(extensionPosition + 1, name.size() - 1); - }; - - dirent *ent = nullptr; - while ((ent = readdir(dir))) { - std::string file_name = ent->d_name; - if (getExtension(file_name) != extension) { - continue; - } - - std::stringstream stream; - stream << directory << "/" << file_name; - - auto full_file_name = stream.str(); - - struct stat st = {}; - if (stat(full_file_name.c_str(), &st) != 0) { - continue; - } - - bool is_directory = (st.st_mode & S_IFDIR) != 0; - if (is_directory) { - continue; - } - - files.push_back(full_file_name); - } - - closedir(dir); - - return files; -} - -static float asfloat(uint32_t v) { - union { - float f; - std::uint32_t u; - } converter = {0}; - converter.u = v; - return converter.f; -} - -static short f32tof16(float x) { - static float min16 = asfloat((127 - 14) << 23); - - static float max16 = asfloat(((127 + 15) << 23) | 0x007FE000); - static uint32_t max16f16 = ((15 + 15) << 10) | 0x3FF; - - static constexpr std::uint32_t EXP_MASK_F32 = 0x7F800000U; - - union { - float f; - uint32_t u; - } v = {0}; - v.f = x; - - uint32_t s = (v.u >> 16) & 0x8000; - - v.u &= 0x7FFFFFFF; - - if ((v.u & EXP_MASK_F32) == EXP_MASK_F32) { - if (v.u & 0x007FFFFF) { - return static_cast(s | (v.u >> (23 - 10)) | 0x0200); - } else { - return static_cast(s | (v.u >> (23 - 10))); - } - } - - float halfULP = asfloat(v.u & EXP_MASK_F32) * asfloat((127 - 11) << 23); - v.f += halfULP; - - if (v.f < min16 * 0.5f) { - return static_cast(s); - } - - if (v.f < min16) { - return static_cast(s | (1 << 10)); - } - - if (v.f >= max16) { - return static_cast(max16f16 | s); - } - - v.u -= ((127 - 15) << 23); - - v.u >>= (23 - 10); - - return static_cast(v.u | s); -} - -static void loadImage(const std::string &imageFilename, InferenceEngine::Blob::Ptr &blob) { - InferenceEngine::TensorDesc tensDesc = blob->getTensorDesc(); - - cv::Mat image = cv::imread(imageFilename); - if (image.empty()) { - throw std::invalid_argument("Can not read image from " + imageFilename); - } - - std::size_t batch = blob->dims()[3]; - std::size_t w = blob->dims()[0]; - std::size_t h = blob->dims()[1]; - auto img_w = static_cast(image.cols); - auto img_h = static_cast(image.rows); - - auto numBlobChannels = blob->dims()[2]; - auto numImageChannels = static_cast(image.channels()); - if (numBlobChannels != numImageChannels && numBlobChannels != 1) { - throw std::invalid_argument("Input channels mismatch: image channels " + std::to_string(numImageChannels) + - ", network channels " + std::to_string(numBlobChannels) + - ", expecting count of image channels are equal to count if network channels" - "or count of network channels are equal to 1"); - } - - auto nPixels = w * h; - unsigned char *RGB8 = image.data; - float xscale = 1.0f * img_w / w; - float yscale = 1.0f * img_h / h; - - for (std::size_t n = 0; n != batch; n++) { - for (std::size_t i = 0; i < h; ++i) { - auto y = static_cast(std::floor((i + 0.5f) * yscale)); - for (std::size_t j = 0; j < w; ++j) { - auto x = static_cast(std::floor((j + 0.5f) * xscale)); - for (std::size_t k = 0; k < numBlobChannels; k++) { - float value = 1.0f * RGB8[(y * img_w + x) * numImageChannels + k]; - if (InferenceEngine::Precision::FP16 == tensDesc.getPrecision()) { - if (tensDesc.getLayout() == InferenceEngine::NHWC) { - blob->buffer().as()[n * h * w * numBlobChannels + (i * w + j) * numBlobChannels + k] = f32tof16(value); - } else { - blob->buffer().as()[n * h * w * numBlobChannels + (i * w + j) + k * nPixels] = f32tof16(value); - } - } else { - if (tensDesc.getLayout() == 
InferenceEngine::NHWC) { - blob->buffer().as()[n * h * w * numBlobChannels + (i * w + j) * numBlobChannels + k] = value; - } else { - blob->buffer().as()[n * h * w * numBlobChannels + (i * w + j) + k * nPixels] = value; - } - } - } - } - } - } -} - -static void loadBinaryTensor(const std::string &binaryFileName, InferenceEngine::Blob::Ptr &blob) { - InferenceEngine::TensorDesc tensDesc = blob->getTensorDesc(); - - std::ifstream binaryFile(binaryFileName, std::ios_base::binary | std::ios_base::ate); - if (!binaryFile) { - throw std::invalid_argument("Can not open \"" + binaryFileName + "\""); - } - - auto fileSize = static_cast(binaryFile.tellg()); - binaryFile.seekg(0, std::ios_base::beg); - if (!binaryFile.good()) { - throw std::invalid_argument("Can not read \"" + binaryFileName + "\""); - } - - auto networkSize = blob->size() * sizeof(float); - if (fileSize != networkSize) { - throw std::invalid_argument("File \"" + binaryFileName + "\" contains " + std::to_string(fileSize) + " bytes " - "but network expects " + std::to_string(networkSize)); - } - - for (std::size_t i = 0; i < blob->size(); i++) { - float src = 0.f; - binaryFile.read(reinterpret_cast(&src), sizeof(float)); - if (InferenceEngine::Precision::FP16 == tensDesc.getPrecision()) { - blob->buffer().as()[i] = f32tof16(src); - } else { - blob->buffer().as()[i] = src; - } - } -} - -static void loadInputs(std::size_t requestIdx, const std::vector &images, - const std::vector &binaries, InferenceEngine::InferRequest &request, - InferenceEngine::CNNNetwork &network) { - for (auto &&input : network.getInputsInfo()) { - auto blob = request.GetBlob(input.first); - - if (isImage(blob)) { - loadImage(images[requestIdx % images.size()], blob); - } else { - loadBinaryTensor(binaries[requestIdx % binaries.size()], blob); - } - } -} - -int main(int argc, char *argv[]) { - try { - slog::info << "Inference Engine: " << InferenceEngine::GetInferenceEngineVersion() << slog::endl; - - if (!parseCommandLine(&argc, &argv)) { - return EXIT_SUCCESS; - } - - std::string binFileName = fileNameNoExt(FLAGS_m) + ".bin"; - slog::info << "Loading network files:" << - slog::endl << "\t" << FLAGS_m << - slog::endl << "\t" << binFileName << - slog::endl; - - InferenceEngine::CNNNetReader networkReader; - networkReader.ReadNetwork(FLAGS_m); - networkReader.ReadWeights(binFileName); - - auto network = networkReader.getNetwork(); - network.setBatchSize(FLAGS_batch); - - if (FLAGS_d.find("MYRIAD") != std::string::npos || FLAGS_d.find("HDDL") != std::string::npos) { - /** - * on VPU devices FP16 precision allows avoid extra conversion operations and shows better performance - **/ - for (auto &&input : network.getInputsInfo()) { - input.second->setPrecision(InferenceEngine::Precision::FP16); - } - - for (auto &&output : network.getOutputsInfo()) { - output.second->setPrecision(InferenceEngine::Precision::FP16); - } - } - - auto plugin = InferenceEngine::PluginDispatcher({FLAGS_pp}).getPluginByDevice(FLAGS_d); - - /* If CPU device, load default library with extensions that comes with the product */ - if (FLAGS_d.find("CPU") != std::string::npos) { - /** - * cpu_extensions library is compiled from "extension" folder containing - * custom MKLDNNPlugin layer implementations. These layers are not supported - * by mkldnn, but they can be useful for inferencing custom topologies. 
- **/ - plugin.AddExtension(std::make_shared()); - } - - if (!FLAGS_l.empty()) { - plugin.AddExtension(InferenceEngine::make_so_pointer(FLAGS_l)); - slog::info << "CPU Extension loaded: " << FLAGS_l << slog::endl; - } - - if (!FLAGS_c.empty()) { - /* clDNN Extensions are loaded from an .xml description and OpenCL kernel files */ - plugin.SetConfig({{InferenceEngine::PluginConfigParams::KEY_CONFIG_FILE, FLAGS_c}}); - slog::info << "GPU Extension loaded: " << FLAGS_c << slog::endl; - } - - auto config = parseConfig(FLAGS_config); - std::vector networks(FLAGS_num_networks); - for (std::size_t net = 0; net < networks.size(); ++net) { - slog::info << "Loading network " << net; - if (FLAGS_d.find("FPGA") != std::string::npos) { - if (FLAGS_num_fpga_devices != 1) { - config[InferenceEngine::PluginConfigParams::KEY_DEVICE_ID] = std::to_string(net % FLAGS_num_fpga_devices); - slog::info << " to device " << (net % FLAGS_num_fpga_devices); - } - } - slog::info << slog::endl; - - networks[net] = plugin.LoadNetwork(network, config); - } - slog::info << "All networks are loaded" << slog::endl; - - auto num_requests = FLAGS_num_requests == 0 ? getNumberRequests(FLAGS_d) : FLAGS_num_requests; - - auto images = extractFilesByExtension(FLAGS_inputs_dir, "bmp"); - auto hasImageInput = [](const InferenceEngine::CNNNetwork &net) { - auto inputs = net.getInputsInfo(); - auto isImageInput = [](const InferenceEngine::InputsDataMap::value_type &input) { - return isImage(input.second); - }; - return std::any_of(inputs.begin(), inputs.end(), isImageInput); - }; - - if (hasImageInput(network) && images.empty()) { - throw std::invalid_argument("The directory \"" + FLAGS_inputs_dir + "\" does not contain images for network"); - } - - auto binaries = extractFilesByExtension(FLAGS_inputs_dir, "bin"); - auto hasBinaryInput = [](const InferenceEngine::CNNNetwork &net) { - auto inputs = net.getInputsInfo(); - auto isBinaryInput = [](const InferenceEngine::InputsDataMap::value_type &input) { - return !isImage(input.second); - }; - return std::any_of(inputs.begin(), inputs.end(), isBinaryInput); - }; - - if (hasBinaryInput(network) && binaries.empty()) { - throw std::invalid_argument("The directory \"" + FLAGS_inputs_dir + "\" does not contain binaries for network"); - } - - std::size_t iteration{0}; - std::mutex dump_time; - std::atomic num_finished{0}; - - std::promise done; - num_requests *= FLAGS_num_networks; - std::size_t num_iterations = 2 * num_requests + FLAGS_num_iterations + 2 * num_requests; - - std::vector requests(num_requests); - std::vector time_points(num_iterations); - - using callback_t = std::function; - - for (std::size_t request = 0; request < num_requests; ++request) { - requests[request] = networks[request % networks.size()].CreateInferRequest(); - - loadInputs(request, images, binaries, requests[request], network); - - callback_t callback = - [num_requests, num_iterations, &iteration, &time_points, &dump_time, &num_finished, &done] - (InferenceEngine::InferRequest inferRequest, InferenceEngine::StatusCode code) { - if (code != InferenceEngine::StatusCode::OK) { - THROW_IE_EXCEPTION << "Infer request failed with code " << code; - } - - std::size_t current_finished_iteration = 0; - { - std::lock_guard lock(dump_time); - - current_finished_iteration = iteration++; - if (current_finished_iteration < num_iterations) { - time_points[current_finished_iteration] = std::chrono::high_resolution_clock::now(); - } - } - - if (current_finished_iteration < num_iterations - 1) { - inferRequest.StartAsync(); - } else 
{ - if (++num_finished == num_requests) { - done.set_value(); - } - } - }; - - requests[request].SetCompletionCallback(callback); - } - - auto doneFuture = done.get_future(); - - for (auto &&request : requests) { - request.StartAsync(); - } - - doneFuture.wait(); - - printFPS(num_requests, 10, time_points); - } catch (const std::exception &error) { - slog::err << error.what() << slog::endl; - return EXIT_FAILURE; - } catch (...) { - slog::err << "Unknown/internal exception happened." << slog::endl; - return EXIT_FAILURE; - } - - return EXIT_SUCCESS; -} diff --git a/inference-engine/samples/perfcheck/perfcheck.h b/inference-engine/samples/perfcheck/perfcheck.h deleted file mode 100644 index facc5f69237cb8..00000000000000 --- a/inference-engine/samples/perfcheck/perfcheck.h +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -static constexpr std::size_t MIN_ITERATIONS = 1000; -static constexpr std::size_t MAX_NETWORKS = 16; - -/// @brief message for model argument -static constexpr char model_message[] = "Required. Path to an .xml file with a trained model."; -DEFINE_string(m, "", model_message); - -/// @brief message for help argument -static constexpr char help_message[] = "Optional. Print a usage message."; -DEFINE_bool(h, false, help_message); - -/// @brief message target_device argument -static constexpr char target_device_message[] = "Optional. Specify the target device to infer on. " \ -"Sample will look for a suitable plugin for device specified. Default: CPU."; -DEFINE_string(d, "CPU", target_device_message); - -/// @brief message for plugin_path argument -static constexpr char plugin_path_message[] = "Optional. Path to a plugin folder."; -DEFINE_string(pp, "", plugin_path_message); - -/// @brief message for custom_cpu_library argument -static constexpr char custom_cpu_library_message[] = "Optional. Required for CPU custom layers. " \ -"Absolute path to a shared library with the kernels implementation."; -DEFINE_string(l, "", custom_cpu_library_message); - -/// @brief message for custom_gpu_library argument -static constexpr char custom_gpu_library_message[] = "Optional. Required for GPU custom kernels. "\ -"Absolute path to the xml file with the kernels description."; -DEFINE_string(c, "", custom_gpu_library_message); - -/// @brief message for inputs_dir argument -static constexpr char inputs_dir_message[] = "Optional. Path to a folder with images and binaries for inputs. " \ -"Default value: \".\"."; -DEFINE_string(inputs_dir, ".", inputs_dir_message); - -/// @brief message for config argument -static constexpr char config_message[] = "Optional. Path to a configuration file."; -DEFINE_string(config, "", config_message); - -/// @brief message for num_iterations argument -static constexpr char num_iterations_message[] = "Optional. Specify number of iterations. " \ -"Default value: 1000. Must be greater than or equal to 1000."; -DEFINE_uint32(num_iterations, MIN_ITERATIONS, num_iterations_message); - -/// @brief message for batch argument -static constexpr char batch_message[] = "Optional. Specify batch. Default value: 1."; -DEFINE_uint32(batch, 1, batch_message); - -/// @brief message for num_networks argument -static constexpr char num_networks_message[] = "Optional. Specify number of networks. Default value: 1. 
Must be less than or equal to 16"; -DEFINE_uint32(num_networks, 1, num_networks_message); - -/// @brief message for num_requests argument -static constexpr char num_requests_message[] = "Optional. Specify number of infer requests. " \ -"Default value depends on specified device."; -DEFINE_uint32(num_requests, 0, num_requests_message); - -/// @brief message for num_fpga_devices argument -static constexpr char num_fpga_devices_message[] = "Optional. Specify number of FPGA devices. Default value: 1."; -DEFINE_uint32(num_fpga_devices, 1, num_fpga_devices_message); - -/** -* \brief This function shows a help message -*/ -static void showUsage() { - std::cout << std::endl; - std::cout << "perfcheck [OPTIONS]" << std::endl; - std::cout << "[OPTIONS]:" << std::endl; - std::cout << "\t-m \t \t" << model_message << std::endl; - std::cout << "\t-h \t \t" << help_message << std::endl; - std::cout << "\t-d \t \t" << target_device_message << std::endl; - std::cout << "\t-pp \t \t" << plugin_path_message << std::endl; - std::cout << "\t-l \t \t" << custom_cpu_library_message << std::endl; - std::cout << "\t-c \t \t" << custom_gpu_library_message << std::endl; - std::cout << "\t-inputs_dir \t \t" << inputs_dir_message << std::endl; - std::cout << "\t-config \t \t" << config_message << std::endl; - std::cout << "\t-num_iterations \t \t" << num_iterations_message << std::endl; - std::cout << "\t-batch \t \t" << batch_message << std::endl; - std::cout << "\t-num_networks \t \t" << num_networks_message << std::endl; - std::cout << "\t-num_requests \t \t" << num_requests_message << std::endl; - std::cout << "\t-num_fpga_devices \t \t" << num_fpga_devices_message << std::endl; - - std::cout << std::endl; -} diff --git a/inference-engine/samples/sample_data/car.png b/inference-engine/samples/sample_data/car.png deleted file mode 100644 index f22d8d66fa83c4..00000000000000 Binary files a/inference-engine/samples/sample_data/car.png and /dev/null differ diff --git a/inference-engine/samples/sample_data/squeezenet1.1.labels b/inference-engine/samples/sample_data/squeezenet1.1.labels deleted file mode 100644 index a509c007481d30..00000000000000 --- a/inference-engine/samples/sample_data/squeezenet1.1.labels +++ /dev/null @@ -1,1000 +0,0 @@ -tench, Tinca tinca -goldfish, Carassius auratus -great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias -tiger shark, Galeocerdo cuvieri -hammerhead, hammerhead shark -electric ray, crampfish, numbfish, torpedo -stingray -cock -hen -ostrich, Struthio camelus -brambling, Fringilla montifringilla -goldfinch, Carduelis carduelis -house finch, linnet, Carpodacus mexicanus -junco, snowbird -indigo bunting, indigo finch, indigo bird, Passerina cyanea -robin, American robin, Turdus migratorius -bulbul -jay -magpie -chickadee -water ouzel, dipper -kite -bald eagle, American eagle, Haliaeetus leucocephalus -vulture -great grey owl, great gray owl, Strix nebulosa -European fire salamander, Salamandra salamandra -common newt, Triturus vulgaris -eft -spotted salamander, Ambystoma maculatum -axolotl, mud puppy, Ambystoma mexicanum -bullfrog, Rana catesbeiana -tree frog, tree-frog -tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui -loggerhead, loggerhead turtle, Caretta caretta -leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea -mud turtle -terrapin -box turtle, box tortoise -banded gecko -common iguana, iguana, Iguana iguana -American chameleon, anole, Anolis carolinensis -whiptail, whiptail lizard -agama -frilled lizard, 
Chlamydosaurus kingi -alligator lizard -Gila monster, Heloderma suspectum -green lizard, Lacerta viridis -African chameleon, Chamaeleo chamaeleon -Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis -African crocodile, Nile crocodile, Crocodylus niloticus -American alligator, Alligator mississipiensis -triceratops -thunder snake, worm snake, Carphophis amoenus -ringneck snake, ring-necked snake, ring snake -hognose snake, puff adder, sand viper -green snake, grass snake -king snake, kingsnake -garter snake, grass snake -water snake -vine snake -night snake, Hypsiglena torquata -boa constrictor, Constrictor constrictor -rock python, rock snake, Python sebae -Indian cobra, Naja naja -green mamba -sea snake -horned viper, cerastes, sand viper, horned asp, Cerastes cornutus -diamondback, diamondback rattlesnake, Crotalus adamanteus -sidewinder, horned rattlesnake, Crotalus cerastes -trilobite -harvestman, daddy longlegs, Phalangium opilio -scorpion -black and gold garden spider, Argiope aurantia -barn spider, Araneus cavaticus -garden spider, Aranea diademata -black widow, Latrodectus mactans -tarantula -wolf spider, hunting spider -tick -centipede -black grouse -ptarmigan -ruffed grouse, partridge, Bonasa umbellus -prairie chicken, prairie grouse, prairie fowl -peacock -quail -partridge -African grey, African gray, Psittacus erithacus -macaw -sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita -lorikeet -coucal -bee eater -hornbill -hummingbird -jacamar -toucan -drake -red-breasted merganser, Mergus serrator -goose -black swan, Cygnus atratus -tusker -echidna, spiny anteater, anteater -platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus -wallaby, brush kangaroo -koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus -wombat -jellyfish -sea anemone, anemone -brain coral -flatworm, platyhelminth -nematode, nematode worm, roundworm -conch -snail -slug -sea slug, nudibranch -chiton, coat-of-mail shell, sea cradle, polyplacophore -chambered nautilus, pearly nautilus, nautilus -Dungeness crab, Cancer magister -rock crab, Cancer irroratus -fiddler crab -king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica -American lobster, Northern lobster, Maine lobster, Homarus americanus -spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish -crayfish, crawfish, crawdad, crawdaddy -hermit crab -isopod -white stork, Ciconia ciconia -black stork, Ciconia nigra -spoonbill -flamingo -little blue heron, Egretta caerulea -American egret, great white heron, Egretta albus -bittern -crane -limpkin, Aramus pictus -European gallinule, Porphyrio porphyrio -American coot, marsh hen, mud hen, water hen, Fulica americana -bustard -ruddy turnstone, Arenaria interpres -red-backed sandpiper, dunlin, Erolia alpina -redshank, Tringa totanus -dowitcher -oystercatcher, oyster catcher -pelican -king penguin, Aptenodytes patagonica -albatross, mollymawk -grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus -killer whale, killer, orca, grampus, sea wolf, Orcinus orca -dugong, Dugong dugon -sea lion -Chihuahua -Japanese spaniel -Maltese dog, Maltese terrier, Maltese -Pekinese, Pekingese, Peke -Shih-Tzu -Blenheim spaniel -papillon -toy terrier -Rhodesian ridgeback -Afghan hound, Afghan -basset, basset hound -beagle -bloodhound, sleuthhound -bluetick -black-and-tan coonhound -Walker hound, Walker foxhound -English foxhound -redbone -borzoi, Russian wolfhound -Irish wolfhound 
-Italian greyhound -whippet -Ibizan hound, Ibizan Podenco -Norwegian elkhound, elkhound -otterhound, otter hound -Saluki, gazelle hound -Scottish deerhound, deerhound -Weimaraner -Staffordshire bullterrier, Staffordshire bull terrier -American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier -Bedlington terrier -Border terrier -Kerry blue terrier -Irish terrier -Norfolk terrier -Norwich terrier -Yorkshire terrier -wire-haired fox terrier -Lakeland terrier -Sealyham terrier, Sealyham -Airedale, Airedale terrier -cairn, cairn terrier -Australian terrier -Dandie Dinmont, Dandie Dinmont terrier -Boston bull, Boston terrier -miniature schnauzer -giant schnauzer -standard schnauzer -Scotch terrier, Scottish terrier, Scottie -Tibetan terrier, chrysanthemum dog -silky terrier, Sydney silky -soft-coated wheaten terrier -West Highland white terrier -Lhasa, Lhasa apso -flat-coated retriever -curly-coated retriever -golden retriever -Labrador retriever -Chesapeake Bay retriever -German short-haired pointer -vizsla, Hungarian pointer -English setter -Irish setter, red setter -Gordon setter -Brittany spaniel -clumber, clumber spaniel -English springer, English springer spaniel -Welsh springer spaniel -cocker spaniel, English cocker spaniel, cocker -Sussex spaniel -Irish water spaniel -kuvasz -schipperke -groenendael -malinois -briard -kelpie -komondor -Old English sheepdog, bobtail -Shetland sheepdog, Shetland sheep dog, Shetland -collie -Border collie -Bouvier des Flandres, Bouviers des Flandres -Rottweiler -German shepherd, German shepherd dog, German police dog, alsatian -Doberman, Doberman pinscher -miniature pinscher -Greater Swiss Mountain dog -Bernese mountain dog -Appenzeller -EntleBucher -boxer -bull mastiff -Tibetan mastiff -French bulldog -Great Dane -Saint Bernard, St Bernard -Eskimo dog, husky -malamute, malemute, Alaskan malamute -Siberian husky -dalmatian, coach dog, carriage dog -affenpinscher, monkey pinscher, monkey dog -basenji -pug, pug-dog -Leonberg -Newfoundland, Newfoundland dog -Great Pyrenees -Samoyed, Samoyede -Pomeranian -chow, chow chow -keeshond -Brabancon griffon -Pembroke, Pembroke Welsh corgi -Cardigan, Cardigan Welsh corgi -toy poodle -miniature poodle -standard poodle -Mexican hairless -timber wolf, grey wolf, gray wolf, Canis lupus -white wolf, Arctic wolf, Canis lupus tundrarum -red wolf, maned wolf, Canis rufus, Canis niger -coyote, prairie wolf, brush wolf, Canis latrans -dingo, warrigal, warragal, Canis dingo -dhole, Cuon alpinus -African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus -hyena, hyaena -red fox, Vulpes vulpes -kit fox, Vulpes macrotis -Arctic fox, white fox, Alopex lagopus -grey fox, gray fox, Urocyon cinereoargenteus -tabby, tabby cat -tiger cat -Persian cat -Siamese cat, Siamese -Egyptian cat -cougar, puma, catamount, mountain lion, painter, panther, Felis concolor -lynx, catamount -leopard, Panthera pardus -snow leopard, ounce, Panthera uncia -jaguar, panther, Panthera onca, Felis onca -lion, king of beasts, Panthera leo -tiger, Panthera tigris -cheetah, chetah, Acinonyx jubatus -brown bear, bruin, Ursus arctos -American black bear, black bear, Ursus americanus, Euarctos americanus -ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus -sloth bear, Melursus ursinus, Ursus ursinus -mongoose -meerkat, mierkat -tiger beetle -ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle -ground beetle, carabid beetle -long-horned beetle, longicorn, longicorn beetle -leaf beetle, chrysomelid -dung 
beetle -rhinoceros beetle -weevil -fly -bee -ant, emmet, pismire -grasshopper, hopper -cricket -walking stick, walkingstick, stick insect -cockroach, roach -mantis, mantid -cicada, cicala -leafhopper -lacewing, lacewing fly -dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk -damselfly -admiral -ringlet, ringlet butterfly -monarch, monarch butterfly, milkweed butterfly, Danaus plexippus -cabbage butterfly -sulphur butterfly, sulfur butterfly -lycaenid, lycaenid butterfly -starfish, sea star -sea urchin -sea cucumber, holothurian -wood rabbit, cottontail, cottontail rabbit -hare -Angora, Angora rabbit -hamster -porcupine, hedgehog -fox squirrel, eastern fox squirrel, Sciurus niger -marmot -beaver -guinea pig, Cavia cobaya -sorrel -zebra -hog, pig, grunter, squealer, Sus scrofa -wild boar, boar, Sus scrofa -warthog -hippopotamus, hippo, river horse, Hippopotamus amphibius -ox -water buffalo, water ox, Asiatic buffalo, Bubalus bubalis -bison -ram, tup -bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis -ibex, Capra ibex -hartebeest -impala, Aepyceros melampus -gazelle -Arabian camel, dromedary, Camelus dromedarius -llama -weasel -mink -polecat, fitch, foulmart, foumart, Mustela putorius -black-footed ferret, ferret, Mustela nigripes -otter -skunk, polecat, wood pussy -badger -armadillo -three-toed sloth, ai, Bradypus tridactylus -orangutan, orang, orangutang, Pongo pygmaeus -gorilla, Gorilla gorilla -chimpanzee, chimp, Pan troglodytes -gibbon, Hylobates lar -siamang, Hylobates syndactylus, Symphalangus syndactylus -guenon, guenon monkey -patas, hussar monkey, Erythrocebus patas -baboon -macaque -langur -colobus, colobus monkey -proboscis monkey, Nasalis larvatus -marmoset -capuchin, ringtail, Cebus capucinus -howler monkey, howler -titi, titi monkey -spider monkey, Ateles geoffroyi -squirrel monkey, Saimiri sciureus -Madagascar cat, ring-tailed lemur, Lemur catta -indri, indris, Indri indri, Indri brevicaudatus -Indian elephant, Elephas maximus -African elephant, Loxodonta africana -lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens -giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca -barracouta, snoek -eel -coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch -rock beauty, Holocanthus tricolor -anemone fish -sturgeon -gar, garfish, garpike, billfish, Lepisosteus osseus -lionfish -puffer, pufferfish, blowfish, globefish -abacus -abaya -academic gown, academic robe, judge's robe -accordion, piano accordion, squeeze box -acoustic guitar -aircraft carrier, carrier, flattop, attack aircraft carrier -airliner -airship, dirigible -altar -ambulance -amphibian, amphibious vehicle -analog clock -apiary, bee house -apron -ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin -assault rifle, assault gun -backpack, back pack, knapsack, packsack, rucksack, haversack -bakery, bakeshop, bakehouse -balance beam, beam -balloon -ballpoint, ballpoint pen, ballpen, Biro -Band Aid -banjo -bannister, banister, balustrade, balusters, handrail -barbell -barber chair -barbershop -barn -barometer -barrel, cask -barrow, garden cart, lawn cart, wheelbarrow -baseball -basketball -bassinet -bassoon -bathing cap, swimming cap -bath towel -bathtub, bathing tub, bath, tub -beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon -beacon, lighthouse, beacon light, pharos -beaker 
-bearskin, busby, shako -beer bottle -beer glass -bell cote, bell cot -bib -bicycle-built-for-two, tandem bicycle, tandem -bikini, two-piece -binder, ring-binder -binoculars, field glasses, opera glasses -birdhouse -boathouse -bobsled, bobsleigh, bob -bolo tie, bolo, bola tie, bola -bonnet, poke bonnet -bookcase -bookshop, bookstore, bookstall -bottlecap -bow -bow tie, bow-tie, bowtie -brass, memorial tablet, plaque -brassiere, bra, bandeau -breakwater, groin, groyne, mole, bulwark, seawall, jetty -breastplate, aegis, egis -broom -bucket, pail -buckle -bulletproof vest -bullet train, bullet -butcher shop, meat market -cab, hack, taxi, taxicab -caldron, cauldron -candle, taper, wax light -cannon -canoe -can opener, tin opener -cardigan -car mirror -carousel, carrousel, merry-go-round, roundabout, whirligig -carpenter's kit, tool kit -carton -car wheel -cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM -cassette -cassette player -castle -catamaran -CD player -cello, violoncello -cellular telephone, cellular phone, cellphone, cell, mobile phone -chain -chainlink fence -chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour -chain saw, chainsaw -chest -chiffonier, commode -chime, bell, gong -china cabinet, china closet -Christmas stocking -church, church building -cinema, movie theater, movie theatre, movie house, picture palace -cleaver, meat cleaver, chopper -cliff dwelling -cloak -clog, geta, patten, sabot -cocktail shaker -coffee mug -coffeepot -coil, spiral, volute, whorl, helix -combination lock -computer keyboard, keypad -confectionery, confectionary, candy store -container ship, containership, container vessel -convertible -corkscrew, bottle screw -cornet, horn, trumpet, trump -cowboy boot -cowboy hat, ten-gallon hat -cradle -crane -crash helmet -crate -crib, cot -Crock Pot -croquet ball -crutch -cuirass -dam, dike, dyke -desk -desktop computer -dial telephone, dial phone -diaper, nappy, napkin -digital clock -digital watch -dining table, board -dishrag, dishcloth -dishwasher, dish washer, dishwashing machine -disk brake, disc brake -dock, dockage, docking facility -dogsled, dog sled, dog sleigh -dome -doormat, welcome mat -drilling platform, offshore rig -drum, membranophone, tympan -drumstick -dumbbell -Dutch oven -electric fan, blower -electric guitar -electric locomotive -entertainment center -envelope -espresso maker -face powder -feather boa, boa -file, file cabinet, filing cabinet -fireboat -fire engine, fire truck -fire screen, fireguard -flagpole, flagstaff -flute, transverse flute -folding chair -football helmet -forklift -fountain -fountain pen -four-poster -freight car -French horn, horn -frying pan, frypan, skillet -fur coat -garbage truck, dustcart -gasmask, respirator, gas helmet -gas pump, gasoline pump, petrol pump, island dispenser -goblet -go-kart -golf ball -golfcart, golf cart -gondola -gong, tam-tam -gown -grand piano, grand -greenhouse, nursery, glasshouse -grille, radiator grille -grocery store, grocery, food market, market -guillotine -hair slide -hair spray -half track -hammer -hamper -hand blower, blow dryer, blow drier, hair dryer, hair drier -hand-held computer, hand-held microcomputer -handkerchief, hankie, hanky, hankey -hard disc, hard disk, fixed disk -harmonica, mouth organ, harp, mouth harp -harp -harvester, reaper -hatchet -holster -home theater, home theatre -honeycomb -hook, claw -hoopskirt, crinoline -horizontal bar, high bar -horse cart, horse-cart 
-hourglass -iPod -iron, smoothing iron -jack-o'-lantern -jean, blue jean, denim -jeep, landrover -jersey, T-shirt, tee shirt -jigsaw puzzle -jinrikisha, ricksha, rickshaw -joystick -kimono -knee pad -knot -lab coat, laboratory coat -ladle -lampshade, lamp shade -laptop, laptop computer -lawn mower, mower -lens cap, lens cover -letter opener, paper knife, paperknife -library -lifeboat -lighter, light, igniter, ignitor -limousine, limo -liner, ocean liner -lipstick, lip rouge -Loafer -lotion -loudspeaker, speaker, speaker unit, loudspeaker system, speaker system -loupe, jeweler's loupe -lumbermill, sawmill -magnetic compass -mailbag, postbag -mailbox, letter box -maillot -maillot, tank suit -manhole cover -maraca -marimba, xylophone -mask -matchstick -maypole -maze, labyrinth -measuring cup -medicine chest, medicine cabinet -megalith, megalithic structure -microphone, mike -microwave, microwave oven -military uniform -milk can -minibus -miniskirt, mini -minivan -missile -mitten -mixing bowl -mobile home, manufactured home -Model T -modem -monastery -monitor -moped -mortar -mortarboard -mosque -mosquito net -motor scooter, scooter -mountain bike, all-terrain bike, off-roader -mountain tent -mouse, computer mouse -mousetrap -moving van -muzzle -nail -neck brace -necklace -nipple -notebook, notebook computer -obelisk -oboe, hautboy, hautbois -ocarina, sweet potato -odometer, hodometer, mileometer, milometer -oil filter -organ, pipe organ -oscilloscope, scope, cathode-ray oscilloscope, CRO -overskirt -oxcart -oxygen mask -packet -paddle, boat paddle -paddlewheel, paddle wheel -padlock -paintbrush -pajama, pyjama, pj's, jammies -palace -panpipe, pandean pipe, syrinx -paper towel -parachute, chute -parallel bars, bars -park bench -parking meter -passenger car, coach, carriage -patio, terrace -pay-phone, pay-station -pedestal, plinth, footstall -pencil box, pencil case -pencil sharpener -perfume, essence -Petri dish -photocopier -pick, plectrum, plectron -pickelhaube -picket fence, paling -pickup, pickup truck -pier -piggy bank, penny bank -pill bottle -pillow -ping-pong ball -pinwheel -pirate, pirate ship -pitcher, ewer -plane, carpenter's plane, woodworking plane -planetarium -plastic bag -plate rack -plow, plough -plunger, plumber's helper -Polaroid camera, Polaroid Land camera -pole -police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria -poncho -pool table, billiard table, snooker table -pop bottle, soda bottle -pot, flowerpot -potter's wheel -power drill -prayer rug, prayer mat -printer -prison, prison house -projectile, missile -projector -puck, hockey puck -punching bag, punch bag, punching ball, punchball -purse -quill, quill pen -quilt, comforter, comfort, puff -racer, race car, racing car -racket, racquet -radiator -radio, wireless -radio telescope, radio reflector -rain barrel -recreational vehicle, RV, R.V. 
-reel -reflex camera -refrigerator, icebox -remote control, remote -restaurant, eating house, eating place, eatery -revolver, six-gun, six-shooter -rifle -rocking chair, rocker -rotisserie -rubber eraser, rubber, pencil eraser -rugby ball -rule, ruler -running shoe -safe -safety pin -saltshaker, salt shaker -sandal -sarong -sax, saxophone -scabbard -scale, weighing machine -school bus -schooner -scoreboard -screen, CRT screen -screw -screwdriver -seat belt, seatbelt -sewing machine -shield, buckler -shoe shop, shoe-shop, shoe store -shoji -shopping basket -shopping cart -shovel -shower cap -shower curtain -ski -ski mask -sleeping bag -slide rule, slipstick -sliding door -slot, one-armed bandit -snorkel -snowmobile -snowplow, snowplough -soap dispenser -soccer ball -sock -solar dish, solar collector, solar furnace -sombrero -soup bowl -space bar -space heater -space shuttle -spatula -speedboat -spider web, spider's web -spindle -sports car, sport car -spotlight, spot -stage -steam locomotive -steel arch bridge -steel drum -stethoscope -stole -stone wall -stopwatch, stop watch -stove -strainer -streetcar, tram, tramcar, trolley, trolley car -stretcher -studio couch, day bed -stupa, tope -submarine, pigboat, sub, U-boat -suit, suit of clothes -sundial -sunglass -sunglasses, dark glasses, shades -sunscreen, sunblock, sun blocker -suspension bridge -swab, swob, mop -sweatshirt -swimming trunks, bathing trunks -swing -switch, electric switch, electrical switch -syringe -table lamp -tank, army tank, armored combat vehicle, armoured combat vehicle -tape player -teapot -teddy, teddy bear -television, television system -tennis ball -thatch, thatched roof -theater curtain, theatre curtain -thimble -thresher, thrasher, threshing machine -throne -tile roof -toaster -tobacco shop, tobacconist shop, tobacconist -toilet seat -torch -totem pole -tow truck, tow car, wrecker -toyshop -tractor -trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi -tray -trench coat -tricycle, trike, velocipede -trimaran -tripod -triumphal arch -trolleybus, trolley coach, trackless trolley -trombone -tub, vat -turnstile -typewriter keyboard -umbrella -unicycle, monocycle -upright, upright piano -vacuum, vacuum cleaner -vase -vault -velvet -vending machine -vestment -viaduct -violin, fiddle -volleyball -waffle iron -wall clock -wallet, billfold, notecase, pocketbook -wardrobe, closet, press -warplane, military plane -washbasin, handbasin, washbowl, lavabo, wash-hand basin -washer, automatic washer, washing machine -water bottle -water jug -water tower -whiskey jug -whistle -wig -window screen -window shade -Windsor tie -wine bottle -wing -wok -wooden spoon -wool, woolen, woollen -worm fence, snake fence, snake-rail fence, Virginia fence -wreck -yawl -yurt -web site, website, internet site, site -comic book -crossword puzzle, crossword -street sign -traffic light, traffic signal, stoplight -book jacket, dust cover, dust jacket, dust wrapper -menu -plate -guacamole -consomme -hot pot, hotpot -trifle -ice cream, icecream -ice lolly, lolly, lollipop, popsicle -French loaf -bagel, beigel -pretzel -cheeseburger -hotdog, hot dog, red hot -mashed potato -head cabbage -broccoli -cauliflower -zucchini, courgette -spaghetti squash -acorn squash -butternut squash -cucumber, cuke -artichoke, globe artichoke -bell pepper -cardoon -mushroom -Granny Smith -strawberry -orange -lemon -fig -pineapple, ananas -banana -jackfruit, jak, jack -custard apple -pomegranate -hay -carbonara -chocolate sauce, chocolate syrup -dough 
-meat loaf, meatloaf -pizza, pizza pie -potpie -burrito -red wine -espresso -cup -eggnog -alp -bubble -cliff, drop, drop-off -coral reef -geyser -lakeside, lakeshore -promontory, headland, head, foreland -sandbar, sand bar -seashore, coast, seacoast, sea-coast -valley, vale -volcano -ballplayer, baseball player -groom, bridegroom -scuba diver -rapeseed -daisy -yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum -corn -acorn -hip, rose hip, rosehip -buckeye, horse chestnut, conker -coral fungus -agaric -gyromitra -stinkhorn, carrion fungus -earthstar -hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa -bolete -ear, spike, capitulum -toilet tissue, toilet paper, bathroom tissue diff --git a/inference-engine/samples/speech_sample/CMakeLists.txt b/inference-engine/samples/speech_sample/CMakeLists.txt index e789f7af86f02b..ec821d67301ce6 100644 --- a/inference-engine/samples/speech_sample/CMakeLists.txt +++ b/inference-engine/samples/speech_sample/CMakeLists.txt @@ -2,34 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 # -set (TARGET_NAME "speech_sample") - -file (GLOB MAIN_SRC - ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp - ) - -file (GLOB MAIN_HEADERS - ${CMAKE_CURRENT_SOURCE_DIR}/*.h - ) - -# Create named folders for the sources within the .vcproj -# Empty name lists them directly under the .vcproj -source_group("src" FILES ${MAIN_SRC}) -source_group("include" FILES ${MAIN_HEADERS}) - - -link_directories(${LIB_FOLDER}) - -# Create library file from sources. -add_executable(${TARGET_NAME} ${MAIN_SRC} ${MAIN_HEADERS}) - -add_dependencies(${TARGET_NAME} gflags) - -set_target_properties(${TARGET_NAME} PROPERTIES "CMAKE_CXX_FLAGS" "${CMAKE_CXX_FLAGS} -fPIE" -COMPILE_PDB_NAME ${TARGET_NAME}) - -target_link_libraries(${TARGET_NAME} ${InferenceEngine_LIBRARIES} IE::ie_cpu_extension gflags) - -if(UNIX) - target_link_libraries( ${TARGET_NAME} ${LIB_DL} pthread) -endif() +ie_add_sample(NAME speech_sample + SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp" + HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/speech_sample.hpp") diff --git a/inference-engine/samples/speech_sample/README.md b/inference-engine/samples/speech_sample/README.md index a9ca9385827611..0046c97f3f92db 100644 --- a/inference-engine/samples/speech_sample/README.md +++ b/inference-engine/samples/speech_sample/README.md @@ -58,6 +58,7 @@ Otherwise, an error will occur. If the device is set to `GNA_SW`, the GNA device is emulated in fast-but-not-bit-exact mode. Finally, if the device is set to `GNA_SW_EXACT`, the GNA device is emulated in bit-exact mode. +`GNA_SW_FP32` mode runs the calculations on the CPU device using the GNA Plugin. #### Loading and Saving Models @@ -89,13 +90,13 @@ speech_sample [OPTION] Options: -h Print a usage message. - -i "" Required. Path to an .ark file. + -i "" Required. Paths to .ark files. Example of usage: or . -m "" Required. Path to an .xml file with a trained model (required if -rg is missing). -o "" Optional. Output file name (default name is "scores.ark"). -l "" Required for CPU custom layers. Absolute path to a shared library with the kernel implementations. - -d "" Optional. Specify a target device to infer on. CPU, GPU, GNA_AUTO, GNA_HW, GNA_SW, GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU as a secondary (e.g. HETERO:GNA,CPU) are supported. The sample will look for a suitable plugin for device specified. + -d "" Optional. Specify a target device to infer on.
CPU, GPU, GNA_AUTO, GNA_HW, GNA_SW, GNA_SW_EXACT, GNA_SW_FP32 and HETERO with a combination of GNA + as the primary device and CPU as a secondary (e.g. HETERO:GNA,CPU) are supported. The list of available devices is shown below. The sample will look for a suitable plugin for the device specified. -p Optional. Plugin name. For example, GPU. If this parameter is set, the sample will look for this plugin only - -pp Optional. Path to a plugin folder. -pc Optional. Enables performance report -q "" Optional. Input quantization mode: "static" (default), "dynamic", or "user" (use with -sf). -qb "" Optional. Weight bits for quantization: 8 or 16 (default) @@ -106,7 +107,10 @@ Options: -wg "" Optional. Write GNA model to file using path/filename provided. -we "" Optional. Write GNA embedded model to file using path/filename provided. -nthreads "" Optional. Number of threads to use for concurrent async inference requests on the GNA. - -cw "" Optional. Number of frames for context windows (default is 0). Works only with context window networks. If you use the cw flag, the batch size and nthreads arguments are ignored. + -cw_l "" Optional. Number of frames for left context windows (default is 0). Works only with context window networks. + If you use the cw_l or cw_r flag, then the batch size and nthreads arguments are ignored. + -cw_r "" Optional. Number of frames for right context windows (default is 0). Works only with context window networks. + If you use the cw_r or cw_l flag, then the batch size and nthreads arguments are ignored. ``` diff --git a/inference-engine/samples/speech_sample/main.cpp b/inference-engine/samples/speech_sample/main.cpp index a123f4c70657ba..efc38ca398c203 100644 --- a/inference-engine/samples/speech_sample/main.cpp +++ b/inference-engine/samples/speech_sample/main.cpp @@ -265,7 +265,7 @@ float StdDevRelError(score_error_t error) { - (error.sumRelError / error.numScores) * (error.sumRelError / error.numScores))); } -#if !defined(__arm__) && !defined(_M_ARM) +#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64) #if defined(_WIN32) || defined(WIN32) #include #include @@ -325,7 +325,7 @@ float getGnaFrequencyMHz() { } } -#endif // !defined(__arm__) && !defined(_M_ARM) +#endif // if not ARM void printReferenceCompareResults(score_error_t const &totalError, size_t framesNum, @@ -344,8 +344,8 @@ void printReferenceCompareResults(score_error_t const &totalError, void printPerformanceCounters(std::map const &utterancePerfMap, size_t callsNum, - std::ostream &stream) { -#if !defined(__arm__) && !defined(_M_ARM) + std::ostream &stream, std::string fullDeviceName) { +#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64) stream << std::endl << "Performance counts:" << std::endl; stream << std::setw(10) << std::right << "" << "Counter descriptions"; stream << std::setw(22) << "Utt scoring time"; @@ -371,6 +371,9 @@ void printPerformanceCounters(std::map - std::vector possibleDeviceTypes = { + std::vector supportedDevices = { "CPU", "GPU", "GNA_AUTO", "GNA_HW", "GNA_SW_EXACT", "GNA_SW", + "GNA_SW_FP32", "HETERO:GNA,CPU", "HETERO:GNA_HW,CPU", "HETERO:GNA_SW_EXACT,CPU", "HETERO:GNA_SW,CPU", + "HETERO:GNA_SW_FP32,CPU", + "MYRIAD" }; - if (std::find(possibleDeviceTypes.begin(), possibleDeviceTypes.end(), FLAGS_d) == possibleDeviceTypes.end()) { + if (std::find(supportedDevices.begin(), supportedDevices.end(), FLAGS_d) == supportedDevices.end()) { throw std::logic_error("Specified device is not supported."); }
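For readers tracing the new device check in isolation: the sample now rejects any `-d` value that is not an exact match against the hard-coded list above. The following is a minimal standalone restatement of that rule; the helper name `checkDevice` is ours, not the sample's.

```cpp
#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>

// Standalone restatement of the sample's -d validation. The device list
// mirrors the supportedDevices vector introduced in this diff.
void checkDevice(const std::string &device) {
    static const std::vector<std::string> supported = {
        "CPU", "GPU", "GNA_AUTO", "GNA_HW", "GNA_SW_EXACT", "GNA_SW",
        "GNA_SW_FP32", "HETERO:GNA,CPU", "HETERO:GNA_HW,CPU",
        "HETERO:GNA_SW_EXACT,CPU", "HETERO:GNA_SW,CPU",
        "HETERO:GNA_SW_FP32,CPU", "MYRIAD"};
    if (std::find(supported.begin(), supported.end(), device) == supported.end()) {
        throw std::logic_error("Specified device is not supported.");
    }
}
```

Note that the match is exact, so a value such as `HETERO:CPU,GNA` (devices in the reverse order) would be rejected.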
@@ -459,11 +466,15 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) { } if (FLAGS_nthreads <= 0) { - throw std::logic_error("Not valid value for 'nthreads' argument. It should be > 0 "); + throw std::logic_error("Invalid value for 'nthreads' argument. It must be greater than 0"); } - if (FLAGS_cw < 0) { - throw std::logic_error("Not valid value for 'cw' argument. It should be > 0 "); + if (FLAGS_cw_r < 0) { + throw std::logic_error("Invalid value for 'cw_r' argument. It must be greater than or equal to 0"); + } + + if (FLAGS_cw_l < 0) { + throw std::logic_error("Invalid value for 'cw_l' argument. It must be greater than or equal to 0"); } return true; @@ -494,23 +505,39 @@ int main(int argc, char *argv[]) { std::string deviceStr = useHetero && useGna ? "HETERO:GNA,CPU" : FLAGS_d.substr(0, (FLAGS_d.find("_"))); float scaleFactorInput = static_cast(FLAGS_sf); - uint32_t batchSize = FLAGS_cw > 0 ? 1 : (uint32_t) FLAGS_bs; - /** Extract input ark file name **/ - std::string inputArkName = fileNameNoExt(FLAGS_i) + ".ark"; + uint32_t batchSize = (FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : (uint32_t) FLAGS_bs; - uint32_t numUtterances(0), numBytesThisUtterance(0); + std::vector inputArkFiles; + std::vector numBytesThisUtterance; + uint32_t numUtterances(0); if (!FLAGS_i.empty()) { - GetKaldiArkInfo(inputArkName.c_str(), 0, &numUtterances, &numBytesThisUtterance); + std::string outStr; + std::istringstream stream(FLAGS_i); + + uint32_t currentNumUtterances(0), currentNumBytesThisUtterance(0); + while (getline(stream, outStr, ',')) { + std::string filename(fileNameNoExt(outStr) + ".ark"); + inputArkFiles.push_back(filename); + + GetKaldiArkInfo(filename.c_str(), 0, &currentNumUtterances, &currentNumBytesThisUtterance); + if (numUtterances == 0) { + numUtterances = currentNumUtterances; + } else if (currentNumUtterances != numUtterances) { + throw std::logic_error("Incorrect input files. The number of utterances must be the same for all ark files"); + } + numBytesThisUtterance.push_back(currentNumBytesThisUtterance); + } } + size_t numInputArkFiles(inputArkFiles.size()); // ----------------------------------------------------------------------------------------------------- - // --------------------------- 1. Load Plugin for inference engine ------------------------------------- - slog::info << "Loading plugin" << slog::endl; - /** Loading plugin for device **/ - InferencePlugin plugin = PluginDispatcher({FLAGS_pp}).getPluginByDevice(deviceStr); + // --------------------------- 1. Load inference engine ------------------------------------- + slog::info << "Loading Inference Engine" << slog::endl; + Core ie; - /** Printing plugin version **/ - std::cout << plugin.GetVersion() << std::endl << std::endl; + /** Printing device version **/ + slog::info << "Device info: " << slog::endl; + std::cout << ie.GetVersions(deviceStr) << std::endl; // ----------------------------------------------------------------------------------------------------- // --------------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------ @@ -534,7 +561,7 @@ int main(int argc, char *argv[]) { << slog::endl; } - /** Setting plugin parameter for per layer metrics **/ + /** Setting parameter for per layer metrics **/ std::map gnaPluginConfig; std::map genericPluginConfig; if (useGna) {
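The `-i` handling above splits the flag value on commas and normalizes each token to an `.ark` path. Below is a compact sketch of the same parsing, under the assumption that stripping the extension and re-appending `.ark` is all `fileNameNoExt` is used for here; `splitArkList` is a hypothetical helper, not part of the sample.

```cpp
#include <sstream>
#include <string>
#include <vector>

// Split a comma-separated -i value into individual ark file paths,
// normalizing each token so it ends in ".ark".
std::vector<std::string> splitArkList(const std::string &flagValue) {
    std::vector<std::string> files;
    std::istringstream stream(flagValue);
    std::string token;
    while (std::getline(stream, token, ',')) {
        // Strip the extension, if any, then re-append ".ark". If there is
        // no dot, find_last_of returns npos and substr keeps the whole token.
        files.push_back(token.substr(0, token.find_last_of('.')) + ".ark");
    }
    return files;
}
```

For a hypothetical value `"feat1,feat2.ark"` this yields `{"feat1.ark", "feat2.ark"}`; an empty value yields an empty list, which matches the `FLAGS_i.empty()` guard above.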
@@ -542,13 +569,13 @@ int main(int argc, char *argv[]) { std::string gnaDevice = useHetero ? FLAGS_d.substr(FLAGS_d.find("GNA"), FLAGS_d.find(",") - FLAGS_d.find("GNA")) : FLAGS_d; gnaPluginConfig[GNAConfigParams::KEY_GNA_DEVICE_MODE] = gnaDevice.find("_") == std::string::npos ? "GNA_AUTO" : gnaDevice; - } else if (plugin.GetVersion()->description == std::string("MKLDNNPlugin")) { + } else if (deviceStr.find("CPU") != std::string::npos) { /** * cpu_extensions library is compiled from "extension" folder containing * custom MKLDNNPlugin layer implementations. These layers are not supported * by mkldnn, but they can be useful for inferring custom topologies. **/ - plugin.AddExtension(std::make_shared()); + ie.AddExtension(std::make_shared(), "CPU"); } if (FLAGS_pc) { @@ -556,26 +583,35 @@ int main(int argc, char *argv[]) { } if (FLAGS_q.compare("user") == 0) { - std::cout << "[ INFO ] Using scale factor of " << FLAGS_sf << std::endl; + if (numInputArkFiles > 1) { + std::string errMessage("Multiple input ark files are not supported with -q 'user'. Please use a single ark file in this case."); + throw std::logic_error(errMessage); + } + slog::info << "Using scale factor of " << FLAGS_sf << slog::endl; gnaPluginConfig[GNA_CONFIG_KEY(SCALE_FACTOR)] = std::to_string(FLAGS_sf); - } else { // "static" quantization with calculated scale factor - std::string name; - std::vector ptrFeatures; - uint32_t numArrays(0), numBytes(0), numFrames(0), numFrameElements(0), numBytesPerElement(0); - GetKaldiArkInfo(inputArkName.c_str(), 0, &numArrays, &numBytes); - ptrFeatures.resize(numBytes); - LoadKaldiArkArray(inputArkName.c_str(), - 0, - name, - ptrFeatures, - &numFrames, - &numFrameElements, - &numBytesPerElement); - scaleFactorInput = - ScaleFactorForQuantization(ptrFeatures.data(), MAX_VAL_2B_FEAT, numFrames * numFrameElements); - slog::info << "Using scale factor of " << scaleFactorInput << " calculated from first utterance." - << slog::endl; - gnaPluginConfig[GNA_CONFIG_KEY(SCALE_FACTOR)] = std::to_string(scaleFactorInput); + } else { + // "static" quantization with calculated scale factor + for (size_t i = 0; i < numInputArkFiles; i++) { + auto inputArkName = inputArkFiles[i].c_str(); + std::string name; + std::vector ptrFeatures; + uint32_t numArrays(0), numBytes(0), numFrames(0), numFrameElements(0), numBytesPerElement(0); + GetKaldiArkInfo(inputArkName, 0, &numArrays, &numBytes); + ptrFeatures.resize(numBytes); + LoadKaldiArkArray(inputArkName, + 0, + name, + ptrFeatures, + &numFrames, + &numFrameElements, + &numBytesPerElement); + scaleFactorInput = + ScaleFactorForQuantization(ptrFeatures.data(), MAX_VAL_2B_FEAT, numFrames * numFrameElements); + slog::info << "Using scale factor of " << scaleFactorInput << " calculated from the first utterance." + << slog::endl; + std::string scaleFactorConfigKey = GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_") + std::to_string(i); + gnaPluginConfig[scaleFactorConfigKey] = std::to_string(scaleFactorInput); + } } if (FLAGS_qb == 8) { @@ -584,7 +620,7 @@ int main(int argc, char *argv[]) { gnaPluginConfig[GNAConfigParams::KEY_GNA_PRECISION] = "I16"; } - gnaPluginConfig[GNAConfigParams::KEY_GNA_LIB_N_THREADS] = std::to_string(FLAGS_cw > 0 ? 1 : FLAGS_nthreads); + gnaPluginConfig[GNAConfigParams::KEY_GNA_LIB_N_THREADS] = std::to_string((FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : FLAGS_nthreads);
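`ScaleFactorForQuantization` itself is not shown in this diff. A plausible reading, and the assumption behind this sketch, is that it returns the factor that maps the utterance's peak absolute feature value onto the 16-bit input range bounded by `MAX_VAL_2B_FEAT`; the function below is our illustration, not the sample's implementation.

```cpp
#include <algorithm>
#include <cmath>
#include <cstddef>

// Assumed behavior of ScaleFactorForQuantization: choose a factor so that
// the largest absolute value in the utterance lands on maxVal after scaling.
float scaleFactorForQuantization(const float *data, float maxVal, std::size_t count) {
    float maxAbs = 1e-10f;  // floor avoids division by zero on silent input
    for (std::size_t i = 0; i < count; ++i) {
        maxAbs = std::max(maxAbs, std::fabs(data[i]));
    }
    return maxVal / maxAbs;
}
```

The loop above then registers one scale factor per network input by suffixing the `GNA_CONFIG_KEY(SCALE_FACTOR)` config key with the input index, so each ark file can be quantized with its own factor.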
gnaPluginConfig[GNA_CONFIG_KEY(COMPACT_MODE)] = CONFIG_VALUE(NO); // ----------------------------------------------------------------------------------------------------- @@ -595,7 +631,7 @@ int main(int argc, char *argv[]) { } // ----------------------------------------------------------------------------------------------------- - // --------------------------- 5. Loading model to the plugin ------------------------------------------ + // --------------------------- 5. Loading model to the device ------------------------------------------ if (useGna) { genericPluginConfig.insert(std::begin(gnaPluginConfig), std::end(gnaPluginConfig)); @@ -604,11 +640,11 @@ int main(int argc, char *argv[]) { ExecutableNetwork executableNet; if (!FLAGS_m.empty()) { - slog::info << "Loading model to the plugin" << slog::endl; - executableNet = plugin.LoadNetwork(netBuilder.getNetwork(), genericPluginConfig); + slog::info << "Loading model to the device" << slog::endl; + executableNet = ie.LoadNetwork(netBuilder.getNetwork(), deviceStr, genericPluginConfig); } else { - slog::info << "Importing model to the plugin" << slog::endl; - executableNet = plugin.ImportNetwork(FLAGS_rg.c_str(), genericPluginConfig); + slog::info << "Importing model to the device" << slog::endl; + executableNet = ie.ImportNetwork(FLAGS_rg.c_str(), deviceStr, genericPluginConfig); } ms loadTime = std::chrono::duration_cast(Time::now() - t0); @@ -629,7 +665,7 @@ int main(int argc, char *argv[]) { return 0; } - std::vector inferRequests(FLAGS_cw > 0 ? 1 : FLAGS_nthreads); + std::vector inferRequests((FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : FLAGS_nthreads); for (auto& inferRequest : inferRequests) { inferRequest = {executableNet.CreateInferRequest(), -1, batchSize}; }
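The brace initializer `{executableNet.CreateInferRequest(), -1, batchSize}` implies a small bookkeeping aggregate around each request. Its exact definition is outside this diff; a shape consistent with the usage here would be (field names are our guess):

```cpp
#include <cstdint>
#include <inference_engine.hpp>

// Assumed per-request bookkeeping used by the sample's async loop.
// frameIndex is -1 while the request has never been submitted, -2 for
// context-padding frames whose scores are discarded, and otherwise the
// index of the first frame covered by the in-flight batch.
struct InferRequestStruct {
    InferenceEngine::InferRequest inferRequest;
    int frameIndex;
    uint32_t numFramesThisBatch;
};
```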
@@ -638,23 +674,27 @@ int main(int argc, char *argv[]) { // --------------------------- 7. Prepare input blobs -------------------------------------------------- /** Taking information about all topology inputs **/ ConstInputsDataMap cInputInfo = executableNet.GetInputsInfo(); - InputsDataMap inputInfo; - if (!FLAGS_m.empty()) { - inputInfo = netBuilder.getNetwork().getInputsInfo(); - } - /** Stores all input blobs data **/ - if (cInputInfo.size() != 1) { - throw std::logic_error("Sample supports only topologies with 1 input"); + if (cInputInfo.size() != numInputArkFiles) { + throw std::logic_error("Number of network inputs (" + + std::to_string(cInputInfo.size()) + ") is not equal to the number of ark files (" + + std::to_string(numInputArkFiles) + ")"); } - Blob::Ptr ptrInputBlob = inferRequests[0].inferRequest.GetBlob(cInputInfo.begin()->first); + std::vector ptrInputBlobs; + for (auto& input : cInputInfo) { + ptrInputBlobs.push_back(inferRequests.begin()->inferRequest.GetBlob(input.first)); + } + InputsDataMap inputInfo; + if (!FLAGS_m.empty()) { + inputInfo = netBuilder.getNetwork().getInputsInfo(); + } /** configure input precision if model loaded from IR **/ for (auto &item : inputInfo) { Precision inputPrecision = Precision::FP32; // specify Precision::I16 to provide quantized inputs item.second->setPrecision(inputPrecision); - item.second->getInputData()->layout = NC; // row major layout + item.second->getInputData()->setLayout(Layout::NC); // row major layout } // ----------------------------------------------------------------------------------------------------- @@ -666,7 +706,7 @@ int main(int argc, char *argv[]) { outputInfo = netBuilder.getNetwork().getOutputsInfo(); } - Blob::Ptr ptrOutputBlob = inferRequests[0].inferRequest.GetBlob(cOutputInfo.begin()->first); + Blob::Ptr ptrOutputBlob = inferRequests[0].inferRequest.GetBlob(cOutputInfo.rbegin()->first); for (auto &item : outputInfo) { DataPtr outData = item.second; @@ -676,40 +716,62 @@ int main(int argc, char *argv[]) { Precision outputPrecision = Precision::FP32; // specify Precision::I32 to retrieve quantized outputs outData->setPrecision(outputPrecision); - outData->layout = NC; // row major layout + outData->setLayout(Layout::NC); // row major layout } // -----------------------------------------------------------------------------------------------------
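The contract enforced here, together with the per-input size check in the inference loop below, is simple: one network input per ark file, and each input blob must hold exactly frameElements × batchSize values. A standalone restatement of both checks (`checkShapes` is our name; the messages paraphrase the sample's):

```cpp
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <string>
#include <vector>

// blobSizes[i] is the element count of network input i; frameElements[i]
// is the per-frame element count read from ark file i.
void checkShapes(const std::vector<std::size_t> &blobSizes,
                 const std::vector<uint32_t> &frameElements,
                 uint32_t batchSize) {
    if (blobSizes.size() != frameElements.size()) {
        throw std::logic_error("Number of network inputs (" + std::to_string(blobSizes.size()) +
                               ") is not equal to the number of ark files (" +
                               std::to_string(frameElements.size()) + ")");
    }
    for (std::size_t i = 0; i < blobSizes.size(); ++i) {
        if (blobSizes[i] != static_cast<std::size_t>(frameElements[i]) * batchSize) {
            throw std::logic_error("Network input size (" + std::to_string(blobSizes[i]) +
                                   ") does not match the ark file size (" +
                                   std::to_string(frameElements[i] * batchSize) + ")");
        }
    }
}
```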
// --------------------------- 9. Do inference --------------------------------------------------------- - std::vector ptrUtterance; + std::vector> ptrUtterances; std::vector ptrScores; std::vector ptrReferenceScores; score_error_t frameError, totalError; + ptrUtterances.resize(inputArkFiles.size()); for (uint32_t utteranceIndex = 0; utteranceIndex < numUtterances; ++utteranceIndex) { std::map utterancePerfMap; std::string uttName; - uint32_t numFrames(0), numFrameElementsInput(0), numBytesPerElementInput(0), n(0); + uint32_t numFrames(0), n(0); + std::vector numFrameElementsInput; + uint32_t numFramesReference(0), numFrameElementsReference(0), numBytesPerElementReference(0), numBytesReferenceScoreThisUtterance(0); const uint32_t numScoresPerFrame = ptrOutputBlob->size() / batchSize; - GetKaldiArkInfo(inputArkName.c_str(), utteranceIndex, &n, &numBytesThisUtterance); - ptrUtterance.resize(numBytesThisUtterance); - LoadKaldiArkArray(inputArkName.c_str(), - utteranceIndex, - uttName, - ptrUtterance, - &numFrames, - &numFrameElementsInput, - &numBytesPerElementInput); - - uint32_t numFrameElementsInputPadded = numFrameElementsInput; - - if (ptrInputBlob->size() != numFrameElementsInputPadded * batchSize) { - throw std::logic_error("network input size(" + std::to_string(ptrInputBlob->size()) + - ") mismatch to ark file size (" - std::to_string(numFrameElementsInputPadded * batchSize) + ")"); + + numFrameElementsInput.resize(numInputArkFiles); + for (size_t i = 0; i < inputArkFiles.size(); i++) { + std::vector ptrUtterance; + auto inputArkFilename = inputArkFiles[i].c_str(); + uint32_t currentNumFrames(0), currentNumFrameElementsInput(0), currentNumBytesPerElementInput(0); + GetKaldiArkInfo(inputArkFilename, utteranceIndex, &n, &numBytesThisUtterance[i]); + ptrUtterance.resize(numBytesThisUtterance[i]); + LoadKaldiArkArray(inputArkFilename, + utteranceIndex, + uttName, + ptrUtterance, + &currentNumFrames, + &currentNumFrameElementsInput, + &currentNumBytesPerElementInput); + if (numFrames == 0) { + numFrames = currentNumFrames; + } else if (numFrames != currentNumFrames) { + std::string errMessage("Number of frames in ark files is different: " + std::to_string(numFrames) + + " and " + std::to_string(currentNumFrames)); + throw std::logic_error(errMessage); + } + + ptrUtterances[i] = ptrUtterance; + numFrameElementsInput[i] = currentNumFrameElementsInput; + } + + int i = 0; + for (auto& ptrInputBlob : ptrInputBlobs) { + if (ptrInputBlob->size() != numFrameElementsInput[i++] * batchSize) { + throw std::logic_error("Network input size (" + std::to_string(ptrInputBlob->size()) + + ") does not match the ark file size (" + + std::to_string(numFrameElementsInput[i-1] * batchSize) + ")"); + } } + ptrScores.resize(numFrames * numScoresPerFrame * sizeof(float)); if (!FLAGS_r.empty()) { std::string refUtteranceName; @@ -730,13 +792,17 @@ int main(int argc, char *argv[]) { ClearScoreError(&totalError); totalError.threshold = frameError.threshold = MAX_SCORE_DIFFERENCE; - auto inputFrame = &ptrUtterance.front(); auto outputFrame = &ptrScores.front(); + std::vector inputFrame; + for (auto& ut : ptrUtterances) { + inputFrame.push_back(&ut.front()); + } std::map callPerfMap; size_t frameIndex = 0; - numFrames += 2 * FLAGS_cw; + uint32_t numFramesArkFile = numFrames; + numFrames += FLAGS_cw_l + FLAGS_cw_r; uint32_t numFramesThisBatch{batchSize}; auto t0 = Time::now(); @@ -763,6 +829,7 @@ int main(int argc, char *argv[]) { if (inferRequest.frameIndex != -1) { StatusCode code = inferRequest.inferRequest.Wait( InferenceEngine::IInferRequest::WaitMode::RESULT_READY); + if
(code != StatusCode::OK) { if (!useHetero) continue; if (code != StatusCode::INFER_NOT_STARTED) continue; @@ -804,30 +871,37 @@ int main(int argc, char *argv[]) { continue; } - Blob::Ptr inputBlob = inferRequest.inferRequest.GetBlob(cInputInfo.begin()->first); + ptrInputBlobs.clear(); + for (auto& input : cInputInfo) { + ptrInputBlobs.push_back(inferRequest.inferRequest.GetBlob(input.first)); + } - std::memcpy(inputBlob->buffer(), - inputFrame, - inputBlob->byteSize()); + for (size_t i = 0; i < numInputArkFiles; i++) { + std::memcpy(ptrInputBlobs[i]->buffer(), + inputFrame[i], + ptrInputBlobs[i]->byteSize()); + } - int index = static_cast(frameIndex) - 2 * FLAGS_cw; + int index = static_cast(frameIndex) - (FLAGS_cw_l + FLAGS_cw_r); inferRequest.inferRequest.StartAsync(); inferRequest.frameIndex = index < 0 ? -2 : index; inferRequest.numFramesThisBatch = numFramesThisBatch; frameIndex += numFramesThisBatch; - - if (FLAGS_cw > 0) { - int i = frameIndex - FLAGS_cw; - if (i > 0 && i < static_cast(numFrames)) { - inputFrame += sizeof(float) * numFrameElementsInput * numFramesThisBatch; - } else if (i >= static_cast(numFrames)) { - inputFrame = &ptrUtterance.front() + - (numFrames - 1) * sizeof(float) * numFrameElementsInput * - numFramesThisBatch; + for (size_t j = 0; j < inputArkFiles.size(); j++) { + if (FLAGS_cw_l > 0 || FLAGS_cw_r > 0) { + int i = frameIndex - FLAGS_cw_l; + if (i > 0 && i < static_cast(numFramesArkFile)) { + inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch; + } else if (i >= static_cast(numFramesArkFile)) { + inputFrame[j] = &ptrUtterances[j].front() + + (numFramesArkFile - 1) * sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch; + } else if (i < 0) { + inputFrame[j] = &ptrUtterances[j].front(); + } + } else { + inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch; } - } else { - inputFrame += sizeof(float) * numFrameElementsInput * numFramesThisBatch; } inferRequestFetched |= true; } @@ -863,7 +937,7 @@ int main(int argc, char *argv[]) { << std::endl; if (FLAGS_pc) { // print - printPerformanceCounters(utterancePerfMap, frameIndex, std::cout); + printPerformanceCounters(utterancePerfMap, frameIndex, std::cout, getFullDeviceName(ie, FLAGS_d)); } if (!FLAGS_r.empty()) { printReferenceCompareResults(totalError, numFrames, std::cout);
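The pointer bookkeeping above implements edge padding for context-window networks: while the shifted index `frameIndex - FLAGS_cw_l` is still left of the utterance, the first frame is fed repeatedly, and once it runs past the end, the last frame is. The index-level policy can be isolated as below; `clampFrameIndex` is our illustration, not sample code.

```cpp
#include <algorithm>
#include <cstdint>

// Map a loop step onto a source frame for a network with cwL frames of
// left context: steps before the utterance repeat frame 0, steps past the
// end repeat the last frame. Assumes numFramesArkFile >= 1.
uint32_t clampFrameIndex(int step, int cwL, uint32_t numFramesArkFile) {
    const int src = step - cwL;  // shift back by the left context
    const int last = static_cast<int>(numFramesArkFile) - 1;
    return static_cast<uint32_t>(std::min(std::max(src, 0), last));
}
```

This also explains why the loop runs for numFrames + cw_l + cw_r steps in total: numFrames is widened right after numFramesArkFile captures the unpadded frame count.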
If "the sample will look for this plugin only"; /// @brief message for assigning cnn calculation to device -static const char target_device_message[] = "Specify a target device to infer on. CPU, GPU, GNA_AUTO, GNA_HW, GNA_SW, " +static const char target_device_message[] = "Specify a target device to infer on. CPU, GPU, GNA_AUTO, GNA_HW, GNA_SW, GNA_SW_FP32 " "GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU" - " as a secondary (e.g. HETERO:GNA,CPU) are supported. The sample will look " - "for a suitable plugin for device specified."; + " as a secondary (e.g. HETERO:GNA,CPU) are supported. The list of available devices is shown below. " + "The sample will look for a suitable plugin for device specified."; /// @brief message for performance counters static const char performance_counter_message[] = "Enables per-layer performance report"; @@ -75,10 +66,15 @@ static const char batch_size_message[] = "Batch size 1-8 (default 1)"; static const char infer_num_threads_message[] = "Optional. Number of threads to use for concurrent async" \ " inference requests on the GNA."; -/// @brief message for context window argument -static const char context_window_message[] = "Optional. Number of frames for context windows (default is 0). " \ - "Works only with context window networks." - " If you use the cw flag, then batch size and nthreads arguments are ignored."; +/// @brief message for left context window argument +static const char context_window_message_l[] = "Optional. Number of frames for left context windows (default is 0). " \ + "Works only with context window networks." + " If you use the cw_l or cw_r flag, then batch size and nthreads arguments are ignored."; + +/// @brief message for right context window argument +static const char context_window_message_r[] = "Optional. Number of frames for right context windows (default is 0). " \ + "Works only with context window networks." + " If you use the cw_r or cw_l flag, then batch size and nthreads arguments are ignored."; /// \brief Define flag for showing help message
DEFINE_bool(h, false, help_message); @@ -95,10 +91,6 @@ DEFINE_string(m, "", model_message); /// It is a required parameter DEFINE_string(p, "", plugin_message); -/// \brief Define parameter for set path to plugins
-/// Default is ./lib -DEFINE_string(pp, "", plugin_path_message); - /// \brief device the target device to infer on
DEFINE_string(d, "GNA_AUTO", target_device_message); @@ -139,8 +131,11 @@ DEFINE_int32(bs, 1, batch_size_message); /// @brief Number of threads to use for inference on the CPU (also affects Hetero cases) DEFINE_int32(nthreads, 1, infer_num_threads_message); -/// @brief Context window size (default 0) -DEFINE_int32(cw, 0, context_window_message); +/// @brief Right context window size (default 0) +DEFINE_int32(cw_r, 0, context_window_message_r); + +/// @brief Left context window size (default 0) +DEFINE_int32(cw_l, 0, context_window_message_l); /** * \brief This function show a help message @@ -150,15 +145,14 @@ static void showUsage() { std::cout << "speech_sample [OPTION]" << std::endl; std::cout << "Options:" << std::endl; std::cout << std::endl; - std::cout << " -h " << help_message << std::endl; + std::cout << " -h " << help_message << std::endl; std::cout << " -i \"\" " << input_message << std::endl; std::cout << " -m \"\" " << model_message << std::endl; std::cout << " -o \"\" " << output_message << std::endl; std::cout << " -l \"\" " << custom_cpu_library_message << std::endl; std::cout << " -d \"\" " << target_device_message << std::endl; - std::cout << " -p " << plugin_message << std::endl; - std::cout << " -pp " << plugin_path_message << std::endl; - std::cout << " -pc " << performance_counter_message << std::endl; + std::cout << " -p " << plugin_message << std::endl; + std::cout << " -pc " << performance_counter_message << std::endl; std::cout << " -q \"\" " << quantization_message << std::endl; std::cout << " -qb \"\" " << quantization_bits_message << std::endl; std::cout << " -sf \"\" " << scale_factor_message << std::endl; @@ -168,6 +162,7 @@ static void showUsage() { std::cout << " -wg \"\" " << write_gna_model_message << std::endl; std::cout << " -we \"\" " << write_embedded_model_message << std::endl; std::cout << " -nthreads \"\" " << infer_num_threads_message << std::endl; - std::cout << " -cw \"\" " << context_window_message << std::endl; + std::cout << " -cw_l \"\" " << context_window_message_l << std::endl; + std::cout << " -cw_r \"\" " << context_window_message_r << std::endl; } diff --git a/inference-engine/samples/style_transfer_sample/CMakeLists.txt b/inference-engine/samples/style_transfer_sample/CMakeLists.txt index ac2a1707c0d0cb..e71e8d8f7fed51 100644 --- a/inference-engine/samples/style_transfer_sample/CMakeLists.txt +++ b/inference-engine/samples/style_transfer_sample/CMakeLists.txt @@ -2,34 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # -set (TARGET_NAME "style_transfer_sample") - -file (GLOB MAIN_SRC - ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp - ) - -file (GLOB MAIN_HEADERS - ${CMAKE_CURRENT_SOURCE_DIR}/*.h - ) - -# Create named folders for the sources within the .vcproj -# Empty name lists them directly under the .vcproj -source_group("src" FILES ${MAIN_SRC}) -source_group("include" FILES ${MAIN_HEADERS}) - - -link_directories(${LIB_FOLDER}) - -# Create library file from sources. 
-add_executable(${TARGET_NAME} ${MAIN_SRC} ${MAIN_HEADERS}) - -add_dependencies(${TARGET_NAME} gflags) - -set_target_properties(${TARGET_NAME} PROPERTIES "CMAKE_CXX_FLAGS" "${CMAKE_CXX_FLAGS} -fPIE" -COMPILE_PDB_NAME ${TARGET_NAME}) - -target_link_libraries(${TARGET_NAME} format_reader IE::ie_cpu_extension ${InferenceEngine_LIBRARIES} gflags) - -if(UNIX) - target_link_libraries( ${TARGET_NAME} ${LIB_DL} pthread) -endif() +ie_add_sample(NAME style_transfer_sample + SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp" + HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/style_transfer_sample.h" + DEPENDENCIES format_reader) diff --git a/inference-engine/samples/style_transfer_sample/README.md b/inference-engine/samples/style_transfer_sample/README.md index f2e1c87c9301f5..402769f14d20f2 100644 --- a/inference-engine/samples/style_transfer_sample/README.md +++ b/inference-engine/samples/style_transfer_sample/README.md @@ -22,10 +22,7 @@ Options: -h Print a usage message -i "" Required. Path to a .bmp image file or a sequence of paths separated by spaces. -m "" Required. Path to an .xml file with a trained model. - -pp "" Path to a plugin folder - -d "" The target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is acceptable. The sample looks for a suitable plugin for the device specified. - -ni "" Number of iterations. Default value is 1 - -pc Enables per-layer performance report + -d "" The target device to infer on (the list of available devices is shown below). Default value is CPU. Use "-d HETERO:" format to specify HETERO plugin. The sample looks for a suitable plugin for the device specified. -mean_val_r, -mean_val_g, -mean_val_b Mean values. Required if the model needs mean values for preprocessing and postprocessing @@ -34,7 +31,7 @@ Options: Running the application with the empty list of options yields the usage message given above and an error message. -To perform inference on an image using a trained model of NST network on Intel® CPUs, use the following command: +To perform inference of an image using a trained model of NST network on Intel® CPUs, use the following command: ```sh ./style_transfer_sample -i /cat.bmp -m /1_decoder_FP32.xml ``` diff --git a/inference-engine/samples/style_transfer_sample/main.cpp b/inference-engine/samples/style_transfer_sample/main.cpp index 9e943e376ba0b2..e983b89c6e3148 100644 --- a/inference-engine/samples/style_transfer_sample/main.cpp +++ b/inference-engine/samples/style_transfer_sample/main.cpp @@ -7,11 +7,8 @@ * @file style_transfer_sample/main.cpp * @example style_transfer_sample/main.cpp */ -#include -#include #include #include -#include #include #include @@ -33,13 +30,10 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) { gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); if (FLAGS_h) { showUsage(); + showAvailableDevices(); return false; } - if (FLAGS_ni < 1) { - throw std::logic_error("Parameter -ni should be more than 0 !!! (default 1)"); - } - if (FLAGS_i.empty()) { throw std::logic_error("Parameter -i is not set"); } @@ -65,12 +59,13 @@ int main(int argc, char *argv[]) { if (imageNames.empty()) throw std::logic_error("No suitable images were found"); // ----------------------------------------------------------------------------------------------------- - // --------------------------- 1. Load Plugin for inference engine ------------------------------------- - slog::info << "Loading plugin" << slog::endl; - InferencePlugin plugin = PluginDispatcher({FLAGS_pp}).getPluginByDevice(FLAGS_d); + // --------------------------- 1. 
Load inference engine ------------------------------------- + slog::info << "Loading Inference Engine" << slog::endl; + Core ie; - /** Printing plugin version **/ - printPluginVersion(plugin, std::cout); + /** Printing device version **/ + slog::info << "Device info: " << slog::endl; + std::cout << ie.GetVersions(FLAGS_d) << std::endl; /** Loading default extensions **/ if (FLAGS_d.find("CPU") != std::string::npos) { @@ -79,18 +74,18 @@ int main(int argc, char *argv[]) { * custom MKLDNNPlugin layer implementations. These layers are not supported * by mkldnn, but they can be useful for inferring custom topologies. **/ - plugin.AddExtension(std::make_shared()); + ie.AddExtension(std::make_shared(), "CPU"); } if (!FLAGS_l.empty()) { // CPU(MKLDNN) extensions are loaded as a shared library and passed as a pointer to base extension IExtensionPtr extension_ptr = make_so_pointer(FLAGS_l); - plugin.AddExtension(extension_ptr); + ie.AddExtension(extension_ptr, "CPU"); slog::info << "CPU Extension loaded: " << FLAGS_l << slog::endl; } if (!FLAGS_c.empty()) { // clDNN Extensions are loaded from an .xml description and OpenCL kernel files - plugin.SetConfig({{PluginConfigParams::KEY_CONFIG_FILE, FLAGS_c}}); + ie.SetConfig({{PluginConfigParams::KEY_CONFIG_FILE, FLAGS_c}}, "GPU"); slog::info << "GPU Extension loaded: " << FLAGS_c << slog::endl; } // ----------------------------------------------------------------------------------------------------- @@ -123,7 +118,7 @@ int main(int argc, char *argv[]) { std::vector> imagesData; /** Specifying the precision of input data. - * This should be called before load of the network to the plugin **/ + * This should be called before load of the network to the device **/ inputInfoItem.second->setPrecision(Precision::FP32); /** Collect images data ptrs **/ @@ -170,12 +165,13 @@ int main(int argc, char *argv[]) { } // ----------------------------------------------------------------------------------------------------- - // --------------------------- 4. Loading model to the plugin ------------------------------------------ - slog::info << "Loading model to the plugin" << slog::endl; - ExecutableNetwork executable_network = plugin.LoadNetwork(network, {}); + // --------------------------- 4. Loading model to the device ------------------------------------------ + slog::info << "Loading model to the device" << slog::endl; + ExecutableNetwork executable_network = ie.LoadNetwork(network, FLAGS_d); // ----------------------------------------------------------------------------------------------------- // --------------------------- 5. Create infer request ------------------------------------------------- + slog::info << "Create infer request" << slog::endl; InferRequest infer_request = executable_network.CreateInferRequest(); // ----------------------------------------------------------------------------------------------------- @@ -205,30 +201,8 @@ int main(int argc, char *argv[]) { // ----------------------------------------------------------------------------------------------------- // --------------------------- 7. 
Do inference --------------------------------------------------------- - slog::info << "Start inference (" << FLAGS_ni << " iterations)" << slog::endl; - - typedef std::chrono::high_resolution_clock Time; - typedef std::chrono::duration> ms; - typedef std::chrono::duration fsec; - - double total = 0.0; - /** Start inference & calc performance **/ - for (size_t iter = 0; iter < FLAGS_ni; ++iter) { - auto t0 = Time::now(); - infer_request.Infer(); - auto t1 = Time::now(); - fsec fs = t1 - t0; - ms d = std::chrono::duration_cast(fs); - total += d.count(); - } - - /** Show performance results **/ - std::cout << std::endl << "Average running time of one iteration: " << total / static_cast(FLAGS_ni) - << " ms" << std::endl; - - if (FLAGS_pc) { - printPerformanceCounts(infer_request, std::cout); - } + slog::info << "Start inference" << slog::endl; + infer_request.Infer(); // ----------------------------------------------------------------------------------------------------- // --------------------------- 8. Process output ------------------------------------------------------- @@ -295,5 +269,7 @@ int main(int argc, char *argv[]) { } slog::info << "Execution successful" << slog::endl; + slog::info << slog::endl << "This sample is an API example; for any performance measurements, " + "please use the dedicated benchmark_app tool" << slog::endl; return 0; } diff --git a/inference-engine/samples/style_transfer_sample/style_transfer_sample.h b/inference-engine/samples/style_transfer_sample/style_transfer_sample.h index 9af35b1962d976..928e63436dc0f0 100644 --- a/inference-engine/samples/style_transfer_sample/style_transfer_sample.h +++ b/inference-engine/samples/style_transfer_sample/style_transfer_sample.h @@ -9,21 +9,12 @@ #include #include -#ifdef _WIN32 -#include -#else -#include -#endif - /// @brief message for help argument static const char help_message[] = "Print a usage message."; /// @brief message for images argument static const char image_message[] = "Required. Path to a .bmp image."; -/// @brief message for plugin_path argument -static const char plugin_path_message[] = "Optional. Path to a plugin folder."; - /// @brief message for model argument static const char model_message[] = "Required. Path to an .xml file with a trained model.";\ @@ -32,14 +23,9 @@ static const char plugin_message[] = "Plugin name. For example MKLDNNPlugin. If this parameter is set, "the sample will look for this plugin only"; /// @brief message for assigning cnn calculation to device -static const char target_device_message[] = "Optional. Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is acceptable. " \ -"Sample will look for a suitable plugin for device specified"; - -/// @brief message for performance counters -static const char performance_counter_message[] = "Optional. Enables per-layer performance report"; - -/// @brief message for iterations count -static const char iterations_count_message[] = "Optional. Number of iterations. Default value is 1"; +static const char target_device_message[] = "Optional. Specify the target device to infer on (the list of available devices is shown below). " \ + "Default value is CPU. Use \"-d HETERO:\" format to specify HETERO plugin. " \ + "Sample will look for a suitable plugin for the device specified"; /// @brief message for user library argument static const char custom_cpu_library_message[] = "Optional. Required for CPU custom layers."
\ @@ -65,19 +51,9 @@ DEFINE_string(i, "", image_message); /// It is a required parameter DEFINE_string(m, "", model_message); -/// @brief Define parameter for set path to plugins
-/// Default is ./lib -DEFINE_string(pp, "", plugin_path_message); - /// @brief device the target device to infer on
DEFINE_string(d, "CPU", target_device_message); -/// \brief Enable per-layer performance report -DEFINE_bool(pc, false, performance_counter_message); - -/// @brief Iterations count (default 1) -DEFINE_uint32(ni, 1, iterations_count_message); - /// @brief Absolute path to CPU library with user layers
/// It is a required parameter DEFINE_string(l, "", custom_cpu_library_message); @@ -103,10 +79,7 @@ static void showUsage() { std::cout << " -h " << help_message << std::endl; std::cout << " -i \"\" " << image_message << std::endl; std::cout << " -m \"\" " << model_message << std::endl; - std::cout << " -pp \"\" " << plugin_path_message << std::endl; std::cout << " -d \"\" " << target_device_message << std::endl; - std::cout << " -ni \"\" " << iterations_count_message << std::endl; - std::cout << " -pc " << performance_counter_message << std::endl; std::cout << " -mean_val_r," << std::endl; std::cout << " -mean_val_g," << std::endl; std::cout << " -mean_val_b " << preprocess_data_message << std::endl; diff --git a/inference-engine/samples/validation_app/CMakeLists.txt b/inference-engine/samples/validation_app/CMakeLists.txt deleted file mode 100644 index 87b337c0d37ab8..00000000000000 --- a/inference-engine/samples/validation_app/CMakeLists.txt +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (C) 2018-2019 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# - -set (TARGET_NAME "validation_app") - -file (GLOB MAIN_SRC - ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/pugixml/*.cpp - ) - -file (GLOB MAIN_HEADERS - ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/pugixml/*.hpp - ) - -# Create named folders for the sources within the .vcproj -# Empty name lists them directly under the .vcproj -source_group("src" FILES ${MAIN_SRC}) -source_group("include" FILES ${MAIN_HEADERS}) - -# Find OpenCV components if exist -find_package(OpenCV COMPONENTS imgcodecs imgproc QUIET) -if(NOT(OpenCV_FOUND)) - message(WARNING "OPENCV is disabled or not found, " ${TARGET_NAME} " skipped") - return() -endif() - -# Properties->C/C++->General->Additional Include Directories -include_directories (${CMAKE_CURRENT_SOURCE_DIR}/../classification_sample/core - ${CMAKE_CURRENT_SOURCE_DIR}/../common - ${CMAKE_CURRENT_SOURCE_DIR}/../common/os/windows - ${CMAKE_CURRENT_SOURCE_DIR}/../../include) - -link_directories(${LIB_FOLDER}) - -# Create library file from sources. 
-add_executable(${TARGET_NAME} ${MAIN_SRC} ${MAIN_HEADERS}) - -set_target_properties(${TARGET_NAME} PROPERTIES "CMAKE_CXX_FLAGS" "${CMAKE_CXX_FLAGS} -fPIE" -COMPILE_PDB_NAME ${TARGET_NAME}) -target_link_libraries(${TARGET_NAME} gflags IE::ie_cpu_extension ${InferenceEngine_LIBRARIES} ${OpenCV_LIBRARIES}) -if (UNIX) - target_link_libraries(${TARGET_NAME} dl) -endif() - diff --git a/inference-engine/samples/validation_app/ClassificationProcessor.cpp b/inference-engine/samples/validation_app/ClassificationProcessor.cpp deleted file mode 100644 index 7db4b3219d6f16..00000000000000 --- a/inference-engine/samples/validation_app/ClassificationProcessor.cpp +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include - -#include "ClassificationProcessor.hpp" -#include "Processor.hpp" - -using InferenceEngine::details::InferenceEngineException; - -ClassificationProcessor::ClassificationProcessor(const std::string& flags_m, const std::string& flags_d, const std::string& flags_i, int flags_b, - InferencePlugin plugin, CsvDumper& dumper, const std::string& flags_l, - PreprocessingOptions preprocessingOptions, bool zeroBackground) - : Processor(flags_m, flags_d, flags_i, flags_b, plugin, dumper, "Classification network", preprocessingOptions), zeroBackground(zeroBackground) { - - // Change path to labels file if necessary - if (flags_l.empty()) { - labelFileName = fileNameNoExt(modelFileName) + ".labels"; - } else { - labelFileName = flags_l; - } -} - -ClassificationProcessor::ClassificationProcessor(const std::string& flags_m, const std::string& flags_d, const std::string& flags_i, int flags_b, - InferencePlugin plugin, CsvDumper& dumper, const std::string& flags_l, bool zeroBackground) - : ClassificationProcessor(flags_m, flags_d, flags_i, flags_b, plugin, dumper, flags_l, - PreprocessingOptions(false, ResizeCropPolicy::ResizeThenCrop, 256, 256), zeroBackground) { -} - -std::shared_ptr ClassificationProcessor::Process(bool stream_output) { - slog::info << "Collecting labels" << slog::endl; - ClassificationSetGenerator generator; - try { - generator.readLabels(labelFileName); - } catch (InferenceEngine::details::InferenceEngineException& ex) { - slog::warn << "Can't read labels file " << labelFileName << slog::endl; - slog::warn << "Error: " << ex.what() << slog::endl; - } - - auto validationMap = generator.getValidationMap(imagesPath); - ImageDecoder decoder; - - // ----------------------------Do inference------------------------------------------------------------- - slog::info << "Starting inference" << slog::endl; - - std::vector expected(batch); - std::vector files(batch); - - ConsoleProgress progress(validationMap.size(), stream_output); - - ClassificationInferenceMetrics im; - - std::string firstInputName = this->inputInfo.begin()->first; - std::string firstOutputName = this->outInfo.begin()->first; - auto firstInputBlob = inferRequest.GetBlob(firstInputName); - auto firstOutputBlob = inferRequest.GetBlob(firstOutputName); - - auto iter = validationMap.begin(); - while (iter != validationMap.end()) { - size_t b = 0; - int filesWatched = 0; - for (; b < batch && iter != validationMap.end(); b++, iter++, filesWatched++) { - expected[b] = iter->first; - try { - decoder.insertIntoBlob(iter->second, b, *firstInputBlob, preprocessingOptions); - files[b] = iter->second; - } catch (const InferenceEngineException& iex) { - slog::warn << "Can't read file " << iter->second << slog::endl; - slog::warn << "Error: " 
<< iex.what() << slog::endl; - // Could be some non-image file in directory - b--; - continue; - } - } - - Infer(progress, filesWatched, im); - - std::vector results; - auto firstOutputData = firstOutputBlob->buffer().as::value_type*>(); - InferenceEngine::TopResults(TOP_COUNT, *firstOutputBlob, results); - - for (size_t i = 0; i < b; i++) { - int expc = expected[i]; - if (zeroBackground) expc++; - - bool top1Scored = (static_cast(results[0 + TOP_COUNT * i]) == expc); - dumper << "\"" + files[i] + "\"" << top1Scored; - if (top1Scored) im.top1Result++; - for (int j = 0; j < TOP_COUNT; j++) { - unsigned classId = results[j + TOP_COUNT * i]; - if (static_cast(classId) == expc) { - im.topCountResult++; - } - dumper << classId << firstOutputData[classId + i * (firstOutputBlob->size() / batch)]; - } - dumper.endLine(); - im.total++; - } - } - progress.finish(); - - return std::shared_ptr(new ClassificationInferenceMetrics(im)); -} - -void ClassificationProcessor::Report(const Processor::InferenceMetrics& im) { - Processor::Report(im); - if (im.nRuns > 0) { - const ClassificationInferenceMetrics& cim = dynamic_cast(im); - - cout << "Top1 accuracy: " << OUTPUT_FLOATING(100.0 * cim.top1Result / cim.total) << "% (" << cim.top1Result << " of " - << cim.total << " images were detected correctly, top class is correct)" << "\n"; - cout << "Top5 accuracy: " << OUTPUT_FLOATING(100.0 * cim.topCountResult / cim.total) << "% (" << cim.topCountResult << " of " - << cim.total << " images were detected correctly, top five classes contain required class)" << "\n"; - } -} - diff --git a/inference-engine/samples/validation_app/ClassificationProcessor.hpp b/inference-engine/samples/validation_app/ClassificationProcessor.hpp deleted file mode 100644 index e7a6c9486a1fcf..00000000000000 --- a/inference-engine/samples/validation_app/ClassificationProcessor.hpp +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include -#include - -#include "classification_set_generator.hpp" -#include "Processor.hpp" - -using namespace std; - -class ClassificationProcessor : public Processor { - const int TOP_COUNT = 5; - - struct ClassificationInferenceMetrics : public InferenceMetrics { - public: - int top1Result = 0; - int topCountResult = 0; - int total = 0; - }; - -protected: - std::string labelFileName; - bool zeroBackground; -public: - ClassificationProcessor(const std::string& flags_m, const std::string& flags_d, const std::string& flags_i, int flags_b, - InferenceEngine::InferencePlugin plugin, CsvDumper& dumper, const std::string& flags_l, - PreprocessingOptions preprocessingOptions, bool zeroBackground); - ClassificationProcessor(const std::string& flags_m, const std::string& flags_d, const std::string& flags_i, int flags_b, - InferenceEngine::InferencePlugin plugin, CsvDumper& dumper, const std::string& flags_l, bool zeroBackground); - - std::shared_ptr Process(bool stream_output); - virtual void Report(const InferenceMetrics& im); - virtual ~ClassificationProcessor() { } -}; diff --git a/inference-engine/samples/validation_app/ObjectDetectionProcessor.cpp b/inference-engine/samples/validation_app/ObjectDetectionProcessor.cpp deleted file mode 100644 index 8e3a23e4f1ad43..00000000000000 --- a/inference-engine/samples/validation_app/ObjectDetectionProcessor.cpp +++ /dev/null @@ -1,175 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include 
-#include -#include -#include -#include - -#include "ObjectDetectionProcessor.hpp" -#include "Processor.hpp" -#include "user_exception.hpp" - -#include -#include - -using InferenceEngine::details::InferenceEngineException; - -ObjectDetectionProcessor::ObjectDetectionProcessor(const std::string& flags_m, const std::string& flags_d, - const std::string& flags_i, const std::string& subdir, int flags_b, - double threshold, InferenceEngine::InferencePlugin plugin, CsvDumper& dumper, - const std::string& flags_a, const std::string& classes_list_file, PreprocessingOptions preprocessingOptions, bool scaleProposalToInputSize) - : Processor(flags_m, flags_d, flags_i, flags_b, plugin, dumper, "Object detection network", preprocessingOptions), - annotationsPath(flags_a), subdir(subdir), threshold(threshold), scaleProposalToInputSize(scaleProposalToInputSize) { - std::ifstream clf(classes_list_file); - if (!clf) { - throw UserException(1) << "Classes list file \"" << classes_list_file << "\" not found or inaccessible"; - } - - while (!clf.eof()) { - std::string line; - std::getline(clf, line, '\n'); - - if (line != "") { - istringstream lss(line); - std::string id; - lss >> id; - int class_index = 0; - lss >> class_index; - - classes.insert(std::pair(id, class_index)); - } - } -} - -shared_ptr ObjectDetectionProcessor::Process(bool stream_output) { - // Parsing PASCAL VOC2012 format - VOCAnnotationParser vocAnnParser; - slog::info << "Collecting VOC annotations from " << annotationsPath << slog::endl; - VOCAnnotationCollector annCollector(annotationsPath); - slog::info << annCollector.annotations().size() << " annotations collected" << slog::endl; - - if (annCollector.annotations().size() == 0) { - ObjectDetectionInferenceMetrics emptyIM(this->threshold); - - return std::shared_ptr(new ObjectDetectionInferenceMetrics(emptyIM)); - } - - // Getting desired results from annotations - std::map desiredForFiles; - - for (auto& ann : annCollector.annotations()) { - std::list dobList; - for (auto& obj : ann.objects) { - DetectedObject dob(classes[obj.name], static_cast(obj.bndbox.xmin), - static_cast(obj.bndbox.ymin), static_cast(obj.bndbox.xmax), - static_cast(obj.bndbox.ymax), 1.0f, obj.difficult != 0); - dobList.push_back(dob); - } - ImageDescription id(dobList); - desiredForFiles.insert(std::pair(ann.folder + "/" + (!subdir.empty() ? subdir + "/" : "") + ann.filename, id)); - } - - for (auto & item : outInfo) { - DataPtr outputData = item.second; - if (!outputData) { - throw std::logic_error("output data pointer is not valid"); - } - } - // ----------------------------------------------------------------------------------------------------- - - // ----------------------------Do inference------------------------------------------------------------- - slog::info << "Starting inference" << slog::endl; - - std::vector expected(batch); - - ConsoleProgress progress(annCollector.annotations().size(), stream_output); - - ObjectDetectionInferenceMetrics im(threshold); - - vector::const_iterator iter = annCollector.annotations().begin(); - - std::map scaledDesiredForFiles; - - std::string firstInputName = this->inputInfo.begin()->first; - auto firstInputBlob = inferRequest.GetBlob(firstInputName); - - while (iter != annCollector.annotations().end()) { - std::vector files; - size_t b = 0; - - int filesWatched = 0; - for (; b < batch && iter != annCollector.annotations().end(); b++, iter++, filesWatched++) { - expected[b] = *iter; - string filename = iter->folder + "/" + (!subdir.empty() ? 
subdir + "/" : "") + iter->filename; - try { - float scale_x, scale_y; - - scale_x = 1.0f / iter->size.width; // orig_size.width; - scale_y = 1.0f / iter->size.height; // orig_size.height; - - if (scaleProposalToInputSize) { - scale_x *= firstInputBlob->dims()[0]; - scale_y *= firstInputBlob->dims()[1]; - } - - // Scaling the desired result (taken from the annotation) to the network size - scaledDesiredForFiles.insert(std::pair(filename, desiredForFiles.at(filename).scale(scale_x, scale_y))); - - files.push_back(filename); - } catch (const InferenceEngineException& iex) { - slog::warn << "Can't read file " << this->imagesPath + "/" + filename << slog::endl; - slog::warn << "Error: " << iex.what() << slog::endl; - // Could be some non-image file in directory - b--; - continue; - } - } - - if (files.size() == batch) { - // Infer model - Infer(progress, filesWatched, im); - - // Processing the inference result - std::map> detectedObjects = processResult(files); - - // Calculating similarity - // - for (size_t b = 0; b < files.size(); b++) { - ImageDescription result(detectedObjects[files[b]]); - im.apc.consumeImage(result, scaledDesiredForFiles.at(files[b])); - } - } - } - progress.finish(); - - // ----------------------------------------------------------------------------------------------------- - - // ---------------------------Postprocess output blobs-------------------------------------------------- - slog::info << "Processing output blobs" << slog::endl; - - return std::shared_ptr(new ObjectDetectionInferenceMetrics(im)); -} - -void ObjectDetectionProcessor::Report(const Processor::InferenceMetrics& im) { - const ObjectDetectionInferenceMetrics& odim = dynamic_cast(im); - Processor::Report(im); - if (im.nRuns > 0) { - std::map appc = odim.apc.calculateAveragePrecisionPerClass(); - - std::cout << "Average precision per class table: " << std::endl << std::endl; - std::cout << "Class\tAP" << std::endl; - - double mAP = 0; - for (auto i : appc) { - std::cout << std::fixed << std::setprecision(3) << i.first << "\t" << i.second << std::endl; - mAP += i.second; - } - mAP /= appc.size(); - std::cout << std::endl << std::fixed << std::setprecision(4) << "Mean Average Precision (mAP): " << mAP << std::endl; - } -} diff --git a/inference-engine/samples/validation_app/ObjectDetectionProcessor.hpp b/inference-engine/samples/validation_app/ObjectDetectionProcessor.hpp deleted file mode 100644 index 7a277105fe4dd9..00000000000000 --- a/inference-engine/samples/validation_app/ObjectDetectionProcessor.hpp +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "Processor.hpp" - -#include "VOCAnnotationParser.hpp" - -using namespace std; - -class ObjectDetectionProcessor : public Processor { -public: - struct ObjectDetectionInferenceMetrics : public InferenceMetrics { - public: - AveragePrecisionCalculator apc; - - explicit ObjectDetectionInferenceMetrics(double threshold) : apc(threshold) { } - }; - -protected: - std::string annotationsPath; - std::string subdir; - std::map classes; - double threshold; - - bool scaleProposalToInputSize; - - virtual std::map> processResult(std::vector files) = 0; - -public: - ObjectDetectionProcessor(const std::string& flags_m, const std::string& flags_d, const std::string& flags_i, const std::string& subdir, int flags_b, - double threshold, - InferenceEngine::InferencePlugin plugin, CsvDumper& dumper, - 
const std::string& flags_a, const std::string& classes_list_file, PreprocessingOptions preprocessingOptions, bool scaleSizeToInputSize); - - shared_ptr Process(bool stream_output); - virtual void Report(const InferenceMetrics& im); - virtual ~ObjectDetectionProcessor() {} -}; diff --git a/inference-engine/samples/validation_app/Processor.cpp b/inference-engine/samples/validation_app/Processor.cpp deleted file mode 100644 index cf8e73b9e10a88..00000000000000 --- a/inference-engine/samples/validation_app/Processor.cpp +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include - -#include - -#include "Processor.hpp" - -using namespace InferenceEngine; - -Processor::Processor(const std::string& flags_m, const std::string& flags_d, const std::string& flags_i, int flags_b, - InferencePlugin plugin, CsvDumper& dumper, const std::string& approach, PreprocessingOptions preprocessingOptions) - - : modelFileName(flags_m), targetDevice(flags_d), imagesPath(flags_i), batch(flags_b), - preprocessingOptions(preprocessingOptions), dumper(dumper), plugin(plugin), approach(approach) { - - // --------------------Load network (Generated xml/bin files)------------------------------------------- - slog::info << "Loading network files" << slog::endl; - - loadDuration = getDurationOf([&]() { - /** Read network model **/ - networkReader.ReadNetwork(modelFileName); - if (!networkReader.isParseSuccess()) THROW_IE_EXCEPTION << "cannot load a failed Model"; - - /** Extract model name and load weights **/ - std::string binFileName = fileNameNoExt(modelFileName) + ".bin"; - networkReader.ReadWeights(binFileName.c_str()); - }); - // ----------------------------------------------------------------------------------------------------- - - // -----------------------------Prepare input blobs----------------------------------------------------- - slog::info << "Preparing input blobs" << slog::endl; - - /** Taking information about all topology inputs **/ - inputInfo = InputsDataMap(networkReader.getNetwork().getInputsInfo()); - - /** Stores all input blobs data **/ - - // TODO Check if it's necessary - if (!targetDevice.empty()) { - networkReader.getNetwork().setTargetDevice(getDeviceFromStr(targetDevice)); - } - - if (batch == 0) { - // Zero means "take batch value from the IR" - batch = networkReader.getNetwork().getBatchSize(); - } else { - // Not zero means "use the specified value" - auto network = networkReader.getNetwork(); - auto input_shapes = network.getInputShapes(); - std::string input_name; - SizeVector input_shape; - std::tie(input_name, input_shape) = *input_shapes.begin(); - input_shape[0] = batch; - input_shapes[input_name] = input_shape; - network.reshape(input_shapes); - } - - if (inputInfo.size() != 1) { - THROW_IE_EXCEPTION << "This app accepts networks having only one input"; - } - - for (auto & item : inputInfo) { - inputDims = item.second->getDims(); - slog::info << "Batch size is " << std::to_string(networkReader.getNetwork().getBatchSize()) << slog::endl; - } - - outInfo = networkReader.getNetwork().getOutputsInfo(); - DataPtr outData = outInfo.begin()->second; - - // Set the precision of output data provided by the user, should be called before load of the network to the plugin - if (!outData) { - throw std::logic_error("output data pointer is not valid"); - } - outData->setPrecision(Precision::FP32); - if (outInfo.size() != 1) { - THROW_IE_EXCEPTION << "This app accepts networks having only one output"; - } - if 
(!outData) { - THROW_IE_EXCEPTION << "The network output info is not valid"; - } - - outputDims = outData->dims; - - // Load model to plugin and create an inference request - - ExecutableNetwork executable_network = plugin.LoadNetwork(networkReader.getNetwork(), {}); - inferRequest = executable_network.CreateInferRequest(); -} - -double Processor::Infer(ConsoleProgress& progress, int filesWatched, InferenceMetrics& im) { - ResponseDesc dsc; - - // InferencePlugin plugin(enginePtr); - - // Infer model - double time = getDurationOf([&]() { - inferRequest.Infer(); - }); - - im.maxDuration = std::max(im.maxDuration, time); - im.minDuration = std::min(im.minDuration, time); - im.totalTime += time; - im.nRuns++; - - progress.addProgress(filesWatched); - - return time; -} diff --git a/inference-engine/samples/validation_app/Processor.hpp b/inference-engine/samples/validation_app/Processor.hpp deleted file mode 100644 index 22ce3b613062f1..00000000000000 --- a/inference-engine/samples/validation_app/Processor.hpp +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include -#include - -#include - -#include "inference_engine.hpp" - -#include "samples/csv_dumper.hpp" - -#include "image_decoder.hpp" -#include "samples/console_progress.hpp" - -using namespace std; - -#define OUTPUT_FLOATING(val) std::fixed << std::setprecision(2) << val - -class Processor { -public: - struct InferenceMetrics { - int nRuns = 0; - double minDuration = std::numeric_limits<double>::max(); - double maxDuration = 0; - double totalTime = 0; - - virtual ~InferenceMetrics() { } // Type has to be polymorphic - }; - -protected: - std::string modelFileName; - std::string targetDevice; - std::string imagesPath; - size_t batch; - InferenceEngine::InferRequest inferRequest; - InferenceEngine::InputsDataMap inputInfo; - InferenceEngine::OutputsDataMap outInfo; - InferenceEngine::CNNNetReader networkReader; - InferenceEngine::SizeVector inputDims; - InferenceEngine::SizeVector outputDims; - double loadDuration; - PreprocessingOptions preprocessingOptions; - - CsvDumper& dumper; - InferenceEngine::InferencePlugin plugin; - - std::string approach; - - double Infer(ConsoleProgress& progress, int filesWatched, InferenceMetrics& im); - -public: - Processor(const std::string& flags_m, const std::string& flags_d, const std::string& flags_i, int flags_b, - InferenceEngine::InferencePlugin plugin, CsvDumper& dumper, const std::string& approach, PreprocessingOptions preprocessingOptions); - - virtual std::shared_ptr<InferenceMetrics> Process(bool stream_output = false) = 0; - virtual void Report(const InferenceMetrics& im) { - double averageTime = im.totalTime / im.nRuns; - - slog::info << "Inference report:\n"; - slog::info << "\tNetwork load time: " << loadDuration << "ms" << "\n"; - slog::info << "\tModel: " << modelFileName << "\n"; - slog::info << "\tModel Precision: " << networkReader.getNetwork().getPrecision().name() << "\n"; - slog::info << "\tBatch size: " << batch << "\n"; - slog::info << "\tValidation dataset: " << imagesPath << "\n"; - slog::info << "\tValidation approach: " << approach; - slog::info << slog::endl; - - if (im.nRuns > 0) { - slog::info << "Average infer time (ms): " << averageTime << " (" << OUTPUT_FLOATING(1000.0 / (averageTime / batch)) - << " images per second with batch size = " << batch << ")" << slog::endl; - } else { - slog::warn << "No images processed" << slog::endl; - } - } - - virtual ~Processor() {} -}; diff --git
a/inference-engine/samples/validation_app/README.md b/inference-engine/samples/validation_app/README.md deleted file mode 100644 index 042150bb3ca1e7..00000000000000 --- a/inference-engine/samples/validation_app/README.md +++ /dev/null @@ -1,294 +0,0 @@ -# Validation Application - -Inference Engine Validation Application is a tool that allows you to infer deep learning models with a -standard input and output configuration and to collect simple -validation metrics for topologies. It supports the **top-1** and **top-5** metrics for Classification networks and -the 11-point **mAP** metric for Object Detection networks. - -> **NOTE**: Before running the application with trained models, make sure the models are converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). - -Possible use cases of the tool: -* Check if the Inference Engine infers the public topologies well (the engineering team uses the Validation Application for - regular testing) -* Verify if a custom model is compatible with the default input/output configuration and compare its - accuracy with the public models -* Use the Validation Application as another sample: although the code is much more complex than in the classification and object - detection samples, the source code is open and can be re-used. - -> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md). - -## Validation Application Options - -The Validation Application provides the following command-line interface (CLI): -```sh -Usage: validation_app [OPTION] - -Available options: - - -h Print a help message - -t Type of an inferred network ("C" by default) - -t "C" for classification - -t "OD" for object detection - -i Required. Folder with validation images. Path to a directory with validation images. For Classification models, the directory must contain folders named as labels with images inside or a .txt file with a list of images. For Object Detection models, the dataset must be in VOC format. - -m Required. Path to an .xml file with a trained model - -lbl Labels file path. The labels file contains names of the dataset classes - -l Required for CPU custom layers. Absolute path to a shared library with the kernel implementations - -c Required for GPU custom kernels. Absolute path to an .xml file with the kernel descriptions. - -d Target device to infer on: CPU (default), GPU, FPGA, HDDL or MYRIAD. The application looks for a suitable plugin for the specified device. - -b N Batch size value. If not specified, the batch size value is taken from IR - -ppType Preprocessing type. Options: "None", "Resize", "ResizeCrop" - -ppSize N Preprocessing size (used with ppType="ResizeCrop") - -ppWidth W Preprocessing width (overrides -ppSize, used with ppType="ResizeCrop") - -ppHeight H Preprocessing height (overrides -ppSize, used with ppType="ResizeCrop") - --dump Dump file names and inference results to a .csv file - - Classification-specific options: - -Czb true "Zero is a background" flag.
Some networks are trained with a modified dataset where the class IDs are enumerated from 1, but 0 is an undefined "background" class (which is never detected) - - Object detection-specific options: - -ODkind Type of an Object Detection model. Options: SSD - -ODa Required for Object Detection models. Path to a directory containing an .xml file with annotations for images. - -ODc Required for Object Detection models. Path to a file containing a list of classes - -ODsubdir Directory between the path to images (specified with -i) and image name (specified in the .xml file). For VOC2007 dataset, use JPEGImages. -``` -The tool options are divided into two categories: -1. **Common options** named with a single letter or a word, such as `-b` or `--dump`. - These options are the same in all Validation Application modes. -2. **Network type-specific options** named as an acronym of the network type (`C` or `OD`) - followed by a letter or a word. - -## General Workflow - -> **NOTE**: By default, Inference Engine samples expect input images to have BGR channels order. If you trained your model to work with images in RGB order, you need to manually rearrange the default channels order in the sample application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to [When to Reverse Input Channels](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md#when_to_reverse_input_channels). - -When executed, the Validation Application performs the following steps: - -1. Loads a model to an Inference Engine plugin -2. Reads the validation set (specified with the `-i` option): - - if you specified a directory, the application tries to load labels first. To do this, it searches for the file - with the same name as the model, but with the `.labels` extension (instead of `.xml`). - Then it searches the specified folder, detects its sub-folders named as known labels, and adds all images from these sub-folders to the validation set. When there are no such sub-folders, the validation set is considered empty. - - if you specified a `.txt` file, the application reads this file expecting every line to be in the correct format. - For more information about the format, refer to the Preparing the Dataset section below. - -3. Reads the batch size value specified with the `-b` option and loads this number of images to the plugin - > **NOTE**: Image loading time is not a part of the inference time reported by the application. - -4. The plugin infers the model, and the Validation Application collects the statistics. - -You can also retrieve inference results by specifying the `--dump` option; however, it generates a report only -for Classification models. This CLI option enables creation (if possible) of an inference report in -the `.csv` format. - -The structure of the report is a set of lines, each of which contains semicolon-separated values: -* image path -* a flag representing correctness of the prediction -* ID of the Top-1 class -* probability (in percent) that the image belongs to the Top-1 class -* ID of the Top-2 class -* probability (in percent) that the image belongs to the Top-2 class -* ... - -This is an example line from such a report: -```bash -"ILSVRC2012_val_00002138.bmp";1;1;8.5;392;6.875;123;5.875;2;5.5;396;5; -``` -It means that the given image was predicted correctly. The most probable prediction is that this image -represents class *1* with the probability *0.085*.
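
For illustration, here is a minimal C++ sketch of how a line in this format could be produced. This is not the application's own dumper code; the `ReportLine` struct and the `writeReportLine` helper are hypothetical names introduced only to mirror the semicolon-separated layout described above.

```cpp
#include <fstream>
#include <string>
#include <utility>
#include <vector>

// Hypothetical record mirroring the report layout described above:
// image path, top-1 correctness flag, then (class ID, probability in percent) pairs.
struct ReportLine {
    std::string imagePath;
    bool top1Correct;
    std::vector<std::pair<unsigned, double>> topClasses;
};

// Writes one semicolon-separated report line, e.g.
// "ILSVRC2012_val_00002138.bmp";1;1;8.5;392;6.875;...
void writeReportLine(std::ofstream& csv, const ReportLine& line) {
    csv << '"' << line.imagePath << '"' << ';' << (line.top1Correct ? 1 : 0);
    for (const auto& entry : line.topClasses) {
        csv << ';' << entry.first << ';' << entry.second;
    }
    csv << ";\n";  // the example line above also ends with a trailing semicolon
}

int main() {
    std::ofstream csv("report.csv");
    writeReportLine(csv, {"ILSVRC2012_val_00002138.bmp", true, {{1, 8.5}, {392, 6.875}}});
}
```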
- -## Prepare a Dataset - -You must prepare the dataset before running the Validation Application. The format of the dataset depends on -the type of the model you are going to validate. Make sure that the dataset format is applicable -for the chosen model type. - -### Dataset Format for Classification: Folders as Classes - -In this case, a dataset has the following structure: -```sh -|-- /dataset - |-- apron - |-- apron1.bmp - |-- apron2.bmp - |-- collie - |-- a_big_dog.jpg - |-- coral reef - |-- reef.bmp - |-- Siamese - |-- cat3.jpg -``` - -This structure means that each folder in the dataset directory must have the name of one of the classes and contain all images of this class. In the given example, there are two images that represent the class `apron`, while three other classes have only one image -each. - -> **NOTE:** A dataset can contain images of both `.bmp` and `.jpg` formats. - -The correct way to use such a dataset is to specify the path as `-i /dataset`. - -### Dataset Format for Classification: List of Images (ImageNet-like) - -If you want to use this dataset format, create a single file with a list of images. In this case, the correct set of files must be similar to the following: -```bash -|-- /dataset - |-- apron1.bmp - |-- apron2.bmp - |-- a_big_dog.jpg - |-- reef.bmp - |-- cat3.jpg - |-- labels.txt -``` - -Where `labels.txt` looks like: -```bash -apron1.bmp 411 -apron2.bmp 411 -cat3.jpg 284 -reef.bmp 973 -a_big_dog.jpg 231 -``` - -Each line of the file must contain the name of the image and the ID of the class -that it represents in the format `<image name>` tabulation `<class ID>`. For example, `apron1.bmp` represents the class with ID `411` (see the parsing sketch below). - -> **NOTE:** A dataset can contain images of both `.bmp` and `.jpg` formats. - -The correct way to use such a dataset is to specify the path as `-i /dataset/labels.txt`. - -### Dataset Format for Object Detection (VOC-like) - -Object Detection SSD models can be inferred on the original dataset that was used as a testing dataset during the model training. -To prepare the VOC dataset, follow the steps below: - -1. Download the pre-trained SSD-300 model from the SSD GitHub* repository at - [https://github.com/weiliu89/caffe/tree/ssd](https://github.com/weiliu89/caffe/tree/ssd). - -2. Download the VOC2007 testing dataset: - ```bash - wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar - tar -xvf VOCtest_06-Nov-2007.tar - ``` -3. Convert the model with the [Model Optimizer](./docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md). - -4. Create a proper `.txt` class file from the original `labelmap_voc.prototxt`. The new file must be in -the following format: -```sh - none_of_the_above 0 - aeroplane 1 - bicycle 2 - bird 3 - boat 4 - bottle 5 - bus 6 - car 7 - cat 8 - chair 9 - cow 10 - diningtable 11 - dog 12 - horse 13 - motorbike 14 - person 15 - pottedplant 16 - sheep 17 - sofa 18 - train 19 - tvmonitor 20 -``` -Save this file as `VOC_SSD_Classes.txt`. - -## Validate Classification Models - -> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
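
Before running validation, it can be useful to sanity-check an ImageNet-like list file. The following sketch is illustrative only and is not part of the application; it assumes the `<image name>` tabulation `<class ID>` layout described above and, like the application itself, splits each line at the last whitespace character.

```cpp
#include <fstream>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Reads an ImageNet-like list file: each line is "<image name> <class ID>".
// The class ID is taken after the last space or tab, mirroring the format above.
std::vector<std::pair<std::string, int>> readValidationList(const std::string& path) {
    std::vector<std::pair<std::string, int>> entries;
    std::ifstream in(path);
    std::string line;
    while (std::getline(in, line)) {
        if (line.empty()) continue;
        const size_t pos = line.find_last_of(" \t");
        if (pos == std::string::npos) continue;  // skip malformed lines in this sketch
        entries.emplace_back(line.substr(0, pos), std::stoi(line.substr(pos + 1)));
    }
    return entries;
}

int main() {
    for (const auto& entry : readValidationList("labels.txt"))
        std::cout << entry.first << " -> class " << entry.second << "\n";
}
```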
- -Once you have prepared the dataset (refer to the Preparing the Dataset section above), -run the following command to infer a classification model on the selected dataset: -```bash -./validation_app -t C -i <path_to_images> -m <path_to_model>/<model_name>.xml -d <device> -``` - -## Validate Object Detection Models - -> **NOTE**: The Validation Application was validated with the SSD CNN. Any network that can be inferred by the Inference Engine -> and has the same input and output format as this model should be supported as well. - -> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). - -Once you have prepared the dataset (refer to the Preparing the Dataset section above), -run the following command to infer an Object Detection model on the selected dataset: -```bash -./validation_app -d CPU -t OD -ODa "<path_to_dataset>/VOCdevkit/VOC2007/Annotations" -i "<path_to_dataset>/VOCdevkit" -m "<path_to_model>/vgg_voc0712_ssd_300x300.xml" -ODc "<path_to_classes_file>/VOC_SSD_Classes.txt" -ODsubdir JPEGImages -``` - -## Understand Validation Application Output - -During the validation process, you can see an interactive progress bar that represents the current validation stage. When it is -full, the validation process is over, and you can analyze the output. - -Key data from the output: -* **Network loading time** - time spent on topology loading, in ms -* **Model** - path to the chosen model -* **Model Precision** - precision of the chosen model -* **Batch size** - specified batch size -* **Validation dataset** - path to the validation set -* **Validation approach** - type of the model: Classification or Object Detection -* **Device** - device type - -Below you can find the example output for Classification models, which reports average infer time and -**Top-1** and **Top-5** metric values: -```bash -Average infer time (ms): 588.977 (16.98 images per second with batch size = 10) - -Top1 accuracy: 70.00% (7 of 10 images were detected correctly, top class is correct) -Top5 accuracy: 80.00% (8 of 10 images were detected correctly, top five classes contain required class) -``` - -Below you can find the example output for Object Detection models: - -```bash -Progress: [....................] 100.00% done -[ INFO ] Processing output blobs -Network load time: 27.70ms -Model: /home/user/models/ssd/withmean/vgg_voc0712_ssd_300x300/vgg_voc0712_ssd_300x300.xml -Model Precision: FP32 -Batch size: 1 -Validation dataset: /home/user/Data/SSD-data/testonly/VOCdevkit -Validation approach: Object detection network - -Average infer time (ms): 166.49 (6.01 images per second with batch size = 1) -Average precision per class table: - -Class AP -1 0.796 -2 0.839 -3 0.759 -4 0.695 -5 0.508 -6 0.867 -7 0.861 -8 0.886 -9 0.602 -10 0.822 -11 0.768 -12 0.861 -13 0.874 -14 0.842 -15 0.797 -16 0.526 -17 0.792 -18 0.795 -19 0.873 -20 0.773 - -Mean Average Precision (mAP): 0.7767 -``` - -This output shows the resulting `mAP` metric value for the SSD300 model used to prepare the -dataset. This value matches the result stated in the -[SSD GitHub* repository](https://github.com/weiliu89/caffe/tree/ssd) and in the -[original arXiv paper](http://arxiv.org/abs/1512.02325).
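
The mAP value in the output above is the arithmetic mean of the per-class AP column, which is how the application's `Report` method aggregates it. Below is a minimal standalone sketch of that final aggregation step; the per-class AP values themselves come from the 11-point average precision calculation inside the application and are assumed here as inputs.

```cpp
#include <iostream>
#include <map>

// Mean Average Precision: the mean of the per-class AP values,
// as in the "Class AP" table printed above.
double meanAveragePrecision(const std::map<int, double>& apPerClass) {
    if (apPerClass.empty()) return 0.0;
    double sum = 0.0;
    for (const auto& entry : apPerClass) sum += entry.second;
    return sum / apPerClass.size();
}

int main() {
    // Sample values taken from the table above.
    const std::map<int, double> ap = {{1, 0.796}, {2, 0.839}, {3, 0.759}};
    std::cout << "Mean Average Precision (mAP): " << meanAveragePrecision(ap) << "\n";
}
```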
- - - -## See Also - -* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md) diff --git a/inference-engine/samples/validation_app/SSDObjectDetectionProcessor.hpp b/inference-engine/samples/validation_app/SSDObjectDetectionProcessor.hpp deleted file mode 100644 index a8dc30e0a1452d..00000000000000 --- a/inference-engine/samples/validation_app/SSDObjectDetectionProcessor.hpp +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "ObjectDetectionProcessor.hpp" - -using namespace std; - -class SSDObjectDetectionProcessor : public ObjectDetectionProcessor { -protected: - std::map> processResult(std::vector files) { - std::map> detectedObjects; - - std::string firstOutputName = this->outInfo.begin()->first; - const auto detectionOutArray = inferRequest.GetBlob(firstOutputName); - const float *box = detectionOutArray->buffer().as(); - - const size_t maxProposalCount = outputDims[1]; - const size_t objectSize = outputDims[0]; - - for (size_t b = 0; b < batch; b++) { - string fn = files[b]; - std::list dr = std::list(); - detectedObjects.insert(std::pair>(fn, dr)); - } - - for (size_t i = 0; i < maxProposalCount; i++) { - float image_id = box[i * objectSize + 0]; - float label = box[i * objectSize + 1]; - float confidence = box[i * objectSize + 2]; - float xmin = box[i * objectSize + 3] * inputDims[0]; - float ymin = box[i * objectSize + 4] * inputDims[1]; - float xmax = box[i * objectSize + 5] * inputDims[0]; - float ymax = box[i * objectSize + 6] * inputDims[1]; - - if (image_id < 0 /* better than check == -1 */) { - break; // Finish - } - - detectedObjects[files[static_cast(image_id)]].push_back( - DetectedObject(static_cast(label), xmin, ymin, xmax, ymax, confidence)); - } - - return detectedObjects; - } - -public: - SSDObjectDetectionProcessor(const std::string& flags_m, const std::string& flags_d, const std::string& flags_i, const std::string& subdir, int flags_b, - double threshold, - InferencePlugin plugin, CsvDumper& dumper, - const std::string& flags_a, const std::string& classes_list_file) : - - ObjectDetectionProcessor(flags_m, flags_d, flags_i, subdir, flags_b, threshold, - plugin, dumper, flags_a, classes_list_file, PreprocessingOptions(false, ResizeCropPolicy::Resize), true) { } -}; diff --git a/inference-engine/samples/validation_app/VOCAnnotationParser.cpp b/inference-engine/samples/validation_app/VOCAnnotationParser.cpp deleted file mode 100644 index 68e26560126415..00000000000000 --- a/inference-engine/samples/validation_app/VOCAnnotationParser.cpp +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include -#include - -#include "VOCAnnotationParser.hpp" - -#include "user_exception.hpp" - -std::string VOCAnnotationParser::parseString(const pugi::xml_node& node, const std::string& def) { - if (node && node.child_value()) { - return node.child_value(); - } else { - return def; - } -} - -int VOCAnnotationParser::parseInt(const pugi::xml_node& node, const int def) { - if (!node) return def; - std::string val = parseString(node); - try { - return std::stoi(val); - } catch (const std::invalid_argument&) { - THROW_USER_EXCEPTION(1) << "Can't convert node <" << node.name() - << "> value \"" << val << "\" to integer"; - } -} - -bool VOCAnnotationParser::parseBool(const pugi::xml_node& node, bool def) { - if 
(!node) return def; - std::string val = parseString(node); - if (val == "1") return true; - if (val == "0") return false; - - THROW_USER_EXCEPTION(1) << "Can't convert node <" << node.name() - << "> value \"" << val << "\" to boolean"; -} - - -VOCAnnotation VOCAnnotationParser::parse(const std::string& filename) { - using namespace pugi; - xml_document doc; - - xml_parse_result result = doc.load_file(filename.c_str()); - - if (result.status != pugi::status_ok) { - throw UserException(result.status) - << "parsing failed at offset " << result.offset << ": " << result.description(); - } - - xml_node annNode = doc.child("annotation"); - if (!annNode) { - THROW_USER_EXCEPTION(1) << "No root tag"; - } - VOCAnnotation ann; - - try { - ann.filename = parseString(annNode.child("filename")); - ann.folder = parseString(annNode.child("folder")); - ann.segmented = parseBool(annNode.child("segmented")); - - xml_node sizeNode = annNode.child("size"); - ann.size.depth = parseInt(sizeNode.child("depth")); - ann.size.height = parseInt(sizeNode.child("height")); - ann.size.width = parseInt(sizeNode.child("width")); - - xml_node sourceNode = annNode.child("source"); - ann.source.annotation = parseString(sourceNode.child("annotation")); - ann.source.database = parseString(sourceNode.child("database")); - ann.source.image = parseString(sourceNode.child("image")); - - - for (xml_node objNode = annNode.child("object"); objNode; objNode = objNode.next_sibling("object")) { - VOCObject obj; - obj.name = parseString(objNode.child("name")); - obj.difficult = parseBool(objNode.child("difficult")); - obj.occluded = parseBool(objNode.child("occluded")); - obj.pose = parseString(objNode.child("pose")); - obj.truncated = parseBool(objNode.child("truncated")); - - xml_node bndboxNode = objNode.child("bndbox"); - obj.bndbox.xmin = parseInt(bndboxNode.child("xmin")); - obj.bndbox.xmax = parseInt(bndboxNode.child("xmax")); - obj.bndbox.ymin = parseInt(bndboxNode.child("ymin")); - obj.bndbox.ymax = parseInt(bndboxNode.child("ymax")); - - ann.objects.push_back(obj); - } - } - catch (const std::invalid_argument& e) { - THROW_USER_EXCEPTION(1) << "conversion error: " << e.what(); - } - - return ann; -} - -VOCAnnotationParser::~VOCAnnotationParser() { -} - -inline bool ends_with(std::string const & value, std::string const & ending) { - if (ending.size() > value.size()) return false; - return std::equal(ending.rbegin(), ending.rend(), value.rbegin()); -} - -VOCAnnotationCollector::VOCAnnotationCollector(const std::string& path) { - VOCAnnotationParser parser; - if (ends_with(path, ".xml")) { - // A single file - _annotations.push_back(parser.parse(path)); - } else { - std::list baseDirContents = getDirContents(path, true); - for (const auto& sub : baseDirContents) { - std::list annotationDirContents = getDirContents(sub, true); - if (annotationDirContents.size() == 0 && ends_with(sub, ".xml")) { - _annotations.push_back(parser.parse(sub)); - } else { - for (const auto& file : annotationDirContents) { - if (ends_with(file, ".xml")) { - _annotations.push_back(parser.parse(file)); - } - } - } - } - } -} - diff --git a/inference-engine/samples/validation_app/VOCAnnotationParser.hpp b/inference-engine/samples/validation_app/VOCAnnotationParser.hpp deleted file mode 100644 index a9d2d89ac2fc87..00000000000000 --- a/inference-engine/samples/validation_app/VOCAnnotationParser.hpp +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include 
"classification_set_generator.hpp" -#include "pugixml/pugixml.hpp" - -#include -#include - -#pragma pack(1) -struct VOCBoundingBox { - int xmin, xmax, ymin, ymax; -}; - -struct VOCObject { - std::string name; - VOCBoundingBox bndbox; - int difficult; - int occluded; - int truncated; - std::string pose; -}; - -struct VOCAnnotation { - std::string filename; - std::string folder; - std::vector objects; - int segmented; - - struct Size { - uint16_t depth; - size_t width; - size_t height; - } size; - - struct Source { - std::string annotation; - std::string database; - std::string image; - } source; -}; -#pragma pack() - -class VOCAnnotationParser { -private: - static std::string parseString(const pugi::xml_node& node, const std::string& def = ""); - static int parseInt(const pugi::xml_node& node, const int def = 0); - static bool parseBool(const pugi::xml_node& node, bool def = false); - -public: - VOCAnnotationParser() { } - VOCAnnotation parse(const std::string& filename); - virtual ~VOCAnnotationParser(); -}; - -class VOCAnnotationCollector : public ClassificationSetGenerator { -private: - std::vector _annotations; -public: - explicit VOCAnnotationCollector(const std::string& path); - const std::vector& annotations() const { return _annotations; } - const VOCAnnotation* annotationByFile(const std::string& filename) const { - for (auto& ann : _annotations) { - if (ann.filename == filename) return &ann; - } - return nullptr; - } -}; diff --git a/inference-engine/samples/validation_app/YOLOObjectDetectionProcessor.hpp b/inference-engine/samples/validation_app/YOLOObjectDetectionProcessor.hpp deleted file mode 100644 index 816f9699821ede..00000000000000 --- a/inference-engine/samples/validation_app/YOLOObjectDetectionProcessor.hpp +++ /dev/null @@ -1,121 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace std; - -class YOLOObjectDetectionProcessor : public ObjectDetectionProcessor { -private: - /** - * \brief This function analyses the YOLO net output for a single class - * @param net_out - The output data - * @param class_num - The class number - * @return a list of found boxes - */ - std::vector yoloNetParseOutput(const float *net_out, int class_num) { - float threshold = 0.2f; // The confidence threshold - int C = 20; // classes - int B = 2; // bounding boxes - int S = 7; // cell size - - std::vector boxes; - std::vector boxes_result; - int SS = S * S; // number of grid cells 7*7 = 49 - // First 980 values corresponds to probabilities for each of the 20 classes for each grid cell. - // These probabilities are conditioned on objects being present in each grid cell. - int prob_size = SS * C; // class probabilities 49 * 20 = 980 - // The next 98 values are confidence scores for 2 bounding boxes predicted by each grid cells. 
- int conf_size = SS * B; // 49*2 = 98 confidences for each grid cell - - const float *probs = &net_out[0]; - const float *confs = &net_out[prob_size]; - const float *cords = &net_out[prob_size + conf_size]; // 98*4 = 392 coords x, y, w, h - - for (int grid = 0; grid < SS; grid++) { - int row = grid / S; - int col = grid % S; - for (int b = 0; b < B; b++) { - int objectType = class_num; - - float conf = confs[(grid * B + b)]; - float xc = (cords[(grid * B + b) * 4 + 0] + col) / S; - float yc = (cords[(grid * B + b) * 4 + 1] + row) / S; - float w = pow(cords[(grid * B + b) * 4 + 2], 2); - float h = pow(cords[(grid * B + b) * 4 + 3], 2); - float prob = probs[grid * C + class_num] * conf; - - DetectedObject bx(objectType, xc - w / 2, yc - h / 2, xc + w / 2, - yc + h / 2, prob); - - if (prob >= threshold) { - boxes.push_back(bx); - } - } - } - - // Sorting the higher probabilities to the top - sort(boxes.begin(), boxes.end(), - [](const DetectedObject & a, const DetectedObject & b) -> bool { - return a.prob > b.prob; - }); - - // Filtering out overlapping boxes - std::vector overlapped(boxes.size(), false); - for (size_t i = 0; i < boxes.size(); i++) { - if (overlapped[i]) - continue; - - DetectedObject box_i = boxes[i]; - for (size_t j = i + 1; j < boxes.size(); j++) { - DetectedObject box_j = boxes[j]; - if (DetectedObject::ioU(box_i, box_j) >= 0.4) { - overlapped[j] = true; - } - } - } - - for (size_t i = 0; i < boxes.size(); i++) { - if (boxes[i].prob > 0.0f) { - boxes_result.push_back(boxes[i]); - } - } - return boxes_result; - } - -protected: - std::map> processResult(std::vector files) { - std::map> detectedObjects; - - std::string firstOutputName = this->outInfo.begin()->first; - const auto detectionOutArray = inferRequest.GetBlob(firstOutputName); - const float *box = detectionOutArray->buffer().as(); - - std::string file = *files.begin(); - for (int c = 0; c < 20; c++) { - std::vector result = yoloNetParseOutput(box, c); - detectedObjects[file].insert(detectedObjects[file].end(), result.begin(), result.end()); - } - - return detectedObjects; - } - -public: - YOLOObjectDetectionProcessor(const std::string& flags_m, const std::string& flags_d, const std::string& flags_i, const std::string& subdir, int flags_b, - double threshold, - InferencePlugin plugin, CsvDumper& dumper, - const std::string& flags_a, const std::string& classes_list_file) : - - ObjectDetectionProcessor(flags_m, flags_d, flags_i, subdir, flags_b, threshold, - plugin, dumper, flags_a, classes_list_file, PreprocessingOptions(true, ResizeCropPolicy::Resize), false) { } -}; diff --git a/inference-engine/samples/validation_app/classification_set_generator.cpp b/inference-engine/samples/validation_app/classification_set_generator.cpp deleted file mode 100644 index 051474e3dda50a..00000000000000 --- a/inference-engine/samples/validation_app/classification_set_generator.cpp +++ /dev/null @@ -1,155 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "classification_set_generator.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "user_exception.hpp" -#include "details/ie_exception.hpp" - -#ifdef _WIN32 -# include "w_dirent.h" -#else -# include -# include -#endif - -#include "../common/samples/common.hpp" - -/** - * @brief Gets path part of a filename including separator - * @param filepath - filename to extract path part from - * @return string with path part of the filename - */ -inline std::string folderOf(const 
std::string &filepath) { - auto pos = filepath.rfind("/"); - if (pos == std::string::npos) pos = filepath.rfind("\\"); - if (pos == std::string::npos) return ""; - return filepath.substr(0, pos + 1); -} - -void readFile(std::string filename, std::function perLine) { - std::ifstream inputFile; - inputFile.open(filename, std::ios::in); - std::string strLine = ""; - - if (!inputFile.is_open()) - THROW_IE_EXCEPTION << "Cannot open file: " << filename; - - size_t lineNumber = 0; - while (std::getline(inputFile, strLine)) { - lineNumber++; - perLine(strLine, lineNumber); - } -} - -std::map ClassificationSetGenerator::readLabels(const std::string& labels) { - _classes.clear(); - int i = 0; - - readFile(labels, [&](std::string& line, size_t lineNumber) { - trim(line); - _classes[line] = i++; - }); - - return _classes; -} - -std::string getFullName(const std::string& name, const std::string& dir) { - return dir + "/" + name; -} - -std::list ClassificationSetGenerator::getDirContents(const std::string& dir, bool includePath) { - struct stat sb; - if (stat(dir.c_str(), &sb) != 0 || !S_ISDIR(sb.st_mode)) { - return std::list(); - // THROW_USER_EXCEPTION(1) << "Can't read contents of directory " << dir << ". It isn't a directory or not accessible"; - } - - std::list list; - DIR *dp; - dp = opendir(dir.c_str()); - if (dp == nullptr) { - THROW_USER_EXCEPTION(1) << "Can't open directory " << dir; - } - - struct dirent *ep; - while (nullptr != (ep = readdir(dp))) { - std::string fileName = ep->d_name; - if (fileName == "." || fileName == "..") continue; - list.push_back(includePath ? getFullName(ep->d_name, dir) : ep->d_name); - } - closedir(dp); - return list; -} - -std::vector> ClassificationSetGenerator::validationMapFromTxt(const std::string& file) { - std::string ext = fileExt(file); - if (ext != "txt") { - THROW_USER_EXCEPTION(1) << "Unknown dataset data file format: " << ext << ""; - } - - std::string dir = folderOf(file); - std::vector> validationMap; - std::string imgPath = ""; - int classId = -1; - - readFile(file, [&](std::string& line, size_t lineNumber) { - trim(line); - size_t pos = line.rfind(" "); - if (pos == std::string::npos) { - THROW_USER_EXCEPTION(1) << "Bad file format! 
Cannot parse line " << lineNumber << ":\n> " << line; - } - try { - classId = std::stoi(line.substr(pos + 1)); - } catch (const std::invalid_argument& e) { - THROW_USER_EXCEPTION(1) << "Invalid class id specified at line " << lineNumber << ":\n> " << line - << " Error: " << e.what(); - } - imgPath = line.substr(0, pos); - validationMap.push_back({ classId, dir + imgPath }); - }); - - return validationMap; -} - -std::vector> ClassificationSetGenerator::validationMapFromFolder(const std::string& dir) { - std::vector> validationMap; - std::list validation_labels = getDirContents(dir, false); - - for (auto& label : validation_labels) { - auto val = _classes.find(label); - if (val == _classes.end()) continue; - - int id = val->second; - for (auto& image : getDirContents(getFullName(label, dir))) { - validationMap.push_back({ id, image }); - } - } - return validationMap; -} - -std::vector> ClassificationSetGenerator::getValidationMap(const std::string& path) { - struct stat sb; - if (stat(path.c_str(), &sb) == 0) { - if (S_ISDIR(sb.st_mode)) { - return validationMapFromFolder(path); - } else { - return validationMapFromTxt(path); - } - } else { - if (errno == ENOENT || errno == EINVAL || errno == EACCES) { - THROW_USER_EXCEPTION(3) << "The specified path \"" << path << "\" can not be found or accessed"; - } - } - return{}; -} diff --git a/inference-engine/samples/validation_app/classification_set_generator.hpp b/inference-engine/samples/validation_app/classification_set_generator.hpp deleted file mode 100644 index 764364aa3216c9..00000000000000 --- a/inference-engine/samples/validation_app/classification_set_generator.hpp +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include -#include -#include -#include - -/** - * @class SetGenerator - * @brief A SetGenerator provides utility functions to read labels and create a multimap of images for pre-processing - */ -class ClassificationSetGenerator { - std::map _classes; - - std::vector> validationMapFromTxt(const std::string& file); - std::vector> validationMapFromFolder(const std::string& dir); - -protected: - std::list getDirContents(const std::string& dir, bool includePath = true); - - -public: - /** - * @brief Reads file with a list of classes names. Every found line is considered to be - * a class name with ID equal to line number - 1 (zero based) - * @param labels - name of a file with labels - * @return map - */ - std::map readLabels(const std::string& labels); - - /** - * @brief Creates a vector of pairs to reflect - * images data reflected by path provided - * @param path - can be a .txt file or a folder. In case of file parses it assuming format is - * relative_path_from_folder_with_txt_extension/image_id. In case of folder searches - * all subfolders which are named exactly like known classes and adds all containing - * files to a map with ID corresponding to subfolder name - * @return vector of pairs {ID: IMAGEPATH} describing all found images. 
In case folder path was - * provided and no class names are known returns empty map - */ - std::vector> getValidationMap(const std::string& path); -}; diff --git a/inference-engine/samples/validation_app/image_decoder.cpp b/inference-engine/samples/validation_app/image_decoder.cpp deleted file mode 100644 index 0a874268c88f6c..00000000000000 --- a/inference-engine/samples/validation_app/image_decoder.cpp +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include "image_decoder.hpp" -#include "details/ie_exception.hpp" -#include -#include -#include -#include -#include -#include -#include - -using namespace cv; - -int getLoadModeForChannels(int channels, int base) { - switch (channels) { - case 1: - return base | IMREAD_GRAYSCALE; - case 3: - return base | IMREAD_COLOR; - } - return IMREAD_UNCHANGED; -} - -template -cv::Size addToBlob(std::string name, int batch_pos, Blob& blob, PreprocessingOptions preprocessingOptions) { - SizeVector blobSize = blob.dims(); - int width = static_cast(blobSize[0]); - int height = static_cast(blobSize[1]); - int channels = static_cast(blobSize[2]); - T* blob_data = static_cast(blob.buffer()); - Mat orig_image, result_image; - int loadMode = getLoadModeForChannels(channels, 0); - - std::string tryName = name; - - // TODO This is a dirty hack to support VOC2007 (where no file extension is put into annotation). - // Rewrite. - if (name.find('.') == std::string::npos) tryName = name + ".JPEG"; - - orig_image = imread(tryName, loadMode); - - if (orig_image.empty()) { - THROW_IE_EXCEPTION << "Cannot open image file: " << tryName; - } - - // Preprocessing the image - Size res = orig_image.size(); - - if (preprocessingOptions.resizeCropPolicy == ResizeCropPolicy::Resize) { - cv::resize(orig_image, result_image, Size(width, height)); - } else if (preprocessingOptions.resizeCropPolicy == ResizeCropPolicy::ResizeThenCrop) { - Mat resized_image; - - cv::resize(orig_image, resized_image, Size(preprocessingOptions.resizeBeforeCropX, preprocessingOptions.resizeBeforeCropY)); - - size_t cx = preprocessingOptions.resizeBeforeCropX / 2; - size_t cy = preprocessingOptions.resizeBeforeCropY / 2; - - cv::Rect cropRect(cx - width / 2, cy - height / 2, width, height); - result_image = resized_image(cropRect); - } else if (preprocessingOptions.resizeCropPolicy == ResizeCropPolicy::DoNothing) { - // No image preprocessing to be done here - result_image = orig_image; - } else { - THROW_IE_EXCEPTION << "Unsupported ResizeCropPolicy value"; - } - - float scaleFactor = preprocessingOptions.scaleValuesTo01 ? 
255.0f : 1.0f; - - for (int c = 0; c < channels; c++) { - for (int h = 0; h < height; h++) { - for (int w = 0; w < width; w++) { - blob_data[batch_pos * channels * width * height + c * width * height + h * width + w] = - static_cast(result_image.at(h, w)[c] / scaleFactor); - } - } - } - - return res; -} - -std::map convertToBlob(std::vector names, int batch_pos, Blob& blob, PreprocessingOptions preprocessingOptions) { - if (blob.buffer() == nullptr) { - THROW_IE_EXCEPTION << "Blob was not allocated"; - } - - std::function add_func; - - switch (blob.precision()) { - case Precision::FP32: - add_func = &addToBlob; - break; - case Precision::FP16: - case Precision::Q78: - case Precision::I16: - case Precision::U16: - add_func = &addToBlob; - break; - default: - add_func = &addToBlob; - } - - std::map res; - for (size_t b = 0; b < names.size(); b++) { - std::string name = names[b]; - Size orig_size = add_func(name, batch_pos + b, blob, preprocessingOptions); - res.insert(std::pair(name, orig_size)); - } - - return res; -} - -Size ImageDecoder::loadToBlob(std::string name, Blob& blob, PreprocessingOptions preprocessingOptions) { - std::vector names = { name }; - return loadToBlob(names, blob, preprocessingOptions).at(name); -} - -std::map ImageDecoder::loadToBlob(std::vector names, Blob& blob, PreprocessingOptions preprocessingOptions) { - return convertToBlob(names, 0, blob, preprocessingOptions); -} - -Size ImageDecoder::insertIntoBlob(std::string name, int batch_pos, Blob& blob, PreprocessingOptions preprocessingOptions) { - return convertToBlob({ name }, batch_pos, blob, preprocessingOptions).at(name); -} diff --git a/inference-engine/samples/validation_app/image_decoder.hpp b/inference-engine/samples/validation_app/image_decoder.hpp deleted file mode 100644 index 922956e8bb4fee..00000000000000 --- a/inference-engine/samples/validation_app/image_decoder.hpp +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -#include -#include -#include - -#include -#include -#include "ie_blob.h" - -#include "PreprocessingOptions.hpp" - -using namespace cv; -using namespace InferenceEngine; - -class ImageDecoder { -public: - /** - * @brief Load single image to blob - * @param name - image file name - * @param blob - blob object to load image data to - * @return original image sizes - */ - Size loadToBlob(std::string name, Blob& blob, PreprocessingOptions preprocessingOptions); - - /** - * @brief Load a list of images to blob - * @param names - list of images filenames - * @param blob - blob object to load images data to - * @return original image size - */ - std::map loadToBlob(std::vector names, Blob& blob, PreprocessingOptions preprocessingOptions); - - /** - * @brief Insert image data to blob at specified batch position. 
- * Does no checks if blob has sufficient space - * @param name - image file name - * @param batch_pos - batch position image should be loaded to - * @param blob - blob object to load image data to - * @return original image size - */ - Size insertIntoBlob(std::string name, int batch_pos, Blob& blob, PreprocessingOptions preprocessingOptions); -}; diff --git a/inference-engine/samples/validation_app/main.cpp b/inference-engine/samples/validation_app/main.cpp deleted file mode 100644 index aab215e4134c81..00000000000000 --- a/inference-engine/samples/validation_app/main.cpp +++ /dev/null @@ -1,371 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/** - * @brief The entry point for Inference Engine validation application - * @file validation_app/main.cpp - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -#include "user_exception.hpp" -#include "ClassificationProcessor.hpp" -#include "SSDObjectDetectionProcessor.hpp" -#include "YOLOObjectDetectionProcessor.hpp" - -using namespace std; -using namespace InferenceEngine; - -using InferenceEngine::details::InferenceEngineException; - -/// @brief Message for help argument -static const char help_message[] = "Print a help message"; -/// @brief Message for images argument -static const char image_message[] = "Required. Folder with validation images. Path to a directory with validation images. For Classification models," - " the directory must contain folders named as labels with images inside or a .txt file with" - " a list of images. For Object Detection models, the dataset must be in" - " VOC format."; -/// @brief Message for plugin_path argument -static const char plugin_path_message[] = "Required. Path to an .xml file with a trained model, including model name and " - "extension."; -/// @brief Message for model argument -static const char model_message[] = "Required. Path to an .xml file with a trained model"; -/// @brief Message for plugin argument -static const char plugin_message[] = "Plugin name. For example, CPU. If this parameter is passed, " - "the sample looks for a specified plugin only."; -/// @brief Message for assigning cnn calculation to device -static const char target_device_message[] = "Target device to infer on: CPU (default), GPU, FPGA, HDDL or MYRIAD." - " The application looks for a suitable plugin for the specified device."; -/// @brief Message for label argument -static const char label_message[] = "Path to a file with labels for a model"; -/// @brief Message for batch argumenttype -static const char batch_message[] = "Batch size value. If not specified, the batch size value is taken from IR"; -/// @brief Message for dump argument -static const char dump_message[] = "Dump file names and inference results to a .csv file"; -/// @brief Message for network type -static const char type_message[] = "Type of an inferred network (\"C\" by default)"; -/// @brief Message for pp-type -static const char preprocessing_type[] = "Preprocessing type. 
Options: \"None\", \"Resize\", \"ResizeCrop\""; -/// @brief Message for pp-crop-size -static const char preprocessing_size[] = "Preprocessing size (used with ppType=\"ResizeCrop\")"; -static const char preprocessing_width[] = "Preprocessing width (overrides -ppSize, used with ppType=\"ResizeCrop\")"; -static const char preprocessing_height[] = "Preprocessing height (overrides -ppSize, used with ppType=\"ResizeCrop\")"; - -static const char obj_detection_annotations_message[] = "Required for Object Detection models. Path to a directory" - " containing an .xml file with annotations for images."; - -static const char obj_detection_classes_message[] = "Required for Object Detection models. Path to a file containing" - " a list of classes"; - -static const char obj_detection_subdir_message[] = "Directory between the path to images (specified with -i) and image name (specified in the" - " .xml file). For VOC2007 dataset, use JPEGImages."; -static const char obj_detection_kind_message[] = "Type of an Object Detection model. Options: SSD"; - -/// @brief Message for GPU custom kernels desc -static const char custom_cldnn_message[] = "Required for GPU custom kernels." - "Absolute path to an .xml file with the kernel descriptions."; - -/// @brief Message for user library argument -static const char custom_cpu_library_message[] = "Required for CPU custom layers. " - "Absolute path to a shared library with the kernel implementations"; - -/// @brief Message for labels file -static const char labels_file_message[] = "Labels file path. The labels file contains names of the dataset classes"; - -static const char zero_background_message[] = "\"Zero is a background\" flag. Some networks are trained with a modified" - " dataset where the class IDs " - " are enumerated from 1, but 0 is an undefined \"background\" class" - " (which is never detected)"; - -static const char plain_output_message[] = "Flag for plain output"; - - -/// @brief Network type options and their descriptions -static const char* types_descriptions[][2] = { - { "C", "classification" }, -// { "SS", "semantic segmentation" }, // Not supported yet - { "OD", "object detection" }, - { nullptr, nullptr } -}; - -/// @brief Define flag for showing help message
-DEFINE_bool(h, false, help_message); -/// @brief Define parameter for a path to images
-/// It is a required parameter -DEFINE_string(i, "", image_message); -/// @brief Define parameter for a path to model file
-/// It is a required parameter -DEFINE_string(m, "", model_message); -/// @brief Define parameter for a plugin name
-/// It is a required parameter -DEFINE_string(p, "", plugin_message); -/// @brief Define parameter for a path to a file with labels
-/// Default is empty -DEFINE_string(OCl, "", label_message); -/// @brief Define parameter for a path to plugins
-/// Default is ./lib -DEFINE_string(pp, "", plugin_path_message); -/// @brief Define parameter for a target device to infer on
-DEFINE_string(d, "CPU", target_device_message); -/// @brief Define parameter for batch size
-/// Default is 0 (which means that batch size is not specified) -DEFINE_int32(b, 0, batch_message); -/// @brief Define flag to dump results to a file
-DEFINE_bool(dump, false, dump_message); -/// @brief Define parameter for a network type parameter -DEFINE_string(t, "C", type_message); - -/// @brief Define parameter for preprocessing type -DEFINE_string(ppType, "", preprocessing_type); - -/// @brief Define parameter for preprocessing size -DEFINE_int32(ppSize, 0, preprocessing_size); -DEFINE_int32(ppWidth, 0, preprocessing_width); -DEFINE_int32(ppHeight, 0, preprocessing_height); - -DEFINE_bool(Czb, false, zero_background_message); - -DEFINE_string(ODa, "", obj_detection_annotations_message); - -DEFINE_string(ODc, "", obj_detection_classes_message); - -DEFINE_string(ODsubdir, "", obj_detection_subdir_message); - -/// @brief Define parameter for a type of Object Detection network -DEFINE_string(ODkind, "SSD", obj_detection_kind_message); - -/// @brief Define parameter for GPU kernels path
-/// Default is ./lib -DEFINE_string(c, "", custom_cldnn_message); - -/// @brief Define parameter for a path to CPU library with user layers
-/// It is an optional parameter -DEFINE_string(l, "", custom_cpu_library_message); - -/// @brief Flag for printing plain text -DEFINE_bool(plain, false, plain_output_message); - -DEFINE_string(lbl, "", labels_file_message); - -/** - * @brief This function shows a help message - */ -static void showUsage() { - std::cout << std::endl; - std::cout << "Usage: validation_app [OPTION]" << std::endl << std::endl; - std::cout << "Available options:" << std::endl; - std::cout << std::endl; - std::cout << " -h " << help_message << std::endl; - std::cout << " -t " << type_message << std::endl; - for (int i = 0; types_descriptions[i][0] != nullptr; i++) { - std::cout << " -t \"" << types_descriptions[i][0] << "\" for " << types_descriptions[i][1] << std::endl; - } - std::cout << " -i " << image_message << std::endl; - std::cout << " -m " << model_message << std::endl; - std::cout << " -lbl " << labels_file_message << std::endl; - std::cout << " -l " << custom_cpu_library_message << std::endl; - std::cout << " -c " << custom_cldnn_message << std::endl; - std::cout << " -d " << target_device_message << std::endl; - std::cout << " -b N " << batch_message << std::endl; - std::cout << " -ppType " << preprocessing_type << std::endl; - std::cout << " -ppSize N " << preprocessing_size << std::endl; - std::cout << " -ppWidth W " << preprocessing_width << std::endl; - std::cout << " -ppHeight H " << preprocessing_height << std::endl; - std::cout << " --dump " << dump_message << std::endl; - - std::cout << std::endl; - std::cout << " Classification-specific options:" << std::endl; - std::cout << " -Czb true " << zero_background_message << std::endl; - - std::cout << std::endl; - std::cout << " Object detection-specific options:" << std::endl; - std::cout << " -ODkind " << obj_detection_kind_message << std::endl; - std::cout << " -ODa " << obj_detection_annotations_message << std::endl; - std::cout << " -ODc " << obj_detection_classes_message << std::endl; - std::cout << " -ODsubdir " << obj_detection_subdir_message << std::endl << std::endl; -} - -enum NetworkType { - Undefined = -1, - Classification, - ObjDetection -}; - -std::string strtolower(const std::string& s) { - std::string res = s; - std::transform(res.begin(), res.end(), res.begin(), ::tolower); - return res; -} - -/** - * @brief The main function of Inference Engine sample application - * @param argc - The number of arguments - * @param argv - Arguments - * @return 0 if all good - */ -int main(int argc, char *argv[]) { - try { - slog::info << "InferenceEngine: " << GetInferenceEngineVersion() << slog::endl; - - // ---------------------------Parsing and validating input arguments-------------------------------------- - slog::info << "Parsing input parameters" << slog::endl; - - bool noOptions = argc == 1; - - gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); - if (FLAGS_h || noOptions) { - showUsage(); - return 1; - } - - UserExceptions ee; - - NetworkType netType = Undefined; - // Checking the network type - if (std::string(FLAGS_t) == "C") { - netType = Classification; - } else if (std::string(FLAGS_t) == "OD") { - netType = ObjDetection; - } else { - ee << UserException(5, "Unknown network type specified (invalid -t option)"); - } - - // Checking required options - if (FLAGS_m.empty()) ee << UserException(3, "Model file is not specified (missing -m option)"); - if (FLAGS_i.empty()) ee << UserException(4, "Images list is not specified (missing -i option)"); - if (FLAGS_d.empty()) ee << UserException(5, "Target device is not specified 
-/**
- * @brief The main function of Inference Engine sample application
- * @param argc - The number of arguments
- * @param argv - Arguments
- * @return 0 if all good
- */
-int main(int argc, char *argv[]) {
-    try {
-        slog::info << "InferenceEngine: " << GetInferenceEngineVersion() << slog::endl;
-
-        // ---------------------------Parsing and validating input arguments--------------------------------------
-        slog::info << "Parsing input parameters" << slog::endl;
-
-        bool noOptions = argc == 1;
-
-        gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
-        if (FLAGS_h || noOptions) {
-            showUsage();
-            return 1;
-        }
-
-        UserExceptions ee;
-
-        NetworkType netType = Undefined;
-        // Checking the network type
-        if (std::string(FLAGS_t) == "C") {
-            netType = Classification;
-        } else if (std::string(FLAGS_t) == "OD") {
-            netType = ObjDetection;
-        } else {
-            ee << UserException(5, "Unknown network type specified (invalid -t option)");
-        }
-
-        // Checking required options
-        if (FLAGS_m.empty()) ee << UserException(3, "Model file is not specified (missing -m option)");
-        if (FLAGS_i.empty()) ee << UserException(4, "Images list is not specified (missing -i option)");
-        if (FLAGS_d.empty()) ee << UserException(5, "Target device is not specified (missing -d option)");
-        if (FLAGS_b < 0) ee << UserException(6, "Batch must be positive (invalid -b option value)");
-
-        if (netType == ObjDetection) {
-            // Checking required OD-specific options
-            if (FLAGS_ODa.empty()) ee << UserException(11, "Annotations folder is not specified for object detection (missing -a option)");
-            if (FLAGS_ODc.empty()) ee << UserException(12, "Classes file is not specified (missing -c option)");
-        }
-
-        if (!ee.empty()) throw ee;
-        // -----------------------------------------------------------------------------------------------------
-
-        // ---------------------Loading plugin for Inference Engine------------------------------------------------
-        slog::info << "Loading plugin" << slog::endl;
-        /** Loading the library with extensions if provided**/
-        InferencePlugin plugin = PluginDispatcher({ FLAGS_pp }).getPluginByDevice(FLAGS_d);
-
-        /** Loading default extensions **/
-        if (FLAGS_d.find("CPU") != std::string::npos) {
-            /**
-             * cpu_extensions library is compiled from "extension" folder containing
-             * custom CPU plugin layer implementations. These layers are not supported
-             * by CPU, but they can be useful for inferring custom topologies.
-             **/
-            plugin.AddExtension(std::make_shared<Extension>());
-        }
-
-        if (!FLAGS_l.empty()) {
-            // CPU extensions are loaded as a shared library and passed as a pointer to base extension
-            IExtensionPtr extension_ptr = make_so_pointer<IExtension>(FLAGS_l);
-            plugin.AddExtension(extension_ptr);
-            slog::info << "CPU Extension loaded: " << FLAGS_l << slog::endl;
-        }
-        if (!FLAGS_c.empty()) {
-            // GPU extensions are loaded from an .xml description and OpenCL kernel files
-            plugin.SetConfig({{PluginConfigParams::KEY_CONFIG_FILE, FLAGS_c}});
-            slog::info << "GPU Extension loaded: " << FLAGS_c << slog::endl;
-        }
-
-        printPluginVersion(plugin, std::cout);
-
-        CsvDumper dumper(FLAGS_dump);
-
-        std::shared_ptr<Processor> processor;
-
-        PreprocessingOptions preprocessingOptions;
-        if (strtolower(FLAGS_ppType.c_str()) == "none") {
-            preprocessingOptions = PreprocessingOptions(false, ResizeCropPolicy::DoNothing);
-        } else if (strtolower(FLAGS_ppType) == "resizecrop") {
-            size_t ppWidth = FLAGS_ppSize;
-            size_t ppHeight = FLAGS_ppSize;
-
-            if (FLAGS_ppWidth > 0) ppWidth = FLAGS_ppSize;
-            if (FLAGS_ppHeight > 0) ppHeight = FLAGS_ppSize;
-
-            if (FLAGS_ppSize > 0 || (FLAGS_ppWidth > 0 && FLAGS_ppHeight > 0)) {
-                preprocessingOptions = PreprocessingOptions(false, ResizeCropPolicy::ResizeThenCrop, ppWidth, ppHeight);
-            } else {
-                THROW_USER_EXCEPTION(2) << "Size must be specified for preprocessing type " << FLAGS_ppType;
-            }
-        } else if (strtolower(FLAGS_ppType) == "resize" || FLAGS_ppType.empty()) {
-            preprocessingOptions = PreprocessingOptions(false, ResizeCropPolicy::Resize);
-        } else {
-            THROW_USER_EXCEPTION(2) << "Unknown preprocessing type: " << FLAGS_ppType;
-        }
-
-        if (netType == Classification) {
-            processor = std::shared_ptr<Processor>(
-                new ClassificationProcessor(FLAGS_m, FLAGS_d, FLAGS_i, FLAGS_b,
-                                            plugin, dumper, FLAGS_lbl, preprocessingOptions, FLAGS_Czb));
-        } else if (netType == ObjDetection) {
-            if (FLAGS_ODkind == "SSD") {
-                processor = std::shared_ptr<Processor>(
-                    new SSDObjectDetectionProcessor(FLAGS_m, FLAGS_d, FLAGS_i, FLAGS_ODsubdir, FLAGS_b,
-                                                    0.5, plugin, dumper, FLAGS_ODa, FLAGS_ODc));
-            } else if (FLAGS_ODkind == "YOLO") {
-                processor = std::shared_ptr<Processor>(
-                    new YOLOObjectDetectionProcessor(FLAGS_m, FLAGS_d, FLAGS_i, FLAGS_ODsubdir, FLAGS_b,
-                                                     0.5, plugin, dumper, FLAGS_ODa, FLAGS_ODc));
-            }
-        } else {
-            THROW_USER_EXCEPTION(2) << "Unknown network type specified" << FLAGS_ppType;
-        }
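The dispatch above builds a polymorphic `Processor`; note that an unrecognized `-ODkind` falls through with a null pointer, which is only caught by the `processor.get()` check in the next hunk. A minimal sketch of the same factory shape that fails fast instead; all type names here are hypothetical stand-ins, not the sample's real classes:

```cpp
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>

// Hypothetical stand-ins for the sample's Processor hierarchy.
struct Processor {
    virtual ~Processor() = default;
    virtual void process() = 0;
};

struct ClassificationProcessor : Processor {
    void process() override { std::cout << "classification\n"; }
};

struct SSDObjectDetectionProcessor : Processor {
    void process() override { std::cout << "SSD detection\n"; }
};

// Factory mirroring the -t / -ODkind dispatch, except that an unknown
// type or kind throws immediately instead of returning a null pointer.
std::unique_ptr<Processor> makeProcessor(const std::string& type,
                                         const std::string& kind) {
    if (type == "C") return std::make_unique<ClassificationProcessor>();
    if (type == "OD" && kind == "SSD") return std::make_unique<SSDObjectDetectionProcessor>();
    throw std::runtime_error("Unknown network type/kind: " + type + "/" + kind);
}

int main() {
    makeProcessor("C", "")->process();
    return 0;
}
```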
-        if (!processor.get()) {
-            THROW_USER_EXCEPTION(2) << "Processor pointer is invalid" << FLAGS_ppType;
-        }
-
-        slog::info << (FLAGS_d.empty() ? "Plugin: " + FLAGS_p : "Device: " + FLAGS_d) << slog::endl;
-
-        shared_ptr<Processor::InferenceMetrics> pIM = processor->Process(FLAGS_plain);
-        processor->Report(*pIM.get());
-
-        if (dumper.dumpEnabled()) {
-            slog::info << "Dump file generated: " << dumper.getFilename() << slog::endl;
-        }
-    } catch (const InferenceEngineException& ex) {
-        slog::err << "Inference problem: \n" << ex.what() << slog::endl;
-        return 1;
-    } catch (const UserException& ex) {
-        slog::err << "Input problem: \n" << ex.what() << slog::endl;
-        showUsage();
-        return ex.exitCode();
-    } catch (const UserExceptions& ex) {
-        if (ex.list().size() == 1) {
-            slog::err << "Input problem: " << ex.what() << slog::endl;
-            showUsage();
-            return ex.list().begin()->exitCode();
-        } else {
-            slog::err << "Input problems: \n" << ex.what() << slog::endl;
-            showUsage();
-            return ex.list().begin()->exitCode();
-        }
-    } catch (const std::exception& ex) {
-        slog::err << ex.what() << slog::endl;
-        return 1;
-    } catch (...) {
-        slog::err << "Unknown/internal exception happened." << slog::endl;
-        return 1;
-    }
-
-    return 0;
-}
diff --git a/inference-engine/samples/validation_app/pugixml/pugiconfig.hpp b/inference-engine/samples/validation_app/pugixml/pugiconfig.hpp
deleted file mode 100644
index 085d6c67b245f8..00000000000000
--- a/inference-engine/samples/validation_app/pugixml/pugiconfig.hpp
+++ /dev/null
@@ -1,65 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#ifndef HEADER_PUGICONFIG_HPP
-#define HEADER_PUGICONFIG_HPP
-
-// Uncomment this to enable wchar_t mode
-// #define PUGIXML_WCHAR_MODE
-
-// Uncomment this to enable compact mode
-// #define PUGIXML_COMPACT
-
-// Uncomment this to disable XPath
-// #define PUGIXML_NO_XPATH
-
-// Uncomment this to disable STL
-// #define PUGIXML_NO_STL
-
-// Uncomment this to disable exceptions
-// #define PUGIXML_NO_EXCEPTIONS
-
-// Set this to control attributes for public classes/functions, i.e.:
-// #define PUGIXML_API __declspec(dllexport) // to export all public symbols from DLL
-// #define PUGIXML_CLASS __declspec(dllimport) // to import all classes from DLL
-// #define PUGIXML_FUNCTION __fastcall // to set calling conventions to all public functions to fastcall
-// In absence of PUGIXML_CLASS/PUGIXML_FUNCTION definitions PUGIXML_API is used instead
-
-// Tune these constants to adjust memory-related behavior
-// #define PUGIXML_MEMORY_PAGE_SIZE 32768
-// #define PUGIXML_MEMORY_OUTPUT_STACK 10240
-// #define PUGIXML_MEMORY_XPATH_PAGE_SIZE 4096
-
-// Uncomment this to switch to header-only version
-// #define PUGIXML_HEADER_ONLY
-
-// Uncomment this to enable long long support
-// #define PUGIXML_HAS_LONG_LONG
-
-#endif
-
-/**
- * Copyright (c) 2006-2016 Arseny Kapoulkine
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
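The deleted `pugiconfig.hpp` configures pugixml entirely through preprocessor switches. For reference, a consumer can set the same switches without editing the header, as long as every translation unit that includes `pugixml.hpp` sees identical definitions; a minimal sketch, with the macro choice purely illustrative:

```cpp
// Hypothetical consumer translation unit. In practice these macros must be
// visible to *every* TU that includes pugixml.hpp (usually passed by the
// build system, e.g. -DPUGIXML_COMPACT -DPUGIXML_NO_XPATH), not defined here.
#define PUGIXML_COMPACT   // denser node layout (see the compact_* classes below)
#define PUGIXML_NO_XPATH  // drop the XPath engine entirely
#include "pugixml.hpp"

#include <iostream>

int main() {
    pugi::xml_document doc;
    if (!doc.load_string("<root attr='1'/>")) return 1;
    std::cout << doc.child("root").attribute("attr").as_int() << "\n";  // 1
    return 0;
}
```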
diff --git a/inference-engine/samples/validation_app/pugixml/pugixml.cpp b/inference-engine/samples/validation_app/pugixml/pugixml.cpp
deleted file mode 100644
index aa18656d7c6863..00000000000000
--- a/inference-engine/samples/validation_app/pugixml/pugixml.cpp
+++ /dev/null
@@ -1,12626 +0,0 @@
-// Copyright (C) 2018-2019 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#ifndef SOURCE_PUGIXML_CPP
-#define SOURCE_PUGIXML_CPP
-
-#include "pugixml.hpp"
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <limits.h>
-
-#ifdef PUGIXML_WCHAR_MODE
-# include <wchar.h>
-#endif
-
-#ifndef PUGIXML_NO_XPATH
-# include <math.h>
-# include <float.h>
-# ifdef PUGIXML_NO_EXCEPTIONS
-# include <setjmp.h>
-# endif
-#endif
-
-#ifndef PUGIXML_NO_STL
-# include <istream>
-# include <ostream>
-# include <string>
-#endif
-
-// For placement new
-#include <new>
-
-#ifdef _MSC_VER
-# pragma warning(push)
-# pragma warning(disable: 4127) // conditional expression is constant
-# pragma warning(disable: 4324) // structure was padded due to __declspec(align())
-# pragma warning(disable: 4611) // interaction between '_setjmp' and C++ object destruction is non-portable
-# pragma warning(disable: 4702) // unreachable code
-# pragma warning(disable: 4996) // this function or variable may be unsafe
-# pragma warning(disable: 4793) // function compiled as native: presence of '_setjmp' makes a function unmanaged
-#endif
-
-#ifdef __INTEL_COMPILER
-# pragma warning(disable: 177) // function was declared but never referenced
-# pragma warning(disable: 279) // controlling expression is constant
-# pragma warning(disable: 1478 1786) // function was declared "deprecated"
-# pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
-#endif
-
-#if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
-# pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
-#endif
-
-#ifdef __BORLANDC__
-# pragma option push
-# pragma warn -8008 // condition is always false
-# pragma warn -8066 // unreachable code
-#endif
-
-#ifdef __SNC__
-// Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
-# pragma diag_suppress=178 // function was declared but never referenced
-# pragma diag_suppress=237 // controlling expression is constant
-#endif
-
-// Inlining controls
-#if defined(_MSC_VER) && _MSC_VER >= 1300
-# define PUGI__NO_INLINE __declspec(noinline)
-#elif defined(__GNUC__)
-# define PUGI__NO_INLINE __attribute__((noinline))
-#else
-# define PUGI__NO_INLINE
-#endif
-
-// Branch weight controls
-#if defined(__GNUC__)
-# define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
-#else
-# define PUGI__UNLIKELY(cond) (cond)
-#endif
-
-// Simple static assertion
-#define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 
1 : -1] = {0}; (void)condition_failed[0]; } - -// Digital Mars C++ bug workaround for passing char loaded from memory via stack -#ifdef __DMC__ -# define PUGI__DMC_VOLATILE volatile -#else -# define PUGI__DMC_VOLATILE -#endif - -// Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all) -#if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST) -using std::memcpy; -using std::memmove; -using std::memset; -#endif - -// Some MinGW versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions in strict ANSI mode -#if defined(PUGIXML_HAS_LONG_LONG) && defined(__MINGW32__) && defined(__STRICT_ANSI__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX) -# define LLONG_MAX 9223372036854775807LL -# define LLONG_MIN (-LLONG_MAX-1) -# define ULLONG_MAX (2ULL*LLONG_MAX+1) -#endif - -// In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features -#if defined(_MSC_VER) && !defined(__S3E__) -# define PUGI__MSVC_CRT_VERSION _MSC_VER -#endif - -#ifdef PUGIXML_HEADER_ONLY -# define PUGI__NS_BEGIN namespace pugi { namespace impl { -# define PUGI__NS_END } } -# define PUGI__FN inline -# define PUGI__FN_NO_INLINE inline -#else -# if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces -# define PUGI__NS_BEGIN namespace pugi { namespace impl { -# define PUGI__NS_END } } -# else -# define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace { -# define PUGI__NS_END } } } -# endif -# define PUGI__FN -# define PUGI__FN_NO_INLINE PUGI__NO_INLINE -#endif - -// uintptr_t -#if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561) -namespace pugi -{ -# ifndef _UINTPTR_T_DEFINED - typedef size_t uintptr_t; -# endif - - typedef unsigned __int8 uint8_t; - typedef unsigned __int16 uint16_t; - typedef unsigned __int32 uint32_t; -} -#else -# include -#endif - -// Memory allocation -PUGI__NS_BEGIN - PUGI__FN void* default_allocate(size_t size) - { - return malloc(size); - } - - PUGI__FN void default_deallocate(void* ptr) - { - free(ptr); - } - - template - struct xml_memory_management_function_storage - { - static allocation_function allocate; - static deallocation_function deallocate; - }; - - // Global allocation functions are stored in class statics so that in header mode linker deduplicates them - // Without a template<> we'll get multiple definitions of the same static - template allocation_function xml_memory_management_function_storage::allocate = default_allocate; - template deallocation_function xml_memory_management_function_storage::deallocate = default_deallocate; - - typedef xml_memory_management_function_storage xml_memory; -PUGI__NS_END - -// String utilities -PUGI__NS_BEGIN - // Get string length - PUGI__FN size_t strlength(const char_t* s) - { - assert(s); - - #ifdef PUGIXML_WCHAR_MODE - return wcslen(s); - #else - return strlen(s); - #endif - } - - // Compare two strings - PUGI__FN bool strequal(const char_t* src, const char_t* dst) - { - assert(src && dst); - - #ifdef PUGIXML_WCHAR_MODE - return wcscmp(src, dst) == 0; - #else - return strcmp(src, dst) == 0; - #endif - } - - // Compare lhs with [rhs_begin, rhs_end) - PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count) - { - for (size_t i = 0; i < count; ++i) - if (lhs[i] != rhs[i]) - return false; - - return lhs[count] == 0; - } - - // Get length of 
wide string, even if CRT lacks wide character support - PUGI__FN size_t strlength_wide(const wchar_t* s) - { - assert(s); - - #ifdef PUGIXML_WCHAR_MODE - return wcslen(s); - #else - const wchar_t* end = s; - while (*end) end++; - return static_cast(end - s); - #endif - } -PUGI__NS_END - -// auto_ptr-like object for exception recovery -PUGI__NS_BEGIN - template struct auto_deleter - { - typedef void (*D)(T*); - - T* data; - D deleter; - - auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_) - { - } - - ~auto_deleter() - { - if (data) deleter(data); - } - - T* release() - { - T* result = data; - data = 0; - return result; - } - }; -PUGI__NS_END - -#ifdef PUGIXML_COMPACT -PUGI__NS_BEGIN - class compact_hash_table - { - public: - compact_hash_table(): _items(0), _capacity(0), _count(0) - { - } - - void clear() - { - if (_items) - { - xml_memory::deallocate(_items); - _items = 0; - _capacity = 0; - _count = 0; - } - } - - void** find(const void* key) - { - assert(key); - - if (_capacity == 0) return 0; - - size_t hashmod = _capacity - 1; - size_t bucket = hash(key) & hashmod; - - for (size_t probe = 0; probe <= hashmod; ++probe) - { - item_t& probe_item = _items[bucket]; - - if (probe_item.key == key) - return &probe_item.value; - - if (probe_item.key == 0) - return 0; - - // hash collision, quadratic probing - bucket = (bucket + probe + 1) & hashmod; - } - - assert(false && "Hash table is full"); - return 0; - } - - void** insert(const void* key) - { - assert(key); - assert(_capacity != 0 && _count < _capacity - _capacity / 4); - - size_t hashmod = _capacity - 1; - size_t bucket = hash(key) & hashmod; - - for (size_t probe = 0; probe <= hashmod; ++probe) - { - item_t& probe_item = _items[bucket]; - - if (probe_item.key == 0) - { - probe_item.key = key; - _count++; - return &probe_item.value; - } - - if (probe_item.key == key) - return &probe_item.value; - - // hash collision, quadratic probing - bucket = (bucket + probe + 1) & hashmod; - } - - assert(false && "Hash table is full"); - return 0; - } - - bool reserve() - { - if (_count + 16 >= _capacity - _capacity / 4) - return rehash(); - - return true; - } - - private: - struct item_t - { - const void* key; - void* value; - }; - - item_t* _items; - size_t _capacity; - - size_t _count; - - bool rehash(); - - static unsigned int hash(const void* key) - { - unsigned int h = static_cast(reinterpret_cast(key)); - - // MurmurHash3 32-bit finalizer - h ^= h >> 16; - h *= 0x85ebca6bu; - h ^= h >> 13; - h *= 0xc2b2ae35u; - h ^= h >> 16; - - return h; - } - }; - - PUGI__FN_NO_INLINE bool compact_hash_table::rehash() - { - compact_hash_table rt; - rt._capacity = (_capacity == 0) ? 
32 : _capacity * 2; - rt._items = static_cast(xml_memory::allocate(sizeof(item_t) * rt._capacity)); - - if (!rt._items) - return false; - - memset(rt._items, 0, sizeof(item_t) * rt._capacity); - - for (size_t i = 0; i < _capacity; ++i) - if (_items[i].key) - *rt.insert(_items[i].key) = _items[i].value; - - if (_items) - xml_memory::deallocate(_items); - - _capacity = rt._capacity; - _items = rt._items; - - assert(_count == rt._count); - - return true; - } - -PUGI__NS_END -#endif - -PUGI__NS_BEGIN -#ifdef PUGIXML_COMPACT - static const uintptr_t xml_memory_block_alignment = 4; -#else - static const uintptr_t xml_memory_block_alignment = sizeof(void*); -#endif - - // extra metadata bits - static const uintptr_t xml_memory_page_contents_shared_mask = 64; - static const uintptr_t xml_memory_page_name_allocated_mask = 32; - static const uintptr_t xml_memory_page_value_allocated_mask = 16; - static const uintptr_t xml_memory_page_type_mask = 15; - - // combined masks for string uniqueness - static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask; - static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask; - -#ifdef PUGIXML_COMPACT - #define PUGI__GETHEADER_IMPL(object, page, flags) // unused - #define PUGI__GETPAGE_IMPL(header) (header).get_page() -#else - #define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast(object) - reinterpret_cast(page)) << 8) | (flags)) - #define PUGI__GETPAGE_IMPL(header) const_cast(reinterpret_cast(reinterpret_cast(&header) - (header >> 8))) -#endif - - #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header) - #define PUGI__NODETYPE(n) static_cast((n)->header & impl::xml_memory_page_type_mask) - - struct xml_allocator; - - struct xml_memory_page - { - static xml_memory_page* construct(void* memory) - { - xml_memory_page* result = static_cast(memory); - - result->allocator = 0; - result->prev = 0; - result->next = 0; - result->busy_size = 0; - result->freed_size = 0; - - #ifdef PUGIXML_COMPACT - result->compact_string_base = 0; - result->compact_shared_parent = 0; - result->compact_page_marker = 0; - #endif - - return result; - } - - xml_allocator* allocator; - - xml_memory_page* prev; - xml_memory_page* next; - - size_t busy_size; - size_t freed_size; - - #ifdef PUGIXML_COMPACT - char_t* compact_string_base; - void* compact_shared_parent; - uint32_t* compact_page_marker; - #endif - }; - - static const size_t xml_memory_page_size = - #ifdef PUGIXML_MEMORY_PAGE_SIZE - (PUGIXML_MEMORY_PAGE_SIZE) - #else - 32768 - #endif - - sizeof(xml_memory_page); - - struct xml_memory_string_header - { - uint16_t page_offset; // offset from page->data - uint16_t full_size; // 0 if string occupies whole page - }; - - struct xml_allocator - { - xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size) - { - #ifdef PUGIXML_COMPACT - _hash = 0; - #endif - } - - xml_memory_page* allocate_page(size_t data_size) - { - size_t size = sizeof(xml_memory_page) + data_size; - - // allocate block with some alignment, leaving memory for worst-case padding - void* memory = xml_memory::allocate(size); - if (!memory) return 0; - - // prepare page structure - xml_memory_page* page = xml_memory_page::construct(memory); - assert(page); - - page->allocator = _root->allocator; - - return page; - } - - static void deallocate_page(xml_memory_page* page) - { - xml_memory::deallocate(page); - } - - 
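`xml_allocator` is a bump allocator over the 32 KB pages constructed above: the fast path in `allocate_memory` (next hunk) just advances `_busy_size` within the current page and only falls back to `allocate_memory_oob` when the page is exhausted. A minimal standalone sketch of the same idea, with hypothetical names and without the out-of-band fallback for oversized requests:

```cpp
#include <cstddef>
#include <cstdio>
#include <vector>

// Minimal bump allocator: carve allocations out of fixed-size pages by
// advancing an offset, starting a fresh page when the current one is full.
// Unlike xml_allocator there is no separate path for oversized requests,
// so callers must keep size <= kPageSize.
class BumpAllocator {
public:
    void* allocate(std::size_t size) {
        if (pages_.empty() || used_ + size > kPageSize) {
            pages_.emplace_back(kPageSize);  // analogous to allocate_page()
            used_ = 0;
        }
        void* result = pages_.back().data() + used_;
        used_ += size;  // the real allocator also rounds up for alignment
        return result;
    }

private:
    static constexpr std::size_t kPageSize = 32768;  // pugixml's default page size
    std::vector<std::vector<char>> pages_;
    std::size_t used_ = 0;
};

int main() {
    BumpAllocator alloc;
    char* a = static_cast<char*>(alloc.allocate(16));
    char* b = static_cast<char*>(alloc.allocate(16));
    std::printf("delta = %td\n", b - a);  // 16: consecutive bump allocations
    return 0;
}
```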
void* allocate_memory_oob(size_t size, xml_memory_page*& out_page); - - void* allocate_memory(size_t size, xml_memory_page*& out_page) - { - if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size)) - return allocate_memory_oob(size, out_page); - - void* buf = reinterpret_cast(_root) + sizeof(xml_memory_page) + _busy_size; - - _busy_size += size; - - out_page = _root; - - return buf; - } - - #ifdef PUGIXML_COMPACT - void* allocate_object(size_t size, xml_memory_page*& out_page) - { - void* result = allocate_memory(size + sizeof(uint32_t), out_page); - if (!result) return 0; - - // adjust for marker - ptrdiff_t offset = static_cast(result) - reinterpret_cast(out_page->compact_page_marker); - - if (PUGI__UNLIKELY(static_cast(offset) >= 256 * xml_memory_block_alignment)) - { - // insert new marker - uint32_t* marker = static_cast(result); - - *marker = static_cast(reinterpret_cast(marker) - reinterpret_cast(out_page)); - out_page->compact_page_marker = marker; - - // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block - // this will make sure deallocate_memory correctly tracks the size - out_page->freed_size += sizeof(uint32_t); - - return marker + 1; - } - else - { - // roll back uint32_t part - _busy_size -= sizeof(uint32_t); - - return result; - } - } - #else - void* allocate_object(size_t size, xml_memory_page*& out_page) - { - return allocate_memory(size, out_page); - } - #endif - - void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) - { - if (page == _root) page->busy_size = _busy_size; - - assert(ptr >= reinterpret_cast(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast(page) + sizeof(xml_memory_page) + page->busy_size); - (void)!ptr; - - page->freed_size += size; - assert(page->freed_size <= page->busy_size); - - if (page->freed_size == page->busy_size) - { - if (page->next == 0) - { - assert(_root == page); - - // top page freed, just reset sizes - page->busy_size = 0; - page->freed_size = 0; - - #ifdef PUGIXML_COMPACT - // reset compact state to maximize efficiency - page->compact_string_base = 0; - page->compact_shared_parent = 0; - page->compact_page_marker = 0; - #endif - - _busy_size = 0; - } - else - { - assert(_root != page); - assert(page->prev); - - // remove from the list - page->prev->next = page->next; - page->next->prev = page->prev; - - // deallocate - deallocate_page(page); - } - } - } - - char_t* allocate_string(size_t length) - { - static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment; - - PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset); - - // allocate memory for string and header block - size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t); - - // round size up to block alignment boundary - size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1); - - xml_memory_page* page; - xml_memory_string_header* header = static_cast(allocate_memory(full_size, page)); - - if (!header) return 0; - - // setup header - ptrdiff_t page_offset = reinterpret_cast(header) - reinterpret_cast(page) - sizeof(xml_memory_page); - - assert(page_offset % xml_memory_block_alignment == 0); - assert(page_offset >= 0 && static_cast(page_offset) < max_encoded_offset); - header->page_offset = static_cast(static_cast(page_offset) / xml_memory_block_alignment); - - // full_size == 0 for large strings that occupy the whole page - assert(full_size % xml_memory_block_alignment == 0); - assert(full_size < 
max_encoded_offset || (page->busy_size == full_size && page_offset == 0)); - header->full_size = static_cast(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0); - - // round-trip through void* to avoid 'cast increases required alignment of target type' warning - // header is guaranteed a pointer-sized alignment, which should be enough for char_t - return static_cast(static_cast(header + 1)); - } - - void deallocate_string(char_t* string) - { - // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings - // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string - - // get header - xml_memory_string_header* header = static_cast(static_cast(string)) - 1; - assert(header); - - // deallocate - size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment; - xml_memory_page* page = reinterpret_cast(static_cast(reinterpret_cast(header) - page_offset)); - - // if full_size == 0 then this string occupies the whole page - size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment; - - deallocate_memory(header, full_size, page); - } - - bool reserve() - { - #ifdef PUGIXML_COMPACT - return _hash->reserve(); - #else - return true; - #endif - } - - xml_memory_page* _root; - size_t _busy_size; - - #ifdef PUGIXML_COMPACT - compact_hash_table* _hash; - #endif - }; - - PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) - { - const size_t large_allocation_threshold = xml_memory_page_size / 4; - - xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size); - out_page = page; - - if (!page) return 0; - - if (size <= large_allocation_threshold) - { - _root->busy_size = _busy_size; - - // insert page at the end of linked list - page->prev = _root; - _root->next = page; - _root = page; - - _busy_size = size; - } - else - { - // insert page before the end of linked list, so that it is deleted as soon as possible - // the last page is not deleted even if it's empty (see deallocate_memory) - assert(_root->prev); - - page->prev = _root->prev; - page->next = _root; - - _root->prev->next = page; - _root->prev = page; - - page->busy_size = size; - } - - return reinterpret_cast(page) + sizeof(xml_memory_page); - } -PUGI__NS_END - -#ifdef PUGIXML_COMPACT -PUGI__NS_BEGIN - static const uintptr_t compact_alignment_log2 = 2; - static const uintptr_t compact_alignment = 1 << compact_alignment_log2; - - class compact_header - { - public: - compact_header(xml_memory_page* page, unsigned int flags) - { - PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment); - - ptrdiff_t offset = (reinterpret_cast(this) - reinterpret_cast(page->compact_page_marker)); - assert(offset % compact_alignment == 0 && static_cast(offset) < 256 * compact_alignment); - - _page = static_cast(offset >> compact_alignment_log2); - _flags = static_cast(flags); - } - - void operator&=(uintptr_t mod) - { - _flags &= static_cast(mod); - } - - void operator|=(uintptr_t mod) - { - _flags |= static_cast(mod); - } - - uintptr_t operator&(uintptr_t mod) const - { - return _flags & mod; - } - - xml_memory_page* get_page() const - { - // round-trip through void* to silence 'cast increases required alignment of target type' warnings - const char* page_marker = reinterpret_cast(this) - (_page << compact_alignment_log2); - const char* page = 
page_marker - *reinterpret_cast(static_cast(page_marker)); - - return const_cast(reinterpret_cast(static_cast(page))); - } - - private: - unsigned char _page; - unsigned char _flags; - }; - - PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset) - { - const compact_header* header = reinterpret_cast(static_cast(object) - header_offset); - - return header->get_page(); - } - - template PUGI__FN_NO_INLINE T* compact_get_value(const void* object) - { - return static_cast(*compact_get_page(object, header_offset)->allocator->_hash->find(object)); - } - - template PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value) - { - *compact_get_page(object, header_offset)->allocator->_hash->insert(object) = value; - } - - template class compact_pointer - { - public: - compact_pointer(): _data(0) - { - } - - void operator=(const compact_pointer& rhs) - { - *this = rhs + 0; - } - - void operator=(T* value) - { - if (value) - { - // value is guaranteed to be compact-aligned; 'this' is not - // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) - // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to - // compensate for arithmetic shift rounding for negative values - ptrdiff_t diff = reinterpret_cast(value) - reinterpret_cast(this); - ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start; - - if (static_cast(offset) <= 253) - _data = static_cast(offset + 1); - else - { - compact_set_value(this, value); - - _data = 255; - } - } - else - _data = 0; - } - - operator T*() const - { - if (_data) - { - if (_data < 255) - { - uintptr_t base = reinterpret_cast(this) & ~(compact_alignment - 1); - - return reinterpret_cast(base + ((_data - 1 + start) << compact_alignment_log2)); - } - else - return compact_get_value(this); - } - else - return 0; - } - - T* operator->() const - { - return *this; - } - - private: - unsigned char _data; - }; - - template class compact_pointer_parent - { - public: - compact_pointer_parent(): _data(0) - { - } - - void operator=(const compact_pointer_parent& rhs) - { - *this = rhs + 0; - } - - void operator=(T* value) - { - if (value) - { - // value is guaranteed to be compact-aligned; 'this' is not - // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) - // so for negative offsets (e.g. 
-3) we need to adjust the diff by compact_alignment - 1 to - // compensate for arithmetic shift behavior for negative values - ptrdiff_t diff = reinterpret_cast(value) - reinterpret_cast(this); - ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533; - - if (static_cast(offset) <= 65533) - { - _data = static_cast(offset + 1); - } - else - { - xml_memory_page* page = compact_get_page(this, header_offset); - - if (PUGI__UNLIKELY(page->compact_shared_parent == 0)) - page->compact_shared_parent = value; - - if (page->compact_shared_parent == value) - { - _data = 65534; - } - else - { - compact_set_value(this, value); - - _data = 65535; - } - } - } - else - { - _data = 0; - } - } - - operator T*() const - { - if (_data) - { - if (_data < 65534) - { - uintptr_t base = reinterpret_cast(this) & ~(compact_alignment - 1); - - return reinterpret_cast(base + ((_data - 1 - 65533) << compact_alignment_log2)); - } - else if (_data == 65534) - return static_cast(compact_get_page(this, header_offset)->compact_shared_parent); - else - return compact_get_value(this); - } - else - return 0; - } - - T* operator->() const - { - return *this; - } - - private: - uint16_t _data; - }; - - template class compact_string - { - public: - compact_string(): _data(0) - { - } - - void operator=(const compact_string& rhs) - { - *this = rhs + 0; - } - - void operator=(char_t* value) - { - if (value) - { - xml_memory_page* page = compact_get_page(this, header_offset); - - if (PUGI__UNLIKELY(page->compact_string_base == 0)) - page->compact_string_base = value; - - ptrdiff_t offset = value - page->compact_string_base; - - if (static_cast(offset) < (65535 << 7)) - { - // round-trip through void* to silence 'cast increases required alignment of target type' warnings - uint16_t* base = reinterpret_cast(static_cast(reinterpret_cast(this) - base_offset)); - - if (*base == 0) - { - *base = static_cast((offset >> 7) + 1); - _data = static_cast((offset & 127) + 1); - } - else - { - ptrdiff_t remainder = offset - ((*base - 1) << 7); - - if (static_cast(remainder) <= 253) - { - _data = static_cast(remainder + 1); - } - else - { - compact_set_value(this, value); - - _data = 255; - } - } - } - else - { - compact_set_value(this, value); - - _data = 255; - } - } - else - { - _data = 0; - } - } - - operator char_t*() const - { - if (_data) - { - if (_data < 255) - { - xml_memory_page* page = compact_get_page(this, header_offset); - - // round-trip through void* to silence 'cast increases required alignment of target type' warnings - const uint16_t* base = reinterpret_cast(static_cast(reinterpret_cast(this) - base_offset)); - assert(*base); - - ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1); - - return page->compact_string_base + offset; - } - else - { - return compact_get_value(this); - } - } - else - return 0; - } - - private: - unsigned char _data; - }; -PUGI__NS_END -#endif - -#ifdef PUGIXML_COMPACT -namespace pugi -{ - struct xml_attribute_struct - { - xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0) - { - PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8); - } - - impl::compact_header header; - - uint16_t namevalue_base; - - impl::compact_string<4, 2> name; - impl::compact_string<5, 3> value; - - impl::compact_pointer prev_attribute_c; - impl::compact_pointer next_attribute; - }; - - struct xml_node_struct - { - xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0) - { - 
PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12); - } - - impl::compact_header header; - - uint16_t namevalue_base; - - impl::compact_string<4, 2> name; - impl::compact_string<5, 3> value; - - impl::compact_pointer_parent parent; - - impl::compact_pointer first_child; - - impl::compact_pointer prev_sibling_c; - impl::compact_pointer next_sibling; - - impl::compact_pointer first_attribute; - }; -} -#else -namespace pugi -{ - struct xml_attribute_struct - { - xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0) - { - header = PUGI__GETHEADER_IMPL(this, page, 0); - } - - uintptr_t header; - - char_t* name; - char_t* value; - - xml_attribute_struct* prev_attribute_c; - xml_attribute_struct* next_attribute; - }; - - struct xml_node_struct - { - xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) - { - header = PUGI__GETHEADER_IMPL(this, page, type); - } - - uintptr_t header; - - char_t* name; - char_t* value; - - xml_node_struct* parent; - - xml_node_struct* first_child; - - xml_node_struct* prev_sibling_c; - xml_node_struct* next_sibling; - - xml_attribute_struct* first_attribute; - }; -} -#endif - -PUGI__NS_BEGIN - struct xml_extra_buffer - { - char_t* buffer; - xml_extra_buffer* next; - }; - - struct xml_document_struct: public xml_node_struct, public xml_allocator - { - xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0) - { - #ifdef PUGIXML_COMPACT - _hash = &hash; - #endif - } - - const char_t* buffer; - - xml_extra_buffer* extra_buffers; - - #ifdef PUGIXML_COMPACT - compact_hash_table hash; - #endif - }; - - template inline xml_allocator& get_allocator(const Object* object) - { - assert(object); - - return *PUGI__GETPAGE(object)->allocator; - } - - template inline xml_document_struct& get_document(const Object* object) - { - assert(object); - - return *static_cast(PUGI__GETPAGE(object)->allocator); - } -PUGI__NS_END - -// Low-level DOM operations -PUGI__NS_BEGIN - inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc) - { - xml_memory_page* page; - void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page); - if (!memory) return 0; - - return new (memory) xml_attribute_struct(page); - } - - inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type) - { - xml_memory_page* page; - void* memory = alloc.allocate_object(sizeof(xml_node_struct), page); - if (!memory) return 0; - - return new (memory) xml_node_struct(page, type); - } - - inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc) - { - if (a->header & impl::xml_memory_page_name_allocated_mask) - alloc.deallocate_string(a->name); - - if (a->header & impl::xml_memory_page_value_allocated_mask) - alloc.deallocate_string(a->value); - - alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a)); - } - - inline void destroy_node(xml_node_struct* n, xml_allocator& alloc) - { - if (n->header & impl::xml_memory_page_name_allocated_mask) - alloc.deallocate_string(n->name); - - if (n->header & impl::xml_memory_page_value_allocated_mask) - alloc.deallocate_string(n->value); - - for (xml_attribute_struct* attr = n->first_attribute; attr; ) - { - xml_attribute_struct* next = attr->next_attribute; - - destroy_attribute(attr, alloc); - - attr = next; - } - - for (xml_node_struct* child = n->first_child; child; ) - 
{ - xml_node_struct* next = child->next_sibling; - - destroy_node(child, alloc); - - child = next; - } - - alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n)); - } - - inline void append_node(xml_node_struct* child, xml_node_struct* node) - { - child->parent = node; - - xml_node_struct* head = node->first_child; - - if (head) - { - xml_node_struct* tail = head->prev_sibling_c; - - tail->next_sibling = child; - child->prev_sibling_c = tail; - head->prev_sibling_c = child; - } - else - { - node->first_child = child; - child->prev_sibling_c = child; - } - } - - inline void prepend_node(xml_node_struct* child, xml_node_struct* node) - { - child->parent = node; - - xml_node_struct* head = node->first_child; - - if (head) - { - child->prev_sibling_c = head->prev_sibling_c; - head->prev_sibling_c = child; - } - else - child->prev_sibling_c = child; - - child->next_sibling = head; - node->first_child = child; - } - - inline void insert_node_after(xml_node_struct* child, xml_node_struct* node) - { - xml_node_struct* parent = node->parent; - - child->parent = parent; - - if (node->next_sibling) - node->next_sibling->prev_sibling_c = child; - else - parent->first_child->prev_sibling_c = child; - - child->next_sibling = node->next_sibling; - child->prev_sibling_c = node; - - node->next_sibling = child; - } - - inline void insert_node_before(xml_node_struct* child, xml_node_struct* node) - { - xml_node_struct* parent = node->parent; - - child->parent = parent; - - if (node->prev_sibling_c->next_sibling) - node->prev_sibling_c->next_sibling = child; - else - parent->first_child = child; - - child->prev_sibling_c = node->prev_sibling_c; - child->next_sibling = node; - - node->prev_sibling_c = child; - } - - inline void remove_node(xml_node_struct* node) - { - xml_node_struct* parent = node->parent; - - if (node->next_sibling) - node->next_sibling->prev_sibling_c = node->prev_sibling_c; - else - parent->first_child->prev_sibling_c = node->prev_sibling_c; - - if (node->prev_sibling_c->next_sibling) - node->prev_sibling_c->next_sibling = node->next_sibling; - else - parent->first_child = node->next_sibling; - - node->parent = 0; - node->prev_sibling_c = 0; - node->next_sibling = 0; - } - - inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node) - { - xml_attribute_struct* head = node->first_attribute; - - if (head) - { - xml_attribute_struct* tail = head->prev_attribute_c; - - tail->next_attribute = attr; - attr->prev_attribute_c = tail; - head->prev_attribute_c = attr; - } - else - { - node->first_attribute = attr; - attr->prev_attribute_c = attr; - } - } - - inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node) - { - xml_attribute_struct* head = node->first_attribute; - - if (head) - { - attr->prev_attribute_c = head->prev_attribute_c; - head->prev_attribute_c = attr; - } - else - attr->prev_attribute_c = attr; - - attr->next_attribute = head; - node->first_attribute = attr; - } - - inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) - { - if (place->next_attribute) - place->next_attribute->prev_attribute_c = attr; - else - node->first_attribute->prev_attribute_c = attr; - - attr->next_attribute = place->next_attribute; - attr->prev_attribute_c = place; - place->next_attribute = attr; - } - - inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) - { - if (place->prev_attribute_c->next_attribute) - 
place->prev_attribute_c->next_attribute = attr; - else - node->first_attribute = attr; - - attr->prev_attribute_c = place->prev_attribute_c; - attr->next_attribute = place; - place->prev_attribute_c = attr; - } - - inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node) - { - if (attr->next_attribute) - attr->next_attribute->prev_attribute_c = attr->prev_attribute_c; - else - node->first_attribute->prev_attribute_c = attr->prev_attribute_c; - - if (attr->prev_attribute_c->next_attribute) - attr->prev_attribute_c->next_attribute = attr->next_attribute; - else - node->first_attribute = attr->next_attribute; - - attr->prev_attribute_c = 0; - attr->next_attribute = 0; - } - - PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) - { - if (!alloc.reserve()) return 0; - - xml_node_struct* child = allocate_node(alloc, type); - if (!child) return 0; - - append_node(child, node); - - return child; - } - - PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc) - { - if (!alloc.reserve()) return 0; - - xml_attribute_struct* attr = allocate_attribute(alloc); - if (!attr) return 0; - - append_attribute(attr, node); - - return attr; - } -PUGI__NS_END - -// Helper classes for code generation -PUGI__NS_BEGIN - struct opt_false - { - enum { value = 0 }; - }; - - struct opt_true - { - enum { value = 1 }; - }; -PUGI__NS_END - -// Unicode utilities -PUGI__NS_BEGIN - inline uint16_t endian_swap(uint16_t value) - { - return static_cast(((value & 0xff) << 8) | (value >> 8)); - } - - inline uint32_t endian_swap(uint32_t value) - { - return ((value & 0xff) << 24) | ((value & 0xff00) << 8) | ((value & 0xff0000) >> 8) | (value >> 24); - } - - struct utf8_counter - { - typedef size_t value_type; - - static value_type low(value_type result, uint32_t ch) - { - // U+0000..U+007F - if (ch < 0x80) return result + 1; - // U+0080..U+07FF - else if (ch < 0x800) return result + 2; - // U+0800..U+FFFF - else return result + 3; - } - - static value_type high(value_type result, uint32_t) - { - // U+10000..U+10FFFF - return result + 4; - } - }; - - struct utf8_writer - { - typedef uint8_t* value_type; - - static value_type low(value_type result, uint32_t ch) - { - // U+0000..U+007F - if (ch < 0x80) - { - *result = static_cast(ch); - return result + 1; - } - // U+0080..U+07FF - else if (ch < 0x800) - { - result[0] = static_cast(0xC0 | (ch >> 6)); - result[1] = static_cast(0x80 | (ch & 0x3F)); - return result + 2; - } - // U+0800..U+FFFF - else - { - result[0] = static_cast(0xE0 | (ch >> 12)); - result[1] = static_cast(0x80 | ((ch >> 6) & 0x3F)); - result[2] = static_cast(0x80 | (ch & 0x3F)); - return result + 3; - } - } - - static value_type high(value_type result, uint32_t ch) - { - // U+10000..U+10FFFF - result[0] = static_cast(0xF0 | (ch >> 18)); - result[1] = static_cast(0x80 | ((ch >> 12) & 0x3F)); - result[2] = static_cast(0x80 | ((ch >> 6) & 0x3F)); - result[3] = static_cast(0x80 | (ch & 0x3F)); - return result + 4; - } - - static value_type any(value_type result, uint32_t ch) - { - return (ch < 0x10000) ? 
low(result, ch) : high(result, ch); - } - }; - - struct utf16_counter - { - typedef size_t value_type; - - static value_type low(value_type result, uint32_t) - { - return result + 1; - } - - static value_type high(value_type result, uint32_t) - { - return result + 2; - } - }; - - struct utf16_writer - { - typedef uint16_t* value_type; - - static value_type low(value_type result, uint32_t ch) - { - *result = static_cast(ch); - - return result + 1; - } - - static value_type high(value_type result, uint32_t ch) - { - uint32_t msh = static_cast(ch - 0x10000) >> 10; - uint32_t lsh = static_cast(ch - 0x10000) & 0x3ff; - - result[0] = static_cast(0xD800 + msh); - result[1] = static_cast(0xDC00 + lsh); - - return result + 2; - } - - static value_type any(value_type result, uint32_t ch) - { - return (ch < 0x10000) ? low(result, ch) : high(result, ch); - } - }; - - struct utf32_counter - { - typedef size_t value_type; - - static value_type low(value_type result, uint32_t) - { - return result + 1; - } - - static value_type high(value_type result, uint32_t) - { - return result + 1; - } - }; - - struct utf32_writer - { - typedef uint32_t* value_type; - - static value_type low(value_type result, uint32_t ch) - { - *result = ch; - - return result + 1; - } - - static value_type high(value_type result, uint32_t ch) - { - *result = ch; - - return result + 1; - } - - static value_type any(value_type result, uint32_t ch) - { - *result = ch; - - return result + 1; - } - }; - - struct latin1_writer - { - typedef uint8_t* value_type; - - static value_type low(value_type result, uint32_t ch) - { - *result = static_cast(ch > 255 ? '?' : ch); - - return result + 1; - } - - static value_type high(value_type result, uint32_t ch) - { - (void)ch; - - *result = '?'; - - return result + 1; - } - }; - - struct utf8_decoder - { - typedef uint8_t type; - - template static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) - { - const uint8_t utf8_byte_mask = 0x3f; - - while (size) - { - uint8_t lead = *data; - - // 0xxxxxxx -> U+0000..U+007F - if (lead < 0x80) - { - result = Traits::low(result, lead); - data += 1; - size -= 1; - - // process aligned single-byte (ascii) blocks - if ((reinterpret_cast(data) & 3) == 0) - { - // round-trip through void* to silence 'cast increases required alignment of target type' warnings - while (size >= 4 && (*static_cast(static_cast(data)) & 0x80808080) == 0) - { - result = Traits::low(result, data[0]); - result = Traits::low(result, data[1]); - result = Traits::low(result, data[2]); - result = Traits::low(result, data[3]); - data += 4; - size -= 4; - } - } - } - // 110xxxxx -> U+0080..U+07FF - else if (static_cast(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80) - { - result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask)); - data += 2; - size -= 2; - } - // 1110xxxx -> U+0800-U+FFFF - else if (static_cast(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80) - { - result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask)); - data += 3; - size -= 3; - } - // 11110xxx -> U+10000..U+10FFFF - else if (static_cast(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80) - { - result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask)); - 
data += 4; - size -= 4; - } - // 10xxxxxx or 11111xxx -> invalid - else - { - data += 1; - size -= 1; - } - } - - return result; - } - }; - - template struct utf16_decoder - { - typedef uint16_t type; - - template static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits) - { - while (size) - { - uint16_t lead = opt_swap::value ? endian_swap(*data) : *data; - - // U+0000..U+D7FF - if (lead < 0xD800) - { - result = Traits::low(result, lead); - data += 1; - size -= 1; - } - // U+E000..U+FFFF - else if (static_cast(lead - 0xE000) < 0x2000) - { - result = Traits::low(result, lead); - data += 1; - size -= 1; - } - // surrogate pair lead - else if (static_cast(lead - 0xD800) < 0x400 && size >= 2) - { - uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1]; - - if (static_cast(next - 0xDC00) < 0x400) - { - result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff)); - data += 2; - size -= 2; - } - else - { - data += 1; - size -= 1; - } - } - else - { - data += 1; - size -= 1; - } - } - - return result; - } - }; - - template struct utf32_decoder - { - typedef uint32_t type; - - template static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits) - { - while (size) - { - uint32_t lead = opt_swap::value ? endian_swap(*data) : *data; - - // U+0000..U+FFFF - if (lead < 0x10000) - { - result = Traits::low(result, lead); - data += 1; - size -= 1; - } - // U+10000..U+10FFFF - else - { - result = Traits::high(result, lead); - data += 1; - size -= 1; - } - } - - return result; - } - }; - - struct latin1_decoder - { - typedef uint8_t type; - - template static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits) - { - while (size) - { - result = Traits::low(result, *data); - data += 1; - size -= 1; - } - - return result; - } - }; - - template struct wchar_selector; - - template <> struct wchar_selector<2> - { - typedef uint16_t type; - typedef utf16_counter counter; - typedef utf16_writer writer; - typedef utf16_decoder decoder; - }; - - template <> struct wchar_selector<4> - { - typedef uint32_t type; - typedef utf32_counter counter; - typedef utf32_writer writer; - typedef utf32_decoder decoder; - }; - - typedef wchar_selector::counter wchar_counter; - typedef wchar_selector::writer wchar_writer; - - struct wchar_decoder - { - typedef wchar_t type; - - template static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits) - { - typedef wchar_selector::decoder decoder; - - return decoder::process(reinterpret_cast(data), size, result, traits); - } - }; - -#ifdef PUGIXML_WCHAR_MODE - PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length) - { - for (size_t i = 0; i < length; ++i) - result[i] = static_cast(endian_swap(static_cast::type>(data[i]))); - } -#endif -PUGI__NS_END - -PUGI__NS_BEGIN - enum chartype_t - { - ct_parse_pcdata = 1, // \0, &, \r, < - ct_parse_attr = 2, // \0, &, \r, ', " - ct_parse_attr_ws = 4, // \0, &, \r, ', ", \n, tab - ct_space = 8, // \r, \n, space, tab - ct_parse_cdata = 16, // \0, ], >, \r - ct_parse_comment = 32, // \0, -, >, \r - ct_symbol = 64, // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, . 
- ct_start_symbol = 128 // Any symbol > 127, a-z, A-Z, _, : - }; - - static const unsigned char chartype_table[256] = - { - 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63, 0, 0, // 0-15 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16-31 - 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96, 64, 0, // 32-47 - 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0, 48, 0, // 48-63 - 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 64-79 - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16, 0, 192, // 80-95 - 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 96-111 - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0, 0, 0, // 112-127 - - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, // 128+ - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, - 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192 - }; - - enum chartypex_t - { - ctx_special_pcdata = 1, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, > - ctx_special_attr = 2, // Any symbol >= 0 and < 32 (except \t), &, <, >, " - ctx_start_symbol = 4, // Any symbol > 127, a-z, A-Z, _ - ctx_digit = 8, // 0-9 - ctx_symbol = 16 // Any symbol > 127, a-z, A-Z, 0-9, _, -, . - }; - - static const unsigned char chartypex_table[256] = - { - 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15 - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31 - 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47 - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63 - - 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79 - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95 - 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111 - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127 - - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+ - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20 - }; - -#ifdef PUGIXML_WCHAR_MODE - #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast(c) < 128 ? 
table[static_cast(c)] : table[128]) & (ct)) -#else - #define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast(c)] & (ct)) -#endif - - #define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table) - #define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table) - - PUGI__FN bool is_little_endian() - { - unsigned int ui = 1; - - return *reinterpret_cast(&ui) == 1; - } - - PUGI__FN xml_encoding get_wchar_encoding() - { - PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); - - if (sizeof(wchar_t) == 2) - return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - else - return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - } - - PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length) - { - #define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; } - #define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; } - - // check if we have a non-empty XML declaration - if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space))) - return false; - - // scan XML declaration until the encoding field - for (size_t i = 6; i + 1 < size; ++i) - { - // declaration can not contain ? in quoted values - if (data[i] == '?') - return false; - - if (data[i] == 'e' && data[i + 1] == 'n') - { - size_t offset = i; - - // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed - PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o'); - PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g'); - - // S? = S? - PUGI__SCANCHARTYPE(ct_space); - PUGI__SCANCHAR('='); - PUGI__SCANCHARTYPE(ct_space); - - // the only two valid delimiters are ' and " - uint8_t delimiter = (offset < size && data[offset] == '"') ? 
'"' : '\''; - - PUGI__SCANCHAR(delimiter); - - size_t start = offset; - - out_encoding = data + offset; - - PUGI__SCANCHARTYPE(ct_symbol); - - out_length = offset - start; - - PUGI__SCANCHAR(delimiter); - - return true; - } - } - - return false; - - #undef PUGI__SCANCHAR - #undef PUGI__SCANCHARTYPE - } - - PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size) - { - // skip encoding autodetection if input buffer is too small - if (size < 4) return encoding_utf8; - - uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3]; - - // look for BOM in first few bytes - if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be; - if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le; - if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be; - if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le; - if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8; - - // look for <, (contents); - - return guess_buffer_encoding(data, size); - } - - PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) - { - size_t length = size / sizeof(char_t); - - if (is_mutable) - { - out_buffer = static_cast(const_cast(contents)); - out_length = length; - } - else - { - char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); - if (!buffer) return false; - - if (contents) - memcpy(buffer, contents, length * sizeof(char_t)); - else - assert(length == 0); - - buffer[length] = 0; - - out_buffer = buffer; - out_length = length + 1; - } - - return true; - } - -#ifdef PUGIXML_WCHAR_MODE - PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re) - { - return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) || - (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be); - } - - PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) - { - const char_t* data = static_cast(contents); - size_t length = size / sizeof(char_t); - - if (is_mutable) - { - char_t* buffer = const_cast(data); - - convert_wchar_endian_swap(buffer, data, length); - - out_buffer = buffer; - out_length = length; - } - else - { - char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); - if (!buffer) return false; - - convert_wchar_endian_swap(buffer, data, length); - buffer[length] = 0; - - out_buffer = buffer; - out_length = length + 1; - } - - return true; - } - - template PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) - { - const typename D::type* data = static_cast(contents); - size_t data_length = size / sizeof(typename D::type); - - // first pass: get length in wchar_t units - size_t length = D::process(data, data_length, 0, wchar_counter()); - - // allocate buffer of suitable length - char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); - if (!buffer) return false; - - // second pass: convert utf16 input to wchar_t - wchar_writer::value_type obegin = reinterpret_cast(buffer); - wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer()); - - assert(oend == obegin + length); - *oend = 0; - - out_buffer = buffer; - out_length = length + 1; - - return true; - } - - PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, 
xml_encoding encoding, const void* contents, size_t size, bool is_mutable) - { - // get native encoding - xml_encoding wchar_encoding = get_wchar_encoding(); - - // fast path: no conversion required - if (encoding == wchar_encoding) - return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); - - // only endian-swapping is required - if (need_endian_swap_utf(encoding, wchar_encoding)) - return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable); - - // source encoding is utf8 - if (encoding == encoding_utf8) - return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder()); - - // source encoding is utf16 - if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - - return (native_encoding == encoding) ? - convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder()) : - convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder()); - } - - // source encoding is utf32 - if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - return (native_encoding == encoding) ? - convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder()) : - convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder()); - } - - // source encoding is latin1 - if (encoding == encoding_latin1) - return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder()); - - assert(false && "Invalid encoding"); - return false; - } -#else - template PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) - { - const typename D::type* data = static_cast(contents); - size_t data_length = size / sizeof(typename D::type); - - // first pass: get length in utf8 units - size_t length = D::process(data, data_length, 0, utf8_counter()); - - // allocate buffer of suitable length - char_t* buffer = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); - if (!buffer) return false; - - // second pass: convert utf16 input to utf8 - uint8_t* obegin = reinterpret_cast(buffer); - uint8_t* oend = D::process(data, data_length, obegin, utf8_writer()); - - assert(oend == obegin + length); - *oend = 0; - - out_buffer = buffer; - out_length = length + 1; - - return true; - } - - PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size) - { - for (size_t i = 0; i < size; ++i) - if (data[i] > 127) - return i; - - return size; - } - - PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) - { - const uint8_t* data = static_cast(contents); - size_t data_length = size; - - // get size of prefix that does not need utf8 conversion - size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length); - assert(prefix_length <= data_length); - - const uint8_t* postfix = data + prefix_length; - size_t postfix_length = data_length - prefix_length; - - // if no conversion is needed, just return the original buffer - if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); - - // first pass: get length in utf8 units - size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter()); - - // allocate buffer of suitable length - char_t* buffer 
= static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); - if (!buffer) return false; - - // second pass: convert latin1 input to utf8 - memcpy(buffer, data, prefix_length); - - uint8_t* obegin = reinterpret_cast(buffer); - uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer()); - - assert(oend == obegin + length); - *oend = 0; - - out_buffer = buffer; - out_length = length + 1; - - return true; - } - - PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) - { - // fast path: no conversion required - if (encoding == encoding_utf8) - return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); - - // source encoding is utf16 - if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - - return (native_encoding == encoding) ? - convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder()) : - convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder()); - } - - // source encoding is utf32 - if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - return (native_encoding == encoding) ? - convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder()) : - convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder()); - } - - // source encoding is latin1 - if (encoding == encoding_latin1) - return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable); - - assert(false && "Invalid encoding"); - return false; - } -#endif - - PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length) - { - // get length in utf8 characters - return wchar_decoder::process(str, length, 0, utf8_counter()); - } - - PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length) - { - // convert to utf8 - uint8_t* begin = reinterpret_cast(buffer); - uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer()); - - assert(begin + size == end); - (void)!end; - (void)!size; - } - -#ifndef PUGIXML_NO_STL - PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length) - { - // first pass: get length in utf8 characters - size_t size = as_utf8_begin(str, length); - - // allocate resulting string - std::string result; - result.resize(size); - - // second pass: convert to utf8 - if (size > 0) as_utf8_end(&result[0], size, str, length); - - return result; - } - - PUGI__FN std::basic_string as_wide_impl(const char* str, size_t size) - { - const uint8_t* data = reinterpret_cast(str); - - // first pass: get length in wchar_t units - size_t length = utf8_decoder::process(data, size, 0, wchar_counter()); - - // allocate resulting string - std::basic_string result; - result.resize(length); - - // second pass: convert to wchar_t - if (length > 0) - { - wchar_writer::value_type begin = reinterpret_cast(&result[0]); - wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer()); - - assert(begin + length == end); - (void)!end; - } - - return result; - } -#endif - - template - inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target) - { - // never reuse shared memory - if (header & xml_memory_page_contents_shared_mask) return false; - - size_t 
target_length = strlength(target); - - // always reuse document buffer memory if possible - if ((header & header_mask) == 0) return target_length >= length; - - // reuse heap memory if waste is not too great - const size_t reuse_threshold = 32; - - return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2); - } - - template - PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length) - { - if (source_length == 0) - { - // empty string and null pointer are equivalent, so just deallocate old memory - xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; - - if (header & header_mask) alloc->deallocate_string(dest); - - // mark the string as not allocated - dest = 0; - header &= ~header_mask; - - return true; - } - else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest)) - { - // we can reuse old buffer, so just copy the new data (including zero terminator) - memcpy(dest, source, source_length * sizeof(char_t)); - dest[source_length] = 0; - - return true; - } - else - { - xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; - - if (!alloc->reserve()) return false; - - // allocate new buffer - char_t* buf = alloc->allocate_string(source_length + 1); - if (!buf) return false; - - // copy the string (including zero terminator) - memcpy(buf, source, source_length * sizeof(char_t)); - buf[source_length] = 0; - - // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures) - if (header & header_mask && dest) alloc->deallocate_string(dest); - - // the string is now allocated, so set the flag - dest = buf; - header |= header_mask; - - return true; - } - } - - struct gap - { - char_t* end; - size_t size; - - gap(): end(0), size(0) - { - } - - // Push new gap, move s count bytes further (skipping the gap). - // Collapse previous gap. - void push(char_t*& s, size_t count) - { - if (end) // there was a gap already; collapse it - { - // Move [old_gap_end, new_gap_start) to [old_gap_start, ...) - assert(s >= end); - memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end)); - } - - s += count; // end of current gap - - // "merge" two gaps - end = s; - size += count; - } - - // Collapse all gaps, return past-the-end pointer - char_t* flush(char_t* s) - { - if (end) - { - // Move [old_gap_end, current_pos) to [old_gap_start, ...) - assert(s >= end); - memmove(end - size, end, reinterpret_cast(s) - reinterpret_cast(end)); - - return s - size; - } - else return s; - } - }; - - PUGI__FN char_t* strconv_escape(char_t* s, gap& g) - { - char_t* stre = s + 1; - - switch (*stre) - { - case '#': // &#... - { - unsigned int ucsc = 0; - - if (stre[1] == 'x') // &#x... (hex code) - { - stre += 2; - - char_t ch = *stre; - - if (ch == ';') return stre; - - for (;;) - { - if (static_cast(ch - '0') <= 9) - ucsc = 16 * ucsc + (ch - '0'); - else if (static_cast((ch | ' ') - 'a') <= 5) - ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10); - else if (ch == ';') - break; - else // cancel - return stre; - - ch = *++stre; - } - - ++stre; - } - else // &#... 
(dec code) - { - char_t ch = *++stre; - - if (ch == ';') return stre; - - for (;;) - { - if (static_cast(static_cast(ch) - '0') <= 9) - ucsc = 10 * ucsc + (ch - '0'); - else if (ch == ';') - break; - else // cancel - return stre; - - ch = *++stre; - } - - ++stre; - } - - #ifdef PUGIXML_WCHAR_MODE - s = reinterpret_cast(wchar_writer::any(reinterpret_cast(s), ucsc)); - #else - s = reinterpret_cast(utf8_writer::any(reinterpret_cast(s), ucsc)); - #endif - - g.push(s, stre - s); - return stre; - } - - case 'a': // &a - { - ++stre; - - if (*stre == 'm') // &am - { - if (*++stre == 'p' && *++stre == ';') // & - { - *s++ = '&'; - ++stre; - - g.push(s, stre - s); - return stre; - } - } - else if (*stre == 'p') // &ap - { - if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // ' - { - *s++ = '\''; - ++stre; - - g.push(s, stre - s); - return stre; - } - } - break; - } - - case 'g': // &g - { - if (*++stre == 't' && *++stre == ';') // > - { - *s++ = '>'; - ++stre; - - g.push(s, stre - s); - return stre; - } - break; - } - - case 'l': // &l - { - if (*++stre == 't' && *++stre == ';') // < - { - *s++ = '<'; - ++stre; - - g.push(s, stre - s); - return stre; - } - break; - } - - case 'q': // &q - { - if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // " - { - *s++ = '"'; - ++stre; - - g.push(s, stre - s); - return stre; - } - break; - } - - default: - break; - } - - return stre; - } - - // Parser utilities - #define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e))) - #define PUGI__SKIPWS() { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; } - #define PUGI__OPTSET(OPT) ( optmsk & (OPT) ) - #define PUGI__PUSHNODE(TYPE) { cursor = append_new_node(cursor, alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); } - #define PUGI__POPNODE() { cursor = cursor->parent; } - #define PUGI__SCANFOR(X) { while (*s != 0 && !(X)) ++s; } - #define PUGI__SCANWHILE(X) { while (X) ++s; } - #define PUGI__SCANWHILE_UNROLL(X) { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } } - #define PUGI__ENDSEG() { ch = *s; *s = 0; ++s; } - #define PUGI__THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast(0) - #define PUGI__CHECK_ERROR(err, m) { if (*s == 0) PUGI__THROW_ERROR(err, m); } - - PUGI__FN char_t* strconv_comment(char_t* s, char_t endch) - { - gap g; - - while (true) - { - PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment)); - - if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair - { - *s++ = '\n'; // replace first one with 0x0a - - if (*s == '\n') g.push(s, 1); - } - else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here - { - *g.flush(s) = 0; - - return s + (s[2] == '>' ? 
3 : 2); - } - else if (*s == 0) - { - return 0; - } - else ++s; - } - } - - PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch) - { - gap g; - - while (true) - { - PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata)); - - if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair - { - *s++ = '\n'; // replace first one with 0x0a - - if (*s == '\n') g.push(s, 1); - } - else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here - { - *g.flush(s) = 0; - - return s + 1; - } - else if (*s == 0) - { - return 0; - } - else ++s; - } - } - - typedef char_t* (*strconv_pcdata_t)(char_t*); - - template struct strconv_pcdata_impl - { - static char_t* parse(char_t* s) - { - gap g; - - char_t* begin = s; - - while (true) - { - PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata)); - - if (*s == '<') // PCDATA ends here - { - char_t* end = g.flush(s); - - if (opt_trim::value) - while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) - --end; - - *end = 0; - - return s + 1; - } - else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair - { - *s++ = '\n'; // replace first one with 0x0a - - if (*s == '\n') g.push(s, 1); - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - else if (*s == 0) - { - char_t* end = g.flush(s); - - if (opt_trim::value) - while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) - --end; - - *end = 0; - - return s; - } - else ++s; - } - } - }; - - PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask) - { - PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800); - - switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (eol escapes trim) - { - case 0: return strconv_pcdata_impl::parse; - case 1: return strconv_pcdata_impl::parse; - case 2: return strconv_pcdata_impl::parse; - case 3: return strconv_pcdata_impl::parse; - case 4: return strconv_pcdata_impl::parse; - case 5: return strconv_pcdata_impl::parse; - case 6: return strconv_pcdata_impl::parse; - case 7: return strconv_pcdata_impl::parse; - default: assert(false); return 0; // should not get here - } - } - - typedef char_t* (*strconv_attribute_t)(char_t*, char_t); - - template struct strconv_attribute_impl - { - static char_t* parse_wnorm(char_t* s, char_t end_quote) - { - gap g; - - // trim leading whitespaces - if (PUGI__IS_CHARTYPE(*s, ct_space)) - { - char_t* str = s; - - do ++str; - while (PUGI__IS_CHARTYPE(*str, ct_space)); - - g.push(s, str - s); - } - - while (true) - { - PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space)); - - if (*s == end_quote) - { - char_t* str = g.flush(s); - - do *str-- = 0; - while (PUGI__IS_CHARTYPE(*str, ct_space)); - - return s + 1; - } - else if (PUGI__IS_CHARTYPE(*s, ct_space)) - { - *s++ = ' '; - - if (PUGI__IS_CHARTYPE(*s, ct_space)) - { - char_t* str = s + 1; - while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str; - - g.push(s, str - s); - } - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - else if (!*s) - { - return 0; - } - else ++s; - } - } - - static char_t* parse_wconv(char_t* s, char_t end_quote) - { - gap g; - - while (true) - { - PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws)); - - if (*s == end_quote) - { - *g.flush(s) = 0; - - return s + 1; - } - else if (PUGI__IS_CHARTYPE(*s, ct_space)) - { - if (*s == '\r') - { - *s++ = ' '; - - if (*s == '\n') g.push(s, 1); - } - else *s++ = ' '; - } - else if (opt_escape::value && *s == 
'&') - { - s = strconv_escape(s, g); - } - else if (!*s) - { - return 0; - } - else ++s; - } - } - - static char_t* parse_eol(char_t* s, char_t end_quote) - { - gap g; - - while (true) - { - PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); - - if (*s == end_quote) - { - *g.flush(s) = 0; - - return s + 1; - } - else if (*s == '\r') - { - *s++ = '\n'; - - if (*s == '\n') g.push(s, 1); - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - else if (!*s) - { - return 0; - } - else ++s; - } - } - - static char_t* parse_simple(char_t* s, char_t end_quote) - { - gap g; - - while (true) - { - PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr)); - - if (*s == end_quote) - { - *g.flush(s) = 0; - - return s + 1; - } - else if (opt_escape::value && *s == '&') - { - s = strconv_escape(s, g); - } - else if (!*s) - { - return 0; - } - else ++s; - } - } - }; - - PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask) - { - PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80); - - switch ((optmask >> 4) & 15) // get bitmask for flags (wconv wnorm eol escapes) - { - case 0: return strconv_attribute_impl::parse_simple; - case 1: return strconv_attribute_impl::parse_simple; - case 2: return strconv_attribute_impl::parse_eol; - case 3: return strconv_attribute_impl::parse_eol; - case 4: return strconv_attribute_impl::parse_wconv; - case 5: return strconv_attribute_impl::parse_wconv; - case 6: return strconv_attribute_impl::parse_wconv; - case 7: return strconv_attribute_impl::parse_wconv; - case 8: return strconv_attribute_impl::parse_wnorm; - case 9: return strconv_attribute_impl::parse_wnorm; - case 10: return strconv_attribute_impl::parse_wnorm; - case 11: return strconv_attribute_impl::parse_wnorm; - case 12: return strconv_attribute_impl::parse_wnorm; - case 13: return strconv_attribute_impl::parse_wnorm; - case 14: return strconv_attribute_impl::parse_wnorm; - case 15: return strconv_attribute_impl::parse_wnorm; - default: assert(false); return 0; // should not get here - } - } - - inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0) - { - xml_parse_result result; - result.status = status; - result.offset = offset; - - return result; - } - - struct xml_parser - { - xml_allocator alloc; - xml_allocator* alloc_state; - char_t* error_offset; - xml_parse_status error_status; - - xml_parser(xml_allocator* alloc_): alloc(*alloc_), alloc_state(alloc_), error_offset(0), error_status(status_ok) - { - } - - ~xml_parser() - { - *alloc_state = alloc; - } - - // DOCTYPE consists of nested sections of the following possible types: - // , , "...", '...' - // - // - // First group can not contain nested groups - // Second group can contain nested groups of the same type - // Third group can contain all other groups - char_t* parse_doctype_primitive(char_t* s) - { - if (*s == '"' || *s == '\'') - { - // quoted string - char_t ch = *s++; - PUGI__SCANFOR(*s == ch); - if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); - - s++; - } - else if (s[0] == '<' && s[1] == '?') - { - // - s += 2; - PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype - if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); - - s += 2; - } - else if (s[0] == '<' && s[1] == '!' 
&& s[2] == '-' && s[3] == '-') - { - s += 4; - PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype - if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); - - s += 3; - } - else PUGI__THROW_ERROR(status_bad_doctype, s); - - return s; - } - - char_t* parse_doctype_ignore(char_t* s) - { - size_t depth = 0; - - assert(s[0] == '<' && s[1] == '!' && s[2] == '['); - s += 3; - - while (*s) - { - if (s[0] == '<' && s[1] == '!' && s[2] == '[') - { - // nested ignore section - s += 3; - depth++; - } - else if (s[0] == ']' && s[1] == ']' && s[2] == '>') - { - // ignore section end - s += 3; - - if (depth == 0) - return s; - - depth--; - } - else s++; - } - - PUGI__THROW_ERROR(status_bad_doctype, s); - } - - char_t* parse_doctype_group(char_t* s, char_t endch) - { - size_t depth = 0; - - assert((s[0] == '<' || s[0] == 0) && s[1] == '!'); - s += 2; - - while (*s) - { - if (s[0] == '<' && s[1] == '!' && s[2] != '-') - { - if (s[2] == '[') - { - // ignore - s = parse_doctype_ignore(s); - if (!s) return s; - } - else - { - // some control group - s += 2; - depth++; - } - } - else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') - { - // unknown tag (forbidden), or some primitive group - s = parse_doctype_primitive(s); - if (!s) return s; - } - else if (*s == '>') - { - if (depth == 0) - return s; - - depth--; - s++; - } - else s++; - } - - if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s); - - return s; - } - - char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) - { - // parse node contents, starting with exclamation mark - ++s; - - if (*s == '-') // 'value = s; // Save the offset. - } - - if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments)) - { - s = strconv_comment(s, endch); - - if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value); - } - else - { - // Scan for terminating '-->'. - PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')); - PUGI__CHECK_ERROR(status_bad_comment, s); - - if (PUGI__OPTSET(parse_comments)) - *s = 0; // Zero-terminate this segment at the first terminating '-'. - - s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'. - } - } - else PUGI__THROW_ERROR(status_bad_comment, s); - } - else if (*s == '[') - { - // 'value = s; // Save the offset. - - if (PUGI__OPTSET(parse_eol)) - { - s = strconv_cdata(s, endch); - - if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value); - } - else - { - // Scan for terminating ']]>'. - PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')); - PUGI__CHECK_ERROR(status_bad_cdata, s); - - *s++ = 0; // Zero-terminate this segment. - } - } - else // Flagged for discard, but we still have to scan for the terminator. - { - // Scan for terminating ']]>'. - PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')); - PUGI__CHECK_ERROR(status_bad_cdata, s); - - ++s; - } - - s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'. 
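// A minimal standalone sketch (all names below are illustrative, not
// pugixml's API) of the "stolen terminator" idiom that PUGI__ENDSWITH
// compensates for: parse() saves the real last character of the buffer into
// `endch` and overwrites it with '\0', so every scan loop can stop on '\0'
// without a separate bounds check.
#include <cstddef>

static bool ends_with(char c, char e, char endch)
{
	return c == e || (c == '\0' && endch == e); // '\0' at the end stands in for the stolen char
}

// Returns true if the scan finds "]]>", where the final '>' may have been
// replaced by the terminating '\0'.
static bool find_cdata_end(char* s, std::size_t length)
{
	char endch = s[length - 1]; // save the real last character
	s[length - 1] = '\0';       // zero-terminate to make the scan loop cheap

	for (; *s; ++s)
		if (s[0] == ']' && s[1] == ']' && ends_with(s[2], '>', endch))
			return true; // short-circuit keeps s[2] unread when s[1] is the '\0'
	return false;
}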
- } - else PUGI__THROW_ERROR(status_bad_cdata, s); - } - else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E')) - { - s -= 2; - - if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s); - - char_t* mark = s + 9; - - s = parse_doctype_group(s, endch); - if (!s) return s; - - assert((*s == 0 && endch == '>') || *s == '>'); - if (*s) *s++ = 0; - - if (PUGI__OPTSET(parse_doctype)) - { - while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark; - - PUGI__PUSHNODE(node_doctype); - - cursor->value = mark; - } - } - else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s); - else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s); - else PUGI__THROW_ERROR(status_unrecognized_tag, s); - - return s; - } - - char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch) - { - // load into registers - xml_node_struct* cursor = ref_cursor; - char_t ch = 0; - - // parse node contents, starting with question mark - ++s; - - // read PI target - char_t* target = s; - - if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s); - - PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); - PUGI__CHECK_ERROR(status_bad_pi, s); - - // determine node type; stricmp / strcasecmp is not portable - bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s; - - if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi)) - { - if (declaration) - { - // disallow non top-level declarations - if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s); - - PUGI__PUSHNODE(node_declaration); - } - else - { - PUGI__PUSHNODE(node_pi); - } - - cursor->name = target; - - PUGI__ENDSEG(); - - // parse value/attributes - if (ch == '?') - { - // empty node - if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s); - s += (*s == '>'); - - PUGI__POPNODE(); - } - else if (PUGI__IS_CHARTYPE(ch, ct_space)) - { - PUGI__SKIPWS(); - - // scan for tag end - char_t* value = s; - - PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>')); - PUGI__CHECK_ERROR(status_bad_pi, s); - - if (declaration) - { - // replace ending ? with / so that 'element' terminates properly - *s = '/'; - - // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES - s = value; - } - else - { - // store value and step over > - cursor->value = value; - - PUGI__POPNODE(); - - PUGI__ENDSEG(); - - s += (*s == '>'); - } - } - else PUGI__THROW_ERROR(status_bad_pi, s); - } - else - { - // scan for tag end - PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>')); - PUGI__CHECK_ERROR(status_bad_pi, s); - - s += (s[1] == '>' ? 2 : 1); - } - - // store from registers - ref_cursor = cursor; - - return s; - } - - char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch) - { - strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk); - strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk); - if (!strconv_pcdata) { - return nullptr; - } - char_t ch = 0; - xml_node_struct* cursor = root; - char_t* mark = s; - - while (*s != 0) - { - if (*s == '<') - { - ++s; - - LOC_TAG: - if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...' - { - PUGI__PUSHNODE(node_element); // Append a new node to the tree. - - cursor->name = s; - - PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. 
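// PUGI__SCANWHILE_UNROLL above is a manually 4x-unrolled scan loop. A
// standalone sketch of the same idiom, assuming a plain std::isalnum()
// predicate in place of the library's chartype tables:
#include <cctype>

static char* scan_symbol_run(char* s)
{
	for (;;)
	{
		// four predicate tests per iteration; break with the exact offset
		if (!std::isalnum((unsigned char)s[0])) { break; }
		if (!std::isalnum((unsigned char)s[1])) { s += 1; break; }
		if (!std::isalnum((unsigned char)s[2])) { s += 2; break; }
		if (!std::isalnum((unsigned char)s[3])) { s += 3; break; }
		s += 4;
	}
	return s; // first character that is not part of the symbol run
}
// The '\0' terminator fails the predicate, so the unrolled reads never run
// past the end of a zero-terminated buffer.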
- PUGI__ENDSEG(); // Save char in 'ch', terminate & step over. - - if (ch == '>') - { - // end of tag - } - else if (PUGI__IS_CHARTYPE(ch, ct_space)) - { - LOC_ATTRIBUTES: - while (true) - { - PUGI__SKIPWS(); // Eat any whitespace. - - if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #... - { - xml_attribute_struct* a = append_new_attribute(cursor, alloc); // Make space for this attribute. - if (!a) PUGI__THROW_ERROR(status_out_of_memory, s); - - a->name = s; // Save the offset. - - PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. - PUGI__ENDSEG(); // Save char in 'ch', terminate & step over. - - if (PUGI__IS_CHARTYPE(ch, ct_space)) - { - PUGI__SKIPWS(); // Eat any whitespace. - - ch = *s; - ++s; - } - - if (ch == '=') // '<... #=...' - { - PUGI__SKIPWS(); // Eat any whitespace. - - if (*s == '"' || *s == '\'') // '<... #="...' - { - ch = *s; // Save quote char to avoid breaking on "''" -or- '""'. - ++s; // Step over the quote. - a->value = s; // Save the offset. - - s = strconv_attribute(s, ch); - - if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value); - - // After this line the loop continues from the start; - // Whitespaces, / and > are ok, symbols and EOF are wrong, - // everything else will be detected - if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s); - } - else PUGI__THROW_ERROR(status_bad_attribute, s); - } - else PUGI__THROW_ERROR(status_bad_attribute, s); - } - else if (*s == '/') - { - ++s; - - if (*s == '>') - { - PUGI__POPNODE(); - s++; - break; - } - else if (*s == 0 && endch == '>') - { - PUGI__POPNODE(); - break; - } - else PUGI__THROW_ERROR(status_bad_start_element, s); - } - else if (*s == '>') - { - ++s; - - break; - } - else if (*s == 0 && endch == '>') - { - break; - } - else PUGI__THROW_ERROR(status_bad_start_element, s); - } - - // !!! - } - else if (ch == '/') // '<#.../' - { - if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s); - - PUGI__POPNODE(); // Pop. - - s += (*s == '>'); - } - else if (ch == 0) - { - // we stepped over null terminator, backtrack & handle closing tag - --s; - - if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s); - } - else PUGI__THROW_ERROR(status_bad_start_element, s); - } - else if (*s == '/') - { - ++s; - - char_t* name = cursor->name; - if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s); - - while (PUGI__IS_CHARTYPE(*s, ct_symbol)) - { - if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s); - } - - if (*name) - { - if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s); - else PUGI__THROW_ERROR(status_end_element_mismatch, s); - } - - PUGI__POPNODE(); // Pop. - - PUGI__SKIPWS(); - - if (*s == 0) - { - if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s); - } - else - { - if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s); - ++s; - } - } - else if (*s == '?') // 'first_child) continue; - } - } - - if (!PUGI__OPTSET(parse_trim_pcdata)) - s = mark; - - if (cursor->parent || PUGI__OPTSET(parse_fragment)) - { - if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value) - { - cursor->value = s; // Save the offset. - } - else - { - PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree. - - cursor->value = s; // Save the offset. - - PUGI__POPNODE(); // Pop since this is a standalone. 
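// strconv_pcdata/strconv_escape rewrite text in place: decoded XML text is
// never longer than its source, and the `gap` helper defers the compaction
// memmove until the segment ends. A simplified write-cursor variant of the
// same idea (a hypothetical helper that decodes only "&amp;", not the
// library's implementation):
#include <cstring>

static void decode_amp_inplace(char* s)
{
	char* out = s; // write cursor, never ahead of the read cursor
	while (*s)
	{
		if (std::strncmp(s, "&amp;", 5) == 0)
		{
			*out++ = '&'; // five input characters collapse to one
			s += 5;
		}
		else
			*out++ = *s++;
	}
	*out = '\0';
}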
- } - - s = strconv_pcdata(s); - - if (!*s) break; - } - else - { - PUGI__SCANFOR(*s == '<'); // '...<' - if (!*s) break; - - ++s; - } - - // We're after '<' - goto LOC_TAG; - } - } - - // check that last tag is closed - if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s); - - return s; - } - - #ifdef PUGIXML_WCHAR_MODE - static char_t* parse_skip_bom(char_t* s) - { - unsigned int bom = 0xfeff; - return (s[0] == static_cast(bom)) ? s + 1 : s; - } - #else - static char_t* parse_skip_bom(char_t* s) - { - return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s; - } - #endif - - static bool has_element_node_siblings(xml_node_struct* node) - { - while (node) - { - if (PUGI__NODETYPE(node) == node_element) return true; - - node = node->next_sibling; - } - - return false; - } - - static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk) - { - // early-out for empty documents - if (length == 0) - return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element); - - // get last child of the root before parsing - xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0; - - // create parser on stack - xml_parser parser(static_cast(xmldoc)); - - // save last character and make buffer zero-terminated (speeds up parsing) - char_t endch = buffer[length - 1]; - buffer[length - 1] = 0; - - // skip BOM to make sure it does not end up as part of parse output - char_t* buffer_data = parse_skip_bom(buffer); - - // perform actual parsing - parser.parse_tree(buffer_data, root, optmsk, endch); - - xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0); - assert(result.offset >= 0 && static_cast(result.offset) <= length); - - if (result) - { - // since we removed last character, we have to handle the only possible false positive (stray <) - if (endch == '<') - return make_parse_result(status_unrecognized_tag, length - 1); - - // check if there are any element nodes parsed - xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0; - - if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed)) - return make_parse_result(status_no_document_element, length - 1); - } - else - { - // roll back offset if it occurs on a null terminator in the source buffer - if (result.offset > 0 && static_cast(result.offset) == length - 1 && endch == 0) - result.offset--; - } - - return result; - } - }; - - // Output facilities - PUGI__FN xml_encoding get_write_native_encoding() - { - #ifdef PUGIXML_WCHAR_MODE - return get_wchar_encoding(); - #else - return encoding_utf8; - #endif - } - - PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding) - { - // replace wchar encoding with utf implementation - if (encoding == encoding_wchar) return get_wchar_encoding(); - - // replace utf16 encoding with utf16 with specific endianness - if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - - // replace utf32 encoding with utf32 with specific endianness - if (encoding == encoding_utf32) return is_little_endian() ? 
encoding_utf32_le : encoding_utf32_be; - - // only do autodetection if no explicit encoding is requested - if (encoding != encoding_auto) return encoding; - - // assume utf8 encoding - return encoding_utf8; - } - - template PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T) - { - PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); - - typename T::value_type end = D::process(reinterpret_cast(data), length, dest, T()); - - return static_cast(end - dest) * sizeof(*dest); - } - - template PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap) - { - PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); - - typename T::value_type end = D::process(reinterpret_cast(data), length, dest, T()); - - if (opt_swap) - { - for (typename T::value_type i = dest; i != end; ++i) - *i = endian_swap(*i); - } - - return static_cast(end - dest) * sizeof(*dest); - } - -#ifdef PUGIXML_WCHAR_MODE - PUGI__FN size_t get_valid_length(const char_t* data, size_t length) - { - if (length < 1) return 0; - - // discard last character if it's the lead of a surrogate pair - return (sizeof(wchar_t) == 2 && static_cast(static_cast(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length; - } - - PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) - { - // only endian-swapping is required - if (need_endian_swap_utf(encoding, get_wchar_encoding())) - { - convert_wchar_endian_swap(r_char, data, length); - - return length * sizeof(char_t); - } - - // convert to utf8 - if (encoding == encoding_utf8) - return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer()); - - // convert to utf16 - if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; - - return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding); - } - - // convert to utf32 - if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding); - } - - // convert to latin1 - if (encoding == encoding_latin1) - return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer()); - - assert(false && "Invalid encoding"); - return 0; - } -#else - PUGI__FN size_t get_valid_length(const char_t* data, size_t length) - { - if (length < 5) return 0; - - for (size_t i = 1; i <= 4; ++i) - { - uint8_t ch = static_cast(data[length - i]); - - // either a standalone character or a leading one - if ((ch & 0xc0) != 0x80) return length - i; - } - - // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk - return length; - } - - PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) - { - if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) - { - xml_encoding native_encoding = is_little_endian() ? 
encoding_utf16_le : encoding_utf16_be; - - return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding); - } - - if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) - { - xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; - - return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding); - } - - if (encoding == encoding_latin1) - return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer()); - - assert(false && "Invalid encoding"); - return 0; - } -#endif - - class xml_buffered_writer - { - xml_buffered_writer(const xml_buffered_writer&); - xml_buffered_writer& operator=(const xml_buffered_writer&); - - public: - xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding)) - { - PUGI__STATIC_ASSERT(bufcapacity >= 8); - } - - size_t flush() - { - flush(buffer, bufsize); - bufsize = 0; - return 0; - } - - void flush(const char_t* data, size_t size) - { - if (size == 0) return; - - // fast path, just write data - if (encoding == get_write_native_encoding()) - writer.write(data, size * sizeof(char_t)); - else - { - // convert chunk - size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding); - assert(result <= sizeof(scratch)); - - // write data - writer.write(scratch.data_u8, result); - } - } - - void write_direct(const char_t* data, size_t length) - { - // flush the remaining buffer contents - flush(); - - // handle large chunks - if (length > bufcapacity) - { - if (encoding == get_write_native_encoding()) - { - // fast path, can just write data chunk - writer.write(data, length * sizeof(char_t)); - return; - } - - // need to convert in suitable chunks - while (length > bufcapacity) - { - // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer - // and form a complete codepoint sequence (i.e. 
discard start of last codepoint if necessary) - size_t chunk_size = get_valid_length(data, bufcapacity); - assert(chunk_size); - - // convert chunk and write - flush(data, chunk_size); - - // iterate - data += chunk_size; - length -= chunk_size; - } - - // small tail is copied below - bufsize = 0; - } - - memcpy(buffer + bufsize, data, length * sizeof(char_t)); - bufsize += length; - } - - void write_buffer(const char_t* data, size_t length) - { - size_t offset = bufsize; - - if (offset < bufcapacity && offset + length <= bufcapacity) - { - memcpy(buffer + offset, data, length * sizeof(char_t)); - bufsize = offset + length; - } - else - { - write_direct(data, length); - } - } - - void write_string(const char_t* data) - { - // write the part of the string that fits in the buffer - size_t offset = bufsize; - - while (*data && offset < bufcapacity) - buffer[offset++] = *data++; - - // write the rest - if (offset < bufcapacity) - { - bufsize = offset; - } - else - { - // backtrack a bit if we have split the codepoint - size_t length = offset - bufsize; - size_t extra = length - get_valid_length(data - length, length); - - bufsize = offset - extra; - - write_direct(data - extra, strlength(data) + extra); - } - } - - void write(char_t d0) - { - size_t offset = bufsize; - if (offset > bufcapacity - 1) offset = flush(); - - buffer[offset + 0] = d0; - bufsize = offset + 1; - } - - void write(char_t d0, char_t d1) - { - size_t offset = bufsize; - if (offset > bufcapacity - 2) offset = flush(); - - buffer[offset + 0] = d0; - buffer[offset + 1] = d1; - bufsize = offset + 2; - } - - void write(char_t d0, char_t d1, char_t d2) - { - size_t offset = bufsize; - if (offset > bufcapacity - 3) offset = flush(); - - buffer[offset + 0] = d0; - buffer[offset + 1] = d1; - buffer[offset + 2] = d2; - bufsize = offset + 3; - } - - void write(char_t d0, char_t d1, char_t d2, char_t d3) - { - size_t offset = bufsize; - if (offset > bufcapacity - 4) offset = flush(); - - buffer[offset + 0] = d0; - buffer[offset + 1] = d1; - buffer[offset + 2] = d2; - buffer[offset + 3] = d3; - bufsize = offset + 4; - } - - void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) - { - size_t offset = bufsize; - if (offset > bufcapacity - 5) offset = flush(); - - buffer[offset + 0] = d0; - buffer[offset + 1] = d1; - buffer[offset + 2] = d2; - buffer[offset + 3] = d3; - buffer[offset + 4] = d4; - bufsize = offset + 5; - } - - void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) - { - size_t offset = bufsize; - if (offset > bufcapacity - 6) offset = flush(); - - buffer[offset + 0] = d0; - buffer[offset + 1] = d1; - buffer[offset + 2] = d2; - buffer[offset + 3] = d3; - buffer[offset + 4] = d4; - buffer[offset + 5] = d5; - bufsize = offset + 6; - } - - // utf8 maximum expansion: x4 (-> utf32) - // utf16 maximum expansion: x2 (-> utf32) - // utf32 maximum expansion: x1 - enum - { - bufcapacitybytes = - #ifdef PUGIXML_MEMORY_OUTPUT_STACK - PUGIXML_MEMORY_OUTPUT_STACK - #else - 10240 - #endif - , - bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4) - }; - - char_t buffer[bufcapacity]; - - union - { - uint8_t data_u8[4 * bufcapacity]; - uint16_t data_u16[2 * bufcapacity]; - uint32_t data_u32[bufcapacity]; - char_t data_char[bufcapacity]; - } scratch; - - xml_writer& writer; - size_t bufsize; - xml_encoding encoding; - }; - - PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type) - { - while (*s) - { - const char_t* prev = s; - - // While *s is a usual symbol - 
PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type)); - - writer.write_buffer(prev, static_cast(s - prev)); - - switch (*s) - { - case 0: break; - case '&': - writer.write('&', 'a', 'm', 'p', ';'); - ++s; - break; - case '<': - writer.write('&', 'l', 't', ';'); - ++s; - break; - case '>': - writer.write('&', 'g', 't', ';'); - ++s; - break; - case '"': - writer.write('&', 'q', 'u', 'o', 't', ';'); - ++s; - break; - default: // s is not a usual symbol - { - unsigned int ch = static_cast(*s++); - assert(ch < 32); - - writer.write('&', '#', static_cast((ch / 10) + '0'), static_cast((ch % 10) + '0'), ';'); - } - } - } - } - - PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags) - { - if (flags & format_no_escapes) - writer.write_string(s); - else - text_output_escaped(writer, s, type); - } - - PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s) - { - do - { - writer.write('<', '!', '[', 'C', 'D'); - writer.write('A', 'T', 'A', '['); - - const char_t* prev = s; - - // look for ]]> sequence - we can't output it as is since it terminates CDATA - while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s; - - // skip ]] if we stopped at ]]>, > will go to the next CDATA section - if (*s) s += 2; - - size_t bufLenght = static_cast(s - prev); - - writer.write_buffer(prev, bufLenght); - - writer.write(']', ']', '>'); - } - while (*s); - } - - PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth) - { - switch (indent_length) - { - case 1: - { - for (unsigned int i = 0; i < depth; ++i) - writer.write(indent[0]); - break; - } - - case 2: - { - for (unsigned int i = 0; i < depth; ++i) - writer.write(indent[0], indent[1]); - break; - } - - case 3: - { - for (unsigned int i = 0; i < depth; ++i) - writer.write(indent[0], indent[1], indent[2]); - break; - } - - case 4: - { - for (unsigned int i = 0; i < depth; ++i) - writer.write(indent[0], indent[1], indent[2], indent[3]); - break; - } - - default: - { - for (unsigned int i = 0; i < depth; ++i) - writer.write_buffer(indent, indent_length); - } - } - } - - PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s) - { - writer.write('<', '!', '-', '-'); - - while (*s) - { - const char_t* prev = s; - - // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body - while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s; - - size_t bufLenght = static_cast(s - prev); - - writer.write_buffer(prev, bufLenght); - - if (*s) - { - assert(*s == '-'); - - writer.write('-', ' '); - ++s; - } - } - - writer.write('-', '-', '>'); - } - - PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s) - { - while (*s) - { - const char_t* prev = s; - - // look for ?> sequence - we can't output it since ?> terminates PI - while (*s && !(s[0] == '?' && s[1] == '>')) ++s; - - writer.write_buffer(prev, static_cast(s - prev)); - - if (*s) - { - assert(s[0] == '?' 
&& s[1] == '>'); - - writer.write('?', ' ', '>'); - s += 2; - } - } - } - - PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) - { - const char_t* default_name = PUGIXML_TEXT(":anonymous"); - - for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) - { - if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes) - { - writer.write('\n'); - - text_output_indent(writer, indent, indent_length, depth + 1); - } - else - { - writer.write(' '); - } - - writer.write_string(a->name ? a->name + 0 : default_name); - writer.write('=', '"'); - - if (a->value) - text_output(writer, a->value, ctx_special_attr, flags); - - writer.write('"'); - } - } - - PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) - { - const char_t* default_name = PUGIXML_TEXT(":anonymous"); - const char_t* name = node->name ? node->name + 0 : default_name; - - writer.write('<'); - writer.write_string(name); - - if (node->first_attribute) - node_output_attributes(writer, node, indent, indent_length, flags, depth); - - // element nodes can have value if parse_embed_pcdata was used - if (!node->value) - { - if (!node->first_child) - { - if ((flags & format_raw) == 0) - writer.write(' '); - - writer.write('/', '>'); - - return false; - } - else - { - writer.write('>'); - - return true; - } - } - else - { - writer.write('>'); - - text_output(writer, node->value, ctx_special_pcdata, flags); - - if (!node->first_child) - { - writer.write('<', '/'); - writer.write_string(name); - writer.write('>'); - - return false; - } - else - { - return true; - } - } - } - - PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node) - { - const char_t* default_name = PUGIXML_TEXT(":anonymous"); - const char_t* name = node->name ? node->name + 0 : default_name; - - writer.write('<', '/'); - writer.write_string(name); - writer.write('>'); - } - - PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags) - { - const char_t* default_name = PUGIXML_TEXT(":anonymous"); - - switch (PUGI__NODETYPE(node)) - { - case node_pcdata: - text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags); - break; - - case node_cdata: - text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT("")); - break; - - case node_comment: - node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT("")); - break; - - case node_pi: - writer.write('<', '?'); - writer.write_string(node->name ? node->name + 0 : default_name); - - if (node->value) - { - writer.write(' '); - node_output_pi_value(writer, node->value); - } - - writer.write('?', '>'); - break; - - case node_declaration: - writer.write('<', '?'); - writer.write_string(node->name ? 
node->name + 0 : default_name); - node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0); - writer.write('?', '>'); - break; - - case node_doctype: - writer.write('<', '!', 'D', 'O', 'C'); - writer.write('T', 'Y', 'P', 'E'); - - if (node->value) - { - writer.write(' '); - writer.write_string(node->value); - } - - writer.write('>'); - break; - - default: - assert(false && "Invalid node type"); - } - } - - enum indent_flags_t - { - indent_newline = 1, - indent_indent = 2 - }; - - PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth) - { - size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0; - unsigned int indent_flags = indent_indent; - - xml_node_struct* node = root; - - do - { - assert(node); - - // begin writing current node - if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata) - { - node_output_simple(writer, node, flags); - - indent_flags = 0; - } - else - { - if ((indent_flags & indent_newline) && (flags & format_raw) == 0) - writer.write('\n'); - - if ((indent_flags & indent_indent) && indent_length) - text_output_indent(writer, indent, indent_length, depth); - - if (PUGI__NODETYPE(node) == node_element) - { - indent_flags = indent_newline | indent_indent; - - if (node_output_start(writer, node, indent, indent_length, flags, depth)) - { - // element nodes can have value if parse_embed_pcdata was used - if (node->value) - indent_flags = 0; - - node = node->first_child; - depth++; - continue; - } - } - else if (PUGI__NODETYPE(node) == node_document) - { - indent_flags = indent_indent; - - if (node->first_child) - { - node = node->first_child; - continue; - } - } - else - { - node_output_simple(writer, node, flags); - - indent_flags = indent_newline | indent_indent; - } - } - - // continue to the next node - while (node != root) - { - if (node->next_sibling) - { - node = node->next_sibling; - break; - } - - node = node->parent; - - // write closing node - if (PUGI__NODETYPE(node) == node_element) - { - depth--; - - if ((indent_flags & indent_newline) && (flags & format_raw) == 0) - writer.write('\n'); - - if ((indent_flags & indent_indent) && indent_length) - text_output_indent(writer, indent, indent_length, depth); - - node_output_end(writer, node); - - indent_flags = indent_newline | indent_indent; - } - } - } - while (node != root); - - if ((indent_flags & indent_newline) && (flags & format_raw) == 0) - writer.write('\n'); - } - - PUGI__FN bool has_declaration(xml_node_struct* node) - { - for (xml_node_struct* child = node->first_child; child; child = child->next_sibling) - { - xml_node_type type = PUGI__NODETYPE(child); - - if (type == node_declaration) return true; - if (type == node_element) return false; - } - - return false; - } - - PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node) - { - for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) - if (a == attr) - return true; - - return false; - } - - PUGI__FN bool allow_insert_attribute(xml_node_type parent) - { - return parent == node_element || parent == node_declaration; - } - - PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child) - { - if (parent != node_document && parent != node_element) return false; - if (child == node_document || child == node_null) return false; - if (parent != node_document && (child == node_declaration || child 
== node_doctype)) return false; - - return true; - } - - PUGI__FN bool allow_move(xml_node parent, xml_node child) - { - // check that child can be a child of parent - if (!allow_insert_child(parent.type(), child.type())) - return false; - - // check that node is not moved between documents - if (parent.root() != child.root()) - return false; - - // check that new parent is not in the child subtree - xml_node cur = parent; - - while (cur) - { - if (cur == child) - return false; - - cur = cur.parent(); - } - - return true; - } - - template - PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc) - { - assert(!dest && (header & header_mask) == 0); - - if (source) - { - if (alloc && (source_header & header_mask) == 0) - { - dest = source; - - // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared - header |= xml_memory_page_contents_shared_mask; - source_header |= xml_memory_page_contents_shared_mask; - } - else - strcpy_insitu(dest, header, header_mask, source, strlength(source)); - } - } - - PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc) - { - node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc); - node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc); - - for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute) - { - xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn)); - - if (da) - { - node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); - node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); - } - } - } - - PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn) - { - xml_allocator& alloc = get_allocator(dn); - xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0; - - node_copy_contents(dn, sn, shared_alloc); - - xml_node_struct* dit = dn; - xml_node_struct* sit = sn->first_child; - - while (sit && sit != sn) - { - if (sit != dn) - { - xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit)); - - if (copy) - { - node_copy_contents(copy, sit, shared_alloc); - - if (sit->first_child) - { - dit = copy; - sit = sit->first_child; - continue; - } - } - } - - // continue to the next node - do - { - if (sit->next_sibling) - { - sit = sit->next_sibling; - break; - } - - sit = sit->parent; - dit = dit->parent; - } - while (sit != sn); - } - } - - PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa) - { - xml_allocator& alloc = get_allocator(da); - xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? 
&alloc : 0; - - node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); - node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); - } - - inline bool is_text_node(xml_node_struct* node) - { - xml_node_type type = PUGI__NODETYPE(node); - - return type == node_pcdata || type == node_cdata; - } - - // get value with conversion functions - template U string_to_integer(const char_t* value, U minneg, U maxpos) - { - U result = 0; - const char_t* s = value; - - while (PUGI__IS_CHARTYPE(*s, ct_space)) - s++; - - bool negative = (*s == '-'); - - s += (*s == '+' || *s == '-'); - - bool overflow = false; - - if (s[0] == '0' && (s[1] | ' ') == 'x') - { - s += 2; - - // since overflow detection relies on length of the sequence skip leading zeros - while (*s == '0') - s++; - - const char_t* start = s; - - for (;;) - { - if (static_cast(*s - '0') < 10) - result = result * 16 + (*s - '0'); - else if (static_cast((*s | ' ') - 'a') < 6) - result = result * 16 + ((*s | ' ') - 'a' + 10); - else - break; - - s++; - } - - size_t digits = static_cast(s - start); - - overflow = digits > sizeof(U) * 2; - } - else - { - // since overflow detection relies on length of the sequence skip leading zeros - while (*s == '0') - s++; - - const char_t* start = s; - - for (;;) - { - if (static_cast(*s - '0') < 10) - result = result * 10 + (*s - '0'); - else - break; - - s++; - } - - size_t digits = static_cast(s - start); - - PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2); - - const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5; - const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6'; - const size_t high_bit = sizeof(U) * 8 - 1; - - overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit))); - } - - if (negative) - return (overflow || result > minneg) ? 0 - minneg : 0 - result; - else - return (overflow || result > maxpos) ? 
maxpos : result; - } - - PUGI__FN int get_value_int(const char_t* value) - { - return string_to_integer<unsigned int>(value, 0 - static_cast<unsigned int>(INT_MIN), INT_MAX); - } - - PUGI__FN unsigned int get_value_uint(const char_t* value) - { - return string_to_integer<unsigned int>(value, 0, UINT_MAX); - } - - PUGI__FN double get_value_double(const char_t* value) - { - #ifdef PUGIXML_WCHAR_MODE - return wcstod(value, 0); - #else - return strtod(value, 0); - #endif - } - - PUGI__FN float get_value_float(const char_t* value) - { - #ifdef PUGIXML_WCHAR_MODE - return static_cast<float>(wcstod(value, 0)); - #else - return static_cast<float>(strtod(value, 0)); - #endif - } - - PUGI__FN bool get_value_bool(const char_t* value) - { - // only look at first char - char_t first = *value; - - // 1*, t* (true), T* (True), y* (yes), Y* (YES) - return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y'); - } - -#ifdef PUGIXML_HAS_LONG_LONG - PUGI__FN long long get_value_llong(const char_t* value) - { - return string_to_integer<unsigned long long>(value, 0 - static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX); - } - - PUGI__FN unsigned long long get_value_ullong(const char_t* value) - { - return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX); - } -#endif - - template <typename T> struct make_unsigned; - - template <> struct make_unsigned<int> { typedef unsigned int type; }; - template <> struct make_unsigned<unsigned int> { typedef unsigned int type; }; - template <> struct make_unsigned<long> { typedef unsigned long type; }; - template <> struct make_unsigned<unsigned long> { typedef unsigned long type; }; - -#ifdef PUGIXML_HAS_LONG_LONG - template <> struct make_unsigned<long long> { typedef unsigned long long type; }; - template <> struct make_unsigned<unsigned long long> { typedef unsigned long long type; }; -#endif - - template <typename T> - PUGI__FN char_t* integer_to_string(char_t* begin, char_t* end, T value) - { - typedef typename make_unsigned<T>::type U; - - bool negative = value < 0; - - char_t* result = end - 1; - U rest = negative ? 
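// get_value_bool above deliberately looks only at the first character, so
// "1", "true", "True", "yes" and "YES" all read as true, as does any other
// string starting with one of those characters. Standalone equivalent:
bool parse_bool_first_char(const char* value)
{
    char first = *value;
    return first == '1' || first == 't' || first == 'T'
        || first == 'y' || first == 'Y';
}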
0 - U(value) : U(value); - - do - { - *result-- = static_cast('0' + (rest % 10)); - rest /= 10; - } - while (rest); - - assert(result >= begin); - (void)begin; - - *result = '-'; - - return result + !negative; - } - - // set value with conversion functions - template - PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf) - { - #ifdef PUGIXML_WCHAR_MODE - char_t wbuf[128]; - assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0])); - - size_t offset = 0; - for (; buf[offset]; ++offset) wbuf[offset] = buf[offset]; - - return strcpy_insitu(dest, header, header_mask, wbuf, offset); - #else - return strcpy_insitu(dest, header, header_mask, buf, strlen(buf)); - #endif - } - - template - PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, Integer value) - { - char_t buf[64]; - char_t* end = buf + sizeof(buf) / sizeof(buf[0]); - char_t* begin = integer_to_string(buf, end, value); - - return strcpy_insitu(dest, header, header_mask, begin, end - begin); - } - - template - PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value) - { - char buf[128]; - sprintf(buf, "%.9g", value); - - return set_value_ascii(dest, header, header_mask, buf); - } - - template - PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value) - { - char buf[128]; - sprintf(buf, "%.17g", value); - - return set_value_ascii(dest, header, header_mask, buf); - } - - template - PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, bool value) - { - return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5); - } - - PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer) - { - // check input buffer - if (!contents && size) return make_parse_result(status_io_error); - - // get actual encoding - xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size); - - // get private buffer - char_t* buffer = 0; - size_t length = 0; - - if (!contents) { - xml_parse_result failed; - failed.status = xml_parse_status::status_internal_error; - failed.offset = (ptrdiff_t)0; - return failed; - } - - if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory); - - // delete original buffer if we performed a conversion - if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents); - - // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself - if (own || buffer != contents) *out_buffer = buffer; - - // store buffer for offset_debug - doc->buffer = buffer; - - // parse - xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options); - - // remember encoding - res.encoding = buffer_encoding; - - return res; - } - - // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick - PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result) - { - #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE) - // there are 64-bit versions of fseek/ftell, let's use them - typedef __int64 length_type; - - _fseeki64(file, 0, SEEK_END); - length_type length = _ftelli64(file); - 
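// integer_to_string above fills the buffer from the end: digits come out
// least-significant first, so writing backwards yields the right order with
// no reversal pass, and '-' is placed one slot before the first digit. A
// self-contained sketch for int (the result is not null-terminated; its
// length is end minus the returned pointer):
char* int_to_chars_backward(char* end, int value)
{
    bool negative = value < 0;
    // Negate through unsigned so INT_MIN does not overflow.
    unsigned rest = negative ? 0u - unsigned(value) : unsigned(value);

    char* result = end - 1;
    do
    {
        *result-- = char('0' + rest % 10);
        rest /= 10;
    } while (rest);

    *result = '-';              // scratch write when non-negative
    return result + !negative;  // skip the '-' slot for positive values
}
// Usage: char buf[16]; char* first = int_to_chars_backward(buf + 16, -42);
// the digits occupy [first, buf + 16).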
_fseeki64(file, 0, SEEK_SET); - #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)) - // there are 64-bit versions of fseek/ftell, let's use them - typedef off64_t length_type; - - fseeko64(file, 0, SEEK_END); - length_type length = ftello64(file); - fseeko64(file, 0, SEEK_SET); - #else - // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway. - typedef long length_type; - - fseek(file, 0, SEEK_END); - length_type length = ftell(file); - fseek(file, 0, SEEK_SET); - #endif - - // check for I/O errors - if (length < 0) return status_io_error; - - // check for overflow - size_t result = static_cast(length); - - if (static_cast(result) != length) return status_out_of_memory; - - // finalize - out_result = result; - - return status_ok; - } - - // This function assumes that buffer has extra sizeof(char_t) writable bytes after size - PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding) - { - // We only need to zero-terminate if encoding conversion does not do it for us - #ifdef PUGIXML_WCHAR_MODE - xml_encoding wchar_encoding = get_wchar_encoding(); - - if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding)) - { - size_t length = size / sizeof(char_t); - - static_cast(buffer)[length] = 0; - return (length + 1) * sizeof(char_t); - } - #else - if (encoding == encoding_utf8) - { - static_cast(buffer)[size] = 0; - return size + 1; - } - #endif - - return size; - } - - PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer) - { - if (!file) return make_parse_result(status_file_not_found); - - // get file size (can result in I/O errors) - size_t size = 0; - xml_parse_status size_status = get_file_size(file, size); - if (size_status != status_ok) return make_parse_result(size_status); - - size_t max_suffix_size = sizeof(char_t); - - // allocate buffer for the whole file - char* contents = static_cast(xml_memory::allocate(size + max_suffix_size)); - if (!contents) return make_parse_result(status_out_of_memory); - - // read file in memory - size_t read_size = fread(contents, 1, size, file); - - if (read_size != size) - { - xml_memory::deallocate(contents); - return make_parse_result(status_io_error); - } - - xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size); - - return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer); - } - - PUGI__FN void close_file(FILE* file) - { - fclose(file); - } - -#ifndef PUGIXML_NO_STL - template struct xml_stream_chunk - { - static xml_stream_chunk* create() - { - void* memory = xml_memory::allocate(sizeof(xml_stream_chunk)); - if (!memory) return 0; - - return new (memory) xml_stream_chunk(); - } - - static void destroy(xml_stream_chunk* chunk) - { - // free chunk chain - while (chunk) - { - xml_stream_chunk* next_ = chunk->next; - - xml_memory::deallocate(chunk); - - chunk = next_; - } - } - - xml_stream_chunk(): next(0), size(0) - { - } - - xml_stream_chunk* next; - size_t size; - - T data[xml_memory_page_size / sizeof(T)]; - }; - - template PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream& stream, void** out_buffer, size_t* out_size) - { - auto_deleter > chunks(0, xml_stream_chunk::destroy); - - // read file to a chunk list - size_t total = 0; 
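// The seek/tell trick from get_file_size above, reduced to its portable
// fallback: seek to the end, read the position, seek back. Sketch only; the
// code above prefers the 64-bit _fseeki64/fseeko64 variants where available,
// since long is 32 bits on some platforms (notably 64-bit Windows).
#include <cstdio>

bool file_size(std::FILE* file, std::size_t& out_size)
{
    if (std::fseek(file, 0, SEEK_END) != 0) return false;
    long length = std::ftell(file);
    if (std::fseek(file, 0, SEEK_SET) != 0) return false;

    if (length < 0) return false;       // ftell reports I/O errors as -1
    out_size = static_cast<std::size_t>(length);
    return true;
}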
- xml_stream_chunk* last = 0; - - while (!stream.eof()) - { - // allocate new chunk - xml_stream_chunk* chunk = xml_stream_chunk::create(); - if (!chunk) return status_out_of_memory; - - // append chunk to list - if (last) last = last->next = chunk; - else chunks.data = last = chunk; - - // read data to chunk - stream.read(chunk->data, static_cast(sizeof(chunk->data) / sizeof(T))); - chunk->size = static_cast(stream.gcount()) * sizeof(T); - - // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors - if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error; - - // guard against huge files (chunk size is small enough to make this overflow check work) - if (total + chunk->size < total) return status_out_of_memory; - total += chunk->size; - } - - size_t max_suffix_size = sizeof(char_t); - - // copy chunk list to a contiguous buffer - char* buffer = static_cast(xml_memory::allocate(total + max_suffix_size)); - if (!buffer) return status_out_of_memory; - - char* write = buffer; - - for (xml_stream_chunk* chunk = chunks.data; chunk; chunk = chunk->next) - { - assert(write + chunk->size <= buffer + total); - memcpy(write, chunk->data, chunk->size); - write += chunk->size; - } - - assert(write == buffer + total); - - // return buffer - *out_buffer = buffer; - *out_size = total; - - return status_ok; - } - - template PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream& stream, void** out_buffer, size_t* out_size) - { - // get length of remaining data in stream - typename std::basic_istream::pos_type pos = stream.tellg(); - stream.seekg(0, std::ios::end); - std::streamoff length = stream.tellg() - pos; - stream.seekg(pos); - - if (stream.fail() || pos < 0) return status_io_error; - - // guard against huge files - size_t read_length = static_cast(length); - - if (static_cast(read_length) != length || length < 0) return status_out_of_memory; - - size_t max_suffix_size = sizeof(char_t); - - // read stream data into memory (guard against stream exceptions with buffer holder) - auto_deleter buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate); - if (!buffer.data) return status_out_of_memory; - - stream.read(static_cast(buffer.data), static_cast(read_length)); - - // read may set failbit | eofbit in case gcount() is less than read_length (i.e. 
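// load_stream_data_noseek above cannot ask the stream for its size, so it
// reads fixed-size chunks into a linked list and concatenates at the end. A
// simplified sketch with std::vector standing in for the allocator-backed
// chunk list used by the real code:
#include <istream>
#include <vector>

bool read_all_noseek(std::istream& stream, std::vector<char>& out)
{
    char chunk[4096];

    while (!stream.eof())
    {
        stream.read(chunk, sizeof(chunk));
        out.insert(out.end(), chunk, chunk + stream.gcount());

        // A short read sets failbit | eofbit; only badbit, or failbit
        // without eofbit, indicates a genuine I/O error.
        if (stream.bad() || (!stream.eof() && stream.fail()))
            return false;
    }
    return true;
}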
line ending conversion), so check for other I/O errors - if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error; - - // return buffer - size_t actual_length = static_cast(stream.gcount()); - assert(actual_length <= read_length); - - *out_buffer = buffer.release(); - *out_size = actual_length * sizeof(T); - - return status_ok; - } - - template PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer) - { - void* buffer = 0; - size_t size = 0; - xml_parse_status status = status_ok; - - // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits) - if (stream.fail()) return make_parse_result(status_io_error); - - // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory) - if (stream.tellg() < 0) - { - stream.clear(); // clear error flags that could be set by a failing tellg - status = load_stream_data_noseek(stream, &buffer, &size); - } - else - status = load_stream_data_seek(stream, &buffer, &size); - - if (status != status_ok) return make_parse_result(status); - - xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size); - - return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer); - } -#endif - -#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))) - PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) - { - return _wfopen(path, mode); - } -#else - PUGI__FN char* convert_path_heap(const wchar_t* str) - { - assert(str); - - // first pass: get length in utf8 characters - size_t length = strlength_wide(str); - size_t size = as_utf8_begin(str, length); - - // allocate resulting string - char* result = static_cast(xml_memory::allocate(size + 1)); - if (!result) return 0; - - // second pass: convert to utf8 - as_utf8_end(result, size, str, length); - - // zero-terminate - result[size] = 0; - - return result; - } - - PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) - { - // there is no standard function to open wide paths, so our best bet is to try utf8 path - char* path_utf8 = convert_path_heap(path); - if (!path_utf8) return 0; - - // convert mode to ASCII (we mirror _wfopen interface) - char mode_ascii[4] = {0}; - for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast(mode[i]); - - // try to open the utf8 path - FILE* result = fopen(path_utf8, mode_ascii); - - // free dummy buffer - xml_memory::deallocate(path_utf8); - - return result; - } -#endif - - PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding) - { - if (!file) return false; - - xml_writer_file writer(file); - doc.save(writer, indent, flags, encoding); - - return ferror(file) == 0; - } - - struct name_null_sentry - { - xml_node_struct* node; - char_t* name; - - name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name) - { - node->name = 0; - } - - ~name_null_sentry() - { - node->name = name; - } - }; -PUGI__NS_END - -namespace pugi -{ - PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_) - { - } - - PUGI__FN void xml_writer_file::write(const void* data, size_t size) - { - size_t result = fwrite(data, 1, size, static_cast(file)); - (void)!result; // unfortunately we 
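// xml_writer_file above adapts a FILE* to pugixml's xml_writer interface;
// the same interface makes custom sinks one override away. The classic
// example is a writer that accumulates the document into a std::string:
#include <string>
#include "pugixml.hpp"

struct xml_string_writer : pugi::xml_writer
{
    std::string result;

    virtual void write(const void* data, size_t size)
    {
        result.append(static_cast<const char*>(data), size);
    }
};
// Usage: xml_string_writer writer; doc.save(writer); then use writer.result.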
can't do proper error handling here - } - -#ifndef PUGIXML_NO_STL - PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream >& stream): narrow_stream(&stream), wide_stream(0) - { - } - - PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream >& stream): narrow_stream(0), wide_stream(&stream) - { - } - - PUGI__FN void xml_writer_stream::write(const void* data, size_t size) - { - if (narrow_stream) - { - assert(!wide_stream); - narrow_stream->write(reinterpret_cast(data), static_cast(size)); - } - else - { - assert(wide_stream); - assert(size % sizeof(wchar_t) == 0); - - wide_stream->write(reinterpret_cast(data), static_cast(size / sizeof(wchar_t))); - } - } -#endif - - PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0) - { - } - - PUGI__FN xml_tree_walker::~xml_tree_walker() - { - } - - PUGI__FN int xml_tree_walker::depth() const - { - return _depth; - } - - PUGI__FN bool xml_tree_walker::begin(xml_node&) - { - return true; - } - - PUGI__FN bool xml_tree_walker::end(xml_node&) - { - return true; - } - - PUGI__FN xml_attribute::xml_attribute(): _attr(0) - { - } - - PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr) - { - } - - PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***) - { - } - - PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const - { - return _attr ? unspecified_bool_xml_attribute : 0; - } - - PUGI__FN bool xml_attribute::operator!() const - { - return !_attr; - } - - PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const - { - return (_attr == r._attr); - } - - PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const - { - return (_attr != r._attr); - } - - PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const - { - return (_attr < r._attr); - } - - PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const - { - return (_attr > r._attr); - } - - PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const - { - return (_attr <= r._attr); - } - - PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const - { - return (_attr >= r._attr); - } - - PUGI__FN xml_attribute xml_attribute::next_attribute() const - { - return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute(); - } - - PUGI__FN xml_attribute xml_attribute::previous_attribute() const - { - return _attr && _attr->prev_attribute_c->next_attribute ? xml_attribute(_attr->prev_attribute_c) : xml_attribute(); - } - - PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const - { - return (_attr && _attr->value) ? _attr->value + 0 : def; - } - - PUGI__FN int xml_attribute::as_int(int def) const - { - return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def; - } - - PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const - { - return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def; - } - - PUGI__FN double xml_attribute::as_double(double def) const - { - return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def; - } - - PUGI__FN float xml_attribute::as_float(float def) const - { - return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def; - } - - PUGI__FN bool xml_attribute::as_bool(bool def) const - { - return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def; - } - -#ifdef PUGIXML_HAS_LONG_LONG - PUGI__FN long long xml_attribute::as_llong(long long def) const - { - return (_attr && _attr->value) ? 
impl::get_value_llong(_attr->value) : def; - } - - PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const - { - return (_attr && _attr->value) ? impl::get_value_ullong(_attr->value) : def; - } -#endif - - PUGI__FN bool xml_attribute::empty() const - { - return !_attr; - } - - PUGI__FN const char_t* xml_attribute::name() const - { - return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT(""); - } - - PUGI__FN const char_t* xml_attribute::value() const - { - return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT(""); - } - - PUGI__FN size_t xml_attribute::hash_value() const - { - return static_cast(reinterpret_cast(_attr) / sizeof(xml_attribute_struct)); - } - - PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const - { - return _attr; - } - - PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs) - { - set_value(rhs); - return *this; - } - - PUGI__FN xml_attribute& xml_attribute::operator=(int rhs) - { - set_value(rhs); - return *this; - } - - PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs) - { - set_value(rhs); - return *this; - } - - PUGI__FN xml_attribute& xml_attribute::operator=(long rhs) - { - set_value(rhs); - return *this; - } - - PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs) - { - set_value(rhs); - return *this; - } - - PUGI__FN xml_attribute& xml_attribute::operator=(double rhs) - { - set_value(rhs); - return *this; - } - - PUGI__FN xml_attribute& xml_attribute::operator=(float rhs) - { - set_value(rhs); - return *this; - } - - PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs) - { - set_value(rhs); - return *this; - } - -#ifdef PUGIXML_HAS_LONG_LONG - PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs) - { - set_value(rhs); - return *this; - } - - PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs) - { - set_value(rhs); - return *this; - } -#endif - - PUGI__FN bool xml_attribute::set_name(const char_t* rhs) - { - if (!_attr) return false; - - return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); - } - - PUGI__FN bool xml_attribute::set_value(const char_t* rhs) - { - if (!_attr) return false; - - return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); - } - - PUGI__FN bool xml_attribute::set_value(int rhs) - { - if (!_attr) return false; - - return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); - } - - PUGI__FN bool xml_attribute::set_value(unsigned int rhs) - { - if (!_attr) return false; - - return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); - } - - PUGI__FN bool xml_attribute::set_value(long rhs) - { - if (!_attr) return false; - - return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); - } - - PUGI__FN bool xml_attribute::set_value(unsigned long rhs) - { - if (!_attr) return false; - - return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); - } - - PUGI__FN bool xml_attribute::set_value(double rhs) - { - if (!_attr) return false; - - return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); - } - - PUGI__FN bool xml_attribute::set_value(float rhs) - { - if (!_attr) return false; - - return 
impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); - } - - PUGI__FN bool xml_attribute::set_value(bool rhs) - { - if (!_attr) return false; - - return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); - } - -#ifdef PUGIXML_HAS_LONG_LONG - PUGI__FN bool xml_attribute::set_value(long long rhs) - { - if (!_attr) return false; - - return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); - } - - PUGI__FN bool xml_attribute::set_value(unsigned long long rhs) - { - if (!_attr) return false; - - return impl::set_value_integer(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); - } -#endif - -#ifdef __BORLANDC__ - PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs) - { - return (bool)lhs && rhs; - } - - PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs) - { - return (bool)lhs || rhs; - } -#endif - - PUGI__FN xml_node::xml_node(): _root(0) - { - } - - PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p) - { - } - - PUGI__FN static void unspecified_bool_xml_node(xml_node***) - { - } - - PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const - { - return _root ? unspecified_bool_xml_node : 0; - } - - PUGI__FN bool xml_node::operator!() const - { - return !_root; - } - - PUGI__FN xml_node::iterator xml_node::begin() const - { - return iterator(_root ? _root->first_child + 0 : 0, _root); - } - - PUGI__FN xml_node::iterator xml_node::end() const - { - return iterator(0, _root); - } - - PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const - { - return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root); - } - - PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const - { - return attribute_iterator(0, _root); - } - - PUGI__FN xml_object_range xml_node::children() const - { - return xml_object_range(begin(), end()); - } - - PUGI__FN xml_object_range xml_node::children(const char_t* name_) const - { - return xml_object_range(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_)); - } - - PUGI__FN xml_object_range xml_node::attributes() const - { - return xml_object_range(attributes_begin(), attributes_end()); - } - - PUGI__FN bool xml_node::operator==(const xml_node& r) const - { - return (_root == r._root); - } - - PUGI__FN bool xml_node::operator!=(const xml_node& r) const - { - return (_root != r._root); - } - - PUGI__FN bool xml_node::operator<(const xml_node& r) const - { - return (_root < r._root); - } - - PUGI__FN bool xml_node::operator>(const xml_node& r) const - { - return (_root > r._root); - } - - PUGI__FN bool xml_node::operator<=(const xml_node& r) const - { - return (_root <= r._root); - } - - PUGI__FN bool xml_node::operator>=(const xml_node& r) const - { - return (_root >= r._root); - } - - PUGI__FN bool xml_node::empty() const - { - return !_root; - } - - PUGI__FN const char_t* xml_node::name() const - { - return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT(""); - } - - PUGI__FN xml_node_type xml_node::type() const - { - return _root ? PUGI__NODETYPE(_root) : node_null; - } - - PUGI__FN const char_t* xml_node::value() const - { - return (_root && _root->value) ? 
_root->value + 0 : PUGIXML_TEXT(""); - } - - PUGI__FN xml_node xml_node::child(const char_t* name_) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequal(name_, i->name)) return xml_node(i); - - return xml_node(); - } - - PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const - { - if (!_root) return xml_attribute(); - - for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) - if (i->name && impl::strequal(name_, i->name)) - return xml_attribute(i); - - return xml_attribute(); - } - - PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) - if (i->name && impl::strequal(name_, i->name)) return xml_node(i); - - return xml_node(); - } - - PUGI__FN xml_node xml_node::next_sibling() const - { - return _root ? xml_node(_root->next_sibling) : xml_node(); - } - - PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) - if (i->name && impl::strequal(name_, i->name)) return xml_node(i); - - return xml_node(); - } - - PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const - { - xml_attribute_struct* hint = hint_._attr; - - // if hint is not an attribute of node, behavior is not defined - assert(!hint || (_root && impl::is_attribute_of(hint, _root))); - - if (!_root) return xml_attribute(); - - // optimistically search from hint up until the end - for (xml_attribute_struct* i = hint; i; i = i->next_attribute) - if (i->name && impl::strequal(name_, i->name)) - { - // update hint to maximize efficiency of searching for consecutive attributes - hint_._attr = i->next_attribute; - - return xml_attribute(i); - } - - // wrap around and search from the first attribute until the hint - // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails - for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute) - if (j->name && impl::strequal(name_, j->name)) - { - // update hint to maximize efficiency of searching for consecutive attributes - hint_._attr = j->next_attribute; - - return xml_attribute(j); - } - - return xml_attribute(); - } - - PUGI__FN xml_node xml_node::previous_sibling() const - { - if (!_root) return xml_node(); - - if (_root->prev_sibling_c->next_sibling) return xml_node(_root->prev_sibling_c); - else return xml_node(); - } - - PUGI__FN xml_node xml_node::parent() const - { - return _root ? xml_node(_root->parent) : xml_node(); - } - - PUGI__FN xml_node xml_node::root() const - { - return _root ? 
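// The hinted attribute lookup above speeds up the common pattern of reading
// attributes in document order: the search starts at the hint, and on a hit
// the hint advances to the next attribute, so in-order queries cost O(1)
// each instead of O(n). Sketch over a hypothetical singly-linked list:
#include <cstring>

struct attr_node { const char* name; attr_node* next; };

attr_node* find_with_hint(attr_node* first, attr_node*& hint, const char* name)
{
    // Optimistic pass: from the hint to the end of the list.
    for (attr_node* a = hint; a; a = a->next)
        if (std::strcmp(a->name, name) == 0)
        {
            hint = a->next;   // the next query likely wants the next attribute
            return a;
        }

    // Wrap around: from the first attribute up to (not including) the hint.
    for (attr_node* a = first; a && a != hint; a = a->next)
        if (std::strcmp(a->name, name) == 0)
        {
            hint = a->next;
            return a;
        }

    return 0;
}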
xml_node(&impl::get_document(_root)) : xml_node(); - } - - PUGI__FN xml_text xml_node::text() const - { - return xml_text(_root); - } - - PUGI__FN const char_t* xml_node::child_value() const - { - if (!_root) return PUGIXML_TEXT(""); - - // element nodes can have value if parse_embed_pcdata was used - if (PUGI__NODETYPE(_root) == node_element && _root->value) - return _root->value; - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (impl::is_text_node(i) && i->value) - return i->value; - - return PUGIXML_TEXT(""); - } - - PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const - { - return child(name_).child_value(); - } - - PUGI__FN xml_attribute xml_node::first_attribute() const - { - return _root ? xml_attribute(_root->first_attribute) : xml_attribute(); - } - - PUGI__FN xml_attribute xml_node::last_attribute() const - { - return _root && _root->first_attribute ? xml_attribute(_root->first_attribute->prev_attribute_c) : xml_attribute(); - } - - PUGI__FN xml_node xml_node::first_child() const - { - return _root ? xml_node(_root->first_child) : xml_node(); - } - - PUGI__FN xml_node xml_node::last_child() const - { - return _root && _root->first_child ? xml_node(_root->first_child->prev_sibling_c) : xml_node(); - } - - PUGI__FN bool xml_node::set_name(const char_t* rhs) - { - xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null; - - if (!_root && type_ != node_element && type_ != node_pi && type_ != node_declaration) - return false; - - return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); - } - - PUGI__FN bool xml_node::set_value(const char_t* rhs) - { - xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null; - - if (!_root && type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype) - return false; - - return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); - } - - PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_) - { - if (!impl::allow_insert_attribute(type())) return xml_attribute(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_attribute(); - - xml_attribute a(impl::allocate_attribute(alloc)); - if (!a) return xml_attribute(); - - impl::append_attribute(a._attr, _root); - - a.set_name(name_); - - return a; - } - - PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_) - { - if (!impl::allow_insert_attribute(type())) return xml_attribute(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_attribute(); - - xml_attribute a(impl::allocate_attribute(alloc)); - if (!a) return xml_attribute(); - - impl::prepend_attribute(a._attr, _root); - - a.set_name(name_); - - return a; - } - - PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr) - { - if (!impl::allow_insert_attribute(type())) return xml_attribute(); - if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_attribute(); - - xml_attribute a(impl::allocate_attribute(alloc)); - if (!a) return xml_attribute(); - - impl::insert_attribute_after(a._attr, attr._attr, _root); - - a.set_name(name_); - - return a; - } - - PUGI__FN xml_attribute xml_node::insert_attribute_before(const 
char_t* name_, const xml_attribute& attr) - { - if (!impl::allow_insert_attribute(type())) return xml_attribute(); - if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_attribute(); - - xml_attribute a(impl::allocate_attribute(alloc)); - if (!a) return xml_attribute(); - - impl::insert_attribute_before(a._attr, attr._attr, _root); - - a.set_name(name_); - - return a; - } - - PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto) - { - if (!proto) return xml_attribute(); - if (!impl::allow_insert_attribute(type())) return xml_attribute(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_attribute(); - - xml_attribute a(impl::allocate_attribute(alloc)); - if (!a) return xml_attribute(); - - impl::append_attribute(a._attr, _root); - impl::node_copy_attribute(a._attr, proto._attr); - - return a; - } - - PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto) - { - if (!proto) return xml_attribute(); - if (!impl::allow_insert_attribute(type())) return xml_attribute(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_attribute(); - - xml_attribute a(impl::allocate_attribute(alloc)); - if (!a) return xml_attribute(); - - impl::prepend_attribute(a._attr, _root); - impl::node_copy_attribute(a._attr, proto._attr); - - return a; - } - - PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr) - { - if (!proto) return xml_attribute(); - if (!impl::allow_insert_attribute(type())) return xml_attribute(); - if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_attribute(); - - xml_attribute a(impl::allocate_attribute(alloc)); - if (!a) return xml_attribute(); - - impl::insert_attribute_after(a._attr, attr._attr, _root); - impl::node_copy_attribute(a._attr, proto._attr); - - return a; - } - - PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr) - { - if (!proto) return xml_attribute(); - if (!impl::allow_insert_attribute(type())) return xml_attribute(); - if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_attribute(); - - xml_attribute a(impl::allocate_attribute(alloc)); - if (!a) return xml_attribute(); - - impl::insert_attribute_before(a._attr, attr._attr, _root); - impl::node_copy_attribute(a._attr, proto._attr); - - return a; - } - - PUGI__FN xml_node xml_node::append_child(xml_node_type type_) - { - if (!impl::allow_insert_child(type(), type_)) return xml_node(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); - - xml_node n(impl::allocate_node(alloc, type_)); - if (!n) return xml_node(); - - impl::append_node(n._root, _root); - - if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); - - return n; - } - - PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_) - { - if (!impl::allow_insert_child(type(), type_)) return xml_node(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); - - xml_node n(impl::allocate_node(alloc, type_)); - if (!n) return xml_node(); - - 
impl::prepend_node(n._root, _root); - - if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); - - return n; - } - - PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node) - { - if (!impl::allow_insert_child(type(), type_)) return xml_node(); - if (!node._root || node._root->parent != _root) return xml_node(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); - - xml_node n(impl::allocate_node(alloc, type_)); - if (!n) return xml_node(); - - impl::insert_node_before(n._root, node._root); - - if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); - - return n; - } - - PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node) - { - if (!impl::allow_insert_child(type(), type_)) return xml_node(); - if (!node._root || node._root->parent != _root) return xml_node(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); - - xml_node n(impl::allocate_node(alloc, type_)); - if (!n) return xml_node(); - - impl::insert_node_after(n._root, node._root); - - if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); - - return n; - } - - PUGI__FN xml_node xml_node::append_child(const char_t* name_) - { - xml_node result = append_child(node_element); - - result.set_name(name_); - - return result; - } - - PUGI__FN xml_node xml_node::prepend_child(const char_t* name_) - { - xml_node result = prepend_child(node_element); - - result.set_name(name_); - - return result; - } - - PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node) - { - xml_node result = insert_child_after(node_element, node); - - result.set_name(name_); - - return result; - } - - PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node) - { - xml_node result = insert_child_before(node_element, node); - - result.set_name(name_); - - return result; - } - - PUGI__FN xml_node xml_node::append_copy(const xml_node& proto) - { - xml_node_type type_ = proto.type(); - if (!impl::allow_insert_child(type(), type_)) return xml_node(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); - - xml_node n(impl::allocate_node(alloc, type_)); - if (!n) return xml_node(); - - impl::append_node(n._root, _root); - impl::node_copy_tree(n._root, proto._root); - - return n; - } - - PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto) - { - xml_node_type type_ = proto.type(); - if (!impl::allow_insert_child(type(), type_)) return xml_node(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); - - xml_node n(impl::allocate_node(alloc, type_)); - if (!n) return xml_node(); - - impl::prepend_node(n._root, _root); - impl::node_copy_tree(n._root, proto._root); - - return n; - } - - PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node) - { - xml_node_type type_ = proto.type(); - if (!impl::allow_insert_child(type(), type_)) return xml_node(); - if (!node._root || node._root->parent != _root) return xml_node(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); - - xml_node n(impl::allocate_node(alloc, type_)); - if (!n) return xml_node(); - - impl::insert_node_after(n._root, node._root); - impl::node_copy_tree(n._root, proto._root); - - return n; - } - - PUGI__FN xml_node xml_node::insert_copy_before(const 
xml_node& proto, const xml_node& node) - { - xml_node_type type_ = proto.type(); - if (!impl::allow_insert_child(type(), type_)) return xml_node(); - if (!node._root || node._root->parent != _root) return xml_node(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); - - xml_node n(impl::allocate_node(alloc, type_)); - if (!n) return xml_node(); - - impl::insert_node_before(n._root, node._root); - impl::node_copy_tree(n._root, proto._root); - - return n; - } - - PUGI__FN xml_node xml_node::append_move(const xml_node& moved) - { - if (!impl::allow_move(*this, moved)) return xml_node(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); - - // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers - impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; - - impl::remove_node(moved._root); - impl::append_node(moved._root, _root); - - return moved; - } - - PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved) - { - if (!impl::allow_move(*this, moved)) return xml_node(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); - - // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers - impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; - - impl::remove_node(moved._root); - impl::prepend_node(moved._root, _root); - - return moved; - } - - PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node) - { - if (!impl::allow_move(*this, moved)) return xml_node(); - if (!node._root || node._root->parent != _root) return xml_node(); - if (moved._root == node._root) return xml_node(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); - - // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers - impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; - - impl::remove_node(moved._root); - impl::insert_node_after(moved._root, node._root); - - return moved; - } - - PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node) - { - if (!impl::allow_move(*this, moved)) return xml_node(); - if (!node._root || node._root->parent != _root) return xml_node(); - if (moved._root == node._root) return xml_node(); - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return xml_node(); - - // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers - impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; - - impl::remove_node(moved._root); - impl::insert_node_before(moved._root, node._root); - - return moved; - } - - PUGI__FN bool xml_node::remove_attribute(const char_t* name_) - { - return remove_attribute(attribute(name_)); - } - - PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a) - { - if (!_root || !a._attr) return false; - if (!impl::is_attribute_of(a._attr, _root)) return false; - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return false; - - impl::remove_attribute(a._attr, _root); - impl::destroy_attribute(a._attr, alloc); - - return true; - } - - PUGI__FN bool 
xml_node::remove_child(const char_t* name_) - { - return remove_child(child(name_)); - } - - PUGI__FN bool xml_node::remove_child(const xml_node& n) - { - if (!_root || !n._root || n._root->parent != _root) return false; - - impl::xml_allocator& alloc = impl::get_allocator(_root); - if (!alloc.reserve()) return false; - - impl::remove_node(n._root); - impl::destroy_node(n._root, alloc); - - return true; - } - - PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) - { - // append_buffer is only valid for elements/documents - if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root); - - // get document node - impl::xml_document_struct* doc = &impl::get_document(_root); - - // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense - doc->header |= impl::xml_memory_page_contents_shared_mask; - - // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later) - impl::xml_memory_page* page = 0; - impl::xml_extra_buffer* extra = static_cast(doc->allocate_memory(sizeof(impl::xml_extra_buffer), page)); - (void)page; - - if (!extra) return impl::make_parse_result(status_out_of_memory); - - // add extra buffer to the list - extra->buffer = 0; - extra->next = doc->extra_buffers; - doc->extra_buffers = extra; - - // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level - impl::name_null_sentry sentry(_root); - - return impl::load_buffer_impl(doc, _root, const_cast(contents), size, options, encoding, false, false, &extra->buffer); - } - - PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - if (i->name && impl::strequal(name_, i->name)) - { - for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) - if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT(""))) - return xml_node(i); - } - - return xml_node(); - } - - PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const - { - if (!_root) return xml_node(); - - for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) - for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) - if (a->name && impl::strequal(attr_name, a->name) && impl::strequal(attr_value, a->value ? a->value + 0 : PUGIXML_TEXT(""))) - return xml_node(i); - - return xml_node(); - } - -#ifndef PUGIXML_NO_STL - PUGI__FN string_t xml_node::path(char_t delimiter) const - { - if (!_root) return string_t(); - - size_t offset = 0; - - for (xml_node_struct* i = _root; i; i = i->parent) - { - offset += (i != _root); - offset += i->name ? 
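// find_child_by_attribute above is a plain linear scan over children and
// their attributes. A typical call site (element and attribute names here
// are illustrative only):
#include "pugixml.hpp"

pugi::xml_node find_layer(pugi::xml_node net, const char* layer_name)
{
    // Equivalent to hand-scanning net's children for <layer name="...">.
    return net.find_child_by_attribute("layer", "name", layer_name);
}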
impl::strlength(i->name) : 0; - } - - string_t result; - result.resize(offset); - - for (xml_node_struct* j = _root; j; j = j->parent) - { - if (j != _root) - result[--offset] = delimiter; - - if (j->name && *j->name) - { - size_t length = impl::strlength(j->name); - - offset -= length; - memcpy(&result[offset], j->name, length * sizeof(char_t)); - } - } - - assert(offset == 0); - - return result; - } -#endif - - PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const - { - xml_node found = *this; // Current search context. - - if (!_root || !path_ || !path_[0]) return found; - - if (path_[0] == delimiter) - { - // Absolute path; e.g. '/foo/bar' - found = found.root(); - ++path_; - } - - const char_t* path_segment = path_; - - while (*path_segment == delimiter) ++path_segment; - - const char_t* path_segment_end = path_segment; - - while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end; - - if (path_segment == path_segment_end) return found; - - const char_t* next_segment = path_segment_end; - - while (*next_segment == delimiter) ++next_segment; - - if (*path_segment == '.' && path_segment + 1 == path_segment_end) - return found.first_element_by_path(next_segment, delimiter); - else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end) - return found.parent().first_element_by_path(next_segment, delimiter); - else - { - if (found._root) { - for (xml_node_struct* j = found._root->first_child; j; j = j->next_sibling) - { - if (j->name && impl::strequalrange(j->name, path_segment, static_cast(path_segment_end - path_segment))) - { - xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter); - - if (subsearch) return subsearch; - } - } - } - - return xml_node(); - } - } - - PUGI__FN bool xml_node::traverse(xml_tree_walker& walker) - { - walker._depth = -1; - - xml_node arg_begin = *this; - if (!walker.begin(arg_begin)) return false; - - xml_node cur = first_child(); - - if (cur) - { - ++walker._depth; - - do - { - xml_node arg_for_each = cur; - if (!walker.for_each(arg_for_each)) - return false; - - if (cur.first_child()) - { - ++walker._depth; - cur = cur.first_child(); - } - else if (cur.next_sibling()) - cur = cur.next_sibling(); - else - { - // Borland C++ workaround - while (!cur.next_sibling() && cur != *this && !cur.parent().empty()) - { - --walker._depth; - cur = cur.parent(); - } - - if (cur != *this) - cur = cur.next_sibling(); - } - } - while (cur && cur != *this); - } - - assert(walker._depth == -1); - - xml_node arg_end = *this; - return walker.end(arg_end); - } - - PUGI__FN size_t xml_node::hash_value() const - { - return static_cast(reinterpret_cast(_root) / sizeof(xml_node_struct)); - } - - PUGI__FN xml_node_struct* xml_node::internal_object() const - { - return _root; - } - - PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const - { - if (!_root) return; - - impl::xml_buffered_writer buffered_writer(writer, encoding); - - impl::node_output(buffered_writer, _root, indent, flags, depth); - - buffered_writer.flush(); - } - -#ifndef PUGIXML_NO_STL - PUGI__FN void xml_node::print(std::basic_ostream >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const - { - xml_writer_stream writer(stream); - - print(writer, indent, flags, encoding, depth); - } - - PUGI__FN void xml_node::print(std::basic_ostream >& stream, const char_t* indent, 
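// traverse() above drives a depth-first walk without recursion, adjusting
// _depth as it descends and climbs. A minimal walker built on that
// interface, printing each node indented by its depth:
#include <iostream>
#include "pugixml.hpp"

struct node_printer : pugi::xml_tree_walker
{
    virtual bool for_each(pugi::xml_node& node)
    {
        for (int i = 0; i < depth(); ++i)
            std::cout << "  ";                 // indent by tree depth
        std::cout << node.name() << '\n';
        return true;                           // false would stop the walk
    }
};
// Usage: node_printer walker; doc.traverse(walker);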
unsigned int flags, unsigned int depth) const - { - xml_writer_stream writer(stream); - - print(writer, indent, flags, encoding_wchar, depth); - } -#endif - - PUGI__FN ptrdiff_t xml_node::offset_debug() const - { - if (!_root) return -1; - - impl::xml_document_struct& doc = impl::get_document(_root); - - // we can determine the offset reliably only if there is exactly once parse buffer - if (!doc.buffer || doc.extra_buffers) return -1; - - switch (type()) - { - case node_document: - return 0; - - case node_element: - case node_declaration: - case node_pi: - return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1; - - case node_pcdata: - case node_cdata: - case node_comment: - case node_doctype: - return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1; - - default: - return -1; - } - } - -#ifdef __BORLANDC__ - PUGI__FN bool operator&&(const xml_node& lhs, bool rhs) - { - return (bool)lhs && rhs; - } - - PUGI__FN bool operator||(const xml_node& lhs, bool rhs) - { - return (bool)lhs || rhs; - } -#endif - - PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root) - { - } - - PUGI__FN xml_node_struct* xml_text::_data() const - { - if (!_root || impl::is_text_node(_root)) return _root; - - // element nodes can have value if parse_embed_pcdata was used - if (PUGI__NODETYPE(_root) == node_element && _root->value) - return _root; - - for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling) - if (impl::is_text_node(node)) - return node; - - return 0; - } - - PUGI__FN xml_node_struct* xml_text::_data_new() - { - xml_node_struct* d = _data(); - if (d) return d; - - return xml_node(_root).append_child(node_pcdata).internal_object(); - } - - PUGI__FN xml_text::xml_text(): _root(0) - { - } - - PUGI__FN static void unspecified_bool_xml_text(xml_text***) - { - } - - PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const - { - return _data() ? unspecified_bool_xml_text : 0; - } - - PUGI__FN bool xml_text::operator!() const - { - return !_data(); - } - - PUGI__FN bool xml_text::empty() const - { - return _data() == 0; - } - - PUGI__FN const char_t* xml_text::get() const - { - xml_node_struct* d = _data(); - - return (d && d->value) ? d->value + 0 : PUGIXML_TEXT(""); - } - - PUGI__FN const char_t* xml_text::as_string(const char_t* def) const - { - xml_node_struct* d = _data(); - - return (d && d->value) ? d->value + 0 : def; - } - - PUGI__FN int xml_text::as_int(int def) const - { - xml_node_struct* d = _data(); - - return (d && d->value) ? impl::get_value_int(d->value) : def; - } - - PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const - { - xml_node_struct* d = _data(); - - return (d && d->value) ? impl::get_value_uint(d->value) : def; - } - - PUGI__FN double xml_text::as_double(double def) const - { - xml_node_struct* d = _data(); - - return (d && d->value) ? impl::get_value_double(d->value) : def; - } - - PUGI__FN float xml_text::as_float(float def) const - { - xml_node_struct* d = _data(); - - return (d && d->value) ? impl::get_value_float(d->value) : def; - } - - PUGI__FN bool xml_text::as_bool(bool def) const - { - xml_node_struct* d = _data(); - - return (d && d->value) ? impl::get_value_bool(d->value) : def; - } - -#ifdef PUGIXML_HAS_LONG_LONG - PUGI__FN long long xml_text::as_llong(long long def) const - { - xml_node_struct* d = _data(); - - return (d && d->value) ? 
impl::get_value_llong(d->value) : def; - } - - PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const - { - xml_node_struct* d = _data(); - - return (d && d->value) ? impl::get_value_ullong(d->value) : def; - } -#endif - - PUGI__FN bool xml_text::set(const char_t* rhs) - { - xml_node_struct* dn = _data_new(); - - return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false; - } - - PUGI__FN bool xml_text::set(int rhs) - { - xml_node_struct* dn = _data_new(); - - return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; - } - - PUGI__FN bool xml_text::set(unsigned int rhs) - { - xml_node_struct* dn = _data_new(); - - return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; - } - - PUGI__FN bool xml_text::set(long rhs) - { - xml_node_struct* dn = _data_new(); - - return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; - } - - PUGI__FN bool xml_text::set(unsigned long rhs) - { - xml_node_struct* dn = _data_new(); - - return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; - } - - PUGI__FN bool xml_text::set(float rhs) - { - xml_node_struct* dn = _data_new(); - - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; - } - - PUGI__FN bool xml_text::set(double rhs) - { - xml_node_struct* dn = _data_new(); - - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; - } - - PUGI__FN bool xml_text::set(bool rhs) - { - xml_node_struct* dn = _data_new(); - - return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; - } - -#ifdef PUGIXML_HAS_LONG_LONG - PUGI__FN bool xml_text::set(long long rhs) - { - xml_node_struct* dn = _data_new(); - - return dn ? impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; - } - - PUGI__FN bool xml_text::set(unsigned long long rhs) - { - xml_node_struct* dn = _data_new(); - - return dn ? 
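// xml_text above resolves to the node's first PCDATA/CDATA child, and
// _data_new creates that child on demand, so the typed setters work on an
// element that has no text yet (names below are illustrative):
#include "pugixml.hpp"

void write_threshold(pugi::xml_node config)
{
    pugi::xml_node th = config.append_child("threshold");
    th.text().set(0.5);                        // creates the pcdata child
    double v = th.text().as_double(0.0);       // reads back 0.5
    (void)v;
}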
impl::set_value_integer(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; - } -#endif - - PUGI__FN xml_text& xml_text::operator=(const char_t* rhs) - { - set(rhs); - return *this; - } - - PUGI__FN xml_text& xml_text::operator=(int rhs) - { - set(rhs); - return *this; - } - - PUGI__FN xml_text& xml_text::operator=(unsigned int rhs) - { - set(rhs); - return *this; - } - - PUGI__FN xml_text& xml_text::operator=(long rhs) - { - set(rhs); - return *this; - } - - PUGI__FN xml_text& xml_text::operator=(unsigned long rhs) - { - set(rhs); - return *this; - } - - PUGI__FN xml_text& xml_text::operator=(double rhs) - { - set(rhs); - return *this; - } - - PUGI__FN xml_text& xml_text::operator=(float rhs) - { - set(rhs); - return *this; - } - - PUGI__FN xml_text& xml_text::operator=(bool rhs) - { - set(rhs); - return *this; - } - -#ifdef PUGIXML_HAS_LONG_LONG - PUGI__FN xml_text& xml_text::operator=(long long rhs) - { - set(rhs); - return *this; - } - - PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs) - { - set(rhs); - return *this; - } -#endif - - PUGI__FN xml_node xml_text::data() const - { - return xml_node(_data()); - } - -#ifdef __BORLANDC__ - PUGI__FN bool operator&&(const xml_text& lhs, bool rhs) - { - return (bool)lhs && rhs; - } - - PUGI__FN bool operator||(const xml_text& lhs, bool rhs) - { - return (bool)lhs || rhs; - } -#endif - - PUGI__FN xml_node_iterator::xml_node_iterator() - { - } - - PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent()) - { - } - - PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) - { - } - - PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const - { - return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; - } - - PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const - { - return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; - } - - PUGI__FN xml_node& xml_node_iterator::operator*() const - { - assert(_wrap._root); - return _wrap; - } - - PUGI__FN xml_node* xml_node_iterator::operator->() const - { - assert(_wrap._root); - return const_cast(&_wrap); // BCC5 workaround - } - - PUGI__FN const xml_node_iterator& xml_node_iterator::operator++() - { - assert(_wrap._root); - _wrap._root = _wrap._root->next_sibling; - return *this; - } - - PUGI__FN xml_node_iterator xml_node_iterator::operator++(int) - { - xml_node_iterator temp = *this; - ++*this; - return temp; - } - - PUGI__FN const xml_node_iterator& xml_node_iterator::operator--() - { - _wrap = _wrap._root ? 
-    PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
-    {
-    }
-
-    PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
-    {
-    }
-
-    PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
-    {
-    }
-
-    PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
-    {
-        return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
-    }
-
-    PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
-    {
-        return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
-    }
-
-    PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
-    {
-        assert(_wrap._attr);
-        return _wrap;
-    }
-
-    PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
-    {
-        assert(_wrap._attr);
-        return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround
-    }
-
-    PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++()
-    {
-        assert(_wrap._attr);
-        _wrap._attr = _wrap._attr->next_attribute;
-        return *this;
-    }
-
-    PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
-    {
-        xml_attribute_iterator temp = *this;
-        ++*this;
-        return temp;
-    }
-
-    PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--()
-    {
-        _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
-        return *this;
-    }
-
-    PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
-    {
-        xml_attribute_iterator temp = *this;
-        --*this;
-        return temp;
-    }
-
-    PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
-    {
-    }
-
-    PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
-    {
-    }
-
-    PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
-    {
-    }
-
-    PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
-    {
-        return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
-    }
-
-    PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
-    {
-        return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
-    }
-
-    PUGI__FN xml_node& xml_named_node_iterator::operator*() const
-    {
-        assert(_wrap._root);
-        return _wrap;
-    }
-
-    PUGI__FN xml_node* xml_named_node_iterator::operator->() const
-    {
-        assert(_wrap._root);
-        return const_cast<xml_node*>(&_wrap); // BCC5 workaround
-    }
-
-    PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++()
-    {
-        assert(_wrap._root);
-        _wrap = _wrap.next_sibling(_name);
-        return *this;
-    }
-
-    PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
-    {
-        xml_named_node_iterator temp = *this;
-        ++*this;
-        return temp;
-    }
-
-    PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--()
-    {
-        if (_wrap._root)
-            _wrap = _wrap.previous_sibling(_name);
-        else
-        {
-            _wrap = _parent.last_child();
-
-            if (!impl::strequal(_wrap.name(), _name))
-                _wrap = _wrap.previous_sibling(_name);
-        }
-
-        return *this;
-    }
-
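The attribute and named-node iterators above back xml_node::attributes() and xml_node::children(name) respectively; a brief sketch (element/attribute names are illustrative):

    #include <pugixml.hpp>
    #include <iostream>

    int main()
    {
        pugi::xml_document doc;
        doc.load_string("<list><item id='1'/><skip/><item id='2'/></list>");

        // xml_named_node_iterator: visits only the <item> siblings
        for (pugi::xml_node item : doc.child("list").children("item"))
        {
            // xml_attribute_iterator: walks the attribute chain
            for (pugi::xml_attribute a : item.attributes())
                std::cout << a.name() << "=" << a.value() << "\n";
        }
    }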
-    PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
-    {
-        xml_named_node_iterator temp = *this;
-        --*this;
-        return temp;
-    }
-
-    PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
-    {
-    }
-
-    PUGI__FN xml_parse_result::operator bool() const
-    {
-        return status == status_ok;
-    }
-
-    PUGI__FN const char* xml_parse_result::description() const
-    {
-        switch (status)
-        {
-        case status_ok: return "No error";
-
-        case status_file_not_found: return "File was not found";
-        case status_io_error: return "Error reading from file/stream";
-        case status_out_of_memory: return "Could not allocate memory";
-        case status_internal_error: return "Internal error occurred";
-
-        case status_unrecognized_tag: return "Could not determine tag type";
-
-        case status_bad_pi: return "Error parsing document declaration/processing instruction";
-        case status_bad_comment: return "Error parsing comment";
-        case status_bad_cdata: return "Error parsing CDATA section";
-        case status_bad_doctype: return "Error parsing document type declaration";
-        case status_bad_pcdata: return "Error parsing PCDATA section";
-        case status_bad_start_element: return "Error parsing start element tag";
-        case status_bad_attribute: return "Error parsing element attribute";
-        case status_bad_end_element: return "Error parsing end element tag";
-        case status_end_element_mismatch: return "Start-end tags mismatch";
-
-        case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
-
-        case status_no_document_element: return "No document element found";
-
-        default: return "Unknown error";
-        }
-    }
-
-    PUGI__FN xml_document::xml_document(): _buffer(0)
-    {
-        create();
-    }
-
-    PUGI__FN xml_document::~xml_document()
-    {
-        destroy();
-    }
-
-    PUGI__FN void xml_document::reset()
-    {
-        destroy();
-        create();
-    }
-
-    PUGI__FN void xml_document::reset(const xml_document& proto)
-    {
-        reset();
-
-        for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
-            append_copy(cur);
-    }
-
-    PUGI__FN void xml_document::create()
-    {
-        assert(!_root);
-
-    #ifdef PUGIXML_COMPACT
-        const size_t page_offset = sizeof(uint32_t);
-    #else
-        const size_t page_offset = 0;
-    #endif
-
-        // initialize sentinel page
-        PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory));
-
-        // prepare page structure
-        impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory);
-        assert(page);
-
-        page->busy_size = impl::xml_memory_page_size;
-
-        // setup first page marker
-    #ifdef PUGIXML_COMPACT
-        // round-trip through void* to avoid 'cast increases required alignment of target type' warning
-        page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
-        *page->compact_page_marker = sizeof(impl::xml_memory_page);
-    #endif
-
-        // allocate new root
-        _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
-        _root->prev_sibling_c = _root;
-
-        // setup sentinel page
-        page->allocator = static_cast<impl::xml_document_struct*>(_root);
-
-        // verify the document allocation
-        assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
-    }
-
-    PUGI__FN void xml_document::destroy()
-    {
-        assert(_root);
-
-        // destroy static storage
-        if (_buffer)
-        {
-            impl::xml_memory::deallocate(_buffer);
-            _buffer = 0;
-        }
-
-        // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
-        for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
-        {
-            if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
-        }
-
-        // destroy dynamic storage, leave sentinel page (it's in static memory)
-        impl::xml_memory_page* root_page = PUGI__GETPAGE(_root);
-        assert(root_page && !root_page->prev);
-        assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
-
-        for (impl::xml_memory_page* page = root_page->next; page; )
-        {
-            impl::xml_memory_page* next = page->next;
-
-            impl::xml_allocator::deallocate_page(page);
-
-            page = next;
-        }
-
-    #ifdef PUGIXML_COMPACT
-        // destroy hash table
-        static_cast<impl::xml_document_struct*>(_root)->hash.clear();
-    #endif
-
-        _root = 0;
-    }
-
-#ifndef PUGIXML_NO_STL
-    PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
-    {
-        reset();
-
-        return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer);
-    }
-
-    PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
-    {
-        reset();
-
-        return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
-    }
-#endif
-
-    PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
-    {
-        // Force native encoding (skip autodetection)
-    #ifdef PUGIXML_WCHAR_MODE
-        xml_encoding encoding = encoding_wchar;
-    #else
-        xml_encoding encoding = encoding_utf8;
-    #endif
-
-        return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
-    }
-
-    PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
-    {
-        return load_string(contents, options);
-    }
-
-    PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
-    {
-        reset();
-
-        using impl::auto_deleter; // MSVC7 workaround
-        auto_deleter<FILE> file(fopen(path_, "rb"), impl::close_file);
-
-        return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
-    }
-
-    PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
-    {
-        reset();
-
-        using impl::auto_deleter; // MSVC7 workaround
-        auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file);
-
-        return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
-    }
-
-    PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
-    {
-        reset();
-
-        return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
-    }
-
-    PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
-    {
-        reset();
-
-        return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
-    }
-
-    PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
-    {
-        reset();
-
-        return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
-    }
-
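A minimal sketch of the load/save entry points above, including the xml_parse_result check (the file name is illustrative):

    #include <pugixml.hpp>
    #include <iostream>

    int main()
    {
        pugi::xml_document doc;
        pugi::xml_parse_result result = doc.load_file("model.xml");

        if (!result)  // xml_parse_result::operator bool from above
        {
            std::cerr << "parse failed: " << result.description()
                      << " at offset " << result.offset << "\n";
            return 1;
        }

        doc.save_file("model_copy.xml");  // round-trip through save_file
        return 0;
    }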
-    PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
-    {
-        impl::xml_buffered_writer buffered_writer(writer, encoding);
-
-        if ((flags & format_write_bom) && encoding != encoding_latin1)
-        {
-            // BOM always represents the codepoint U+FEFF, so just write it in native encoding
-        #ifdef PUGIXML_WCHAR_MODE
-            unsigned int bom = 0xfeff;
-            buffered_writer.write(static_cast<char_t>(bom));
-        #else
-            buffered_writer.write('\xef', '\xbb', '\xbf');
-        #endif
-        }
-
-        if (!(flags & format_no_declaration) && !impl::has_declaration(_root))
-        {
-            buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\"?>"));
-            if (!(flags & format_raw)) buffered_writer.write('\n');
-        }
-
-        impl::node_output(buffered_writer, _root, indent, flags, 0);
-
-        buffered_writer.flush();
-    }
-
-#ifndef PUGIXML_NO_STL
-    PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
-    {
-        xml_writer_stream writer(stream);
-
-        save(writer, indent, flags, encoding);
-    }
-
-    PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
-    {
-        xml_writer_stream writer(stream);
-
-        save(writer, indent, flags, encoding_wchar);
-    }
-#endif
-
-    PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
-    {
-        using impl::auto_deleter; // MSVC7 workaround
-        auto_deleter<FILE> file(fopen(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file);
-
-        return impl::save_file_impl(*this, file.data, indent, flags, encoding);
-    }
-
-    PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
-    {
-        using impl::auto_deleter; // MSVC7 workaround
-        wchar_t openMode[3] = {L'\0'};
-        if (flags & format_save_file_text) {
-            openMode[0] = L'w';
-        } else {
-            openMode[0] = L'w';
-            openMode[1] = L'b';
-        }
-        auto_deleter<FILE> file(impl::open_file_wide(path_, openMode), impl::close_file);
-
-        return impl::save_file_impl(*this, file.data, indent, flags, encoding);
-    }
-
-    PUGI__FN xml_node xml_document::document_element() const
-    {
-        assert(_root);
-
-        for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
-            if (PUGI__NODETYPE(i) == node_element)
-                return xml_node(i);
-
-        return xml_node();
-    }
-
-#ifndef PUGIXML_NO_STL
-    PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
-    {
-        assert(str);
-
-        return impl::as_utf8_impl(str, impl::strlength_wide(str));
-    }
-
-    PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
-    {
-        return impl::as_utf8_impl(str.c_str(), str.size());
-    }
-
-    PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
-    {
-        assert(str);
-
-        return impl::as_wide_impl(str, strlen(str));
-    }
-
-    PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
-    {
-        return impl::as_wide_impl(str.c_str(), str.size());
-    }
-#endif
-
-    PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
-    {
-        impl::xml_memory::allocate = allocate;
-        impl::xml_memory::deallocate = deallocate;
-    }
-
-    PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
-    {
-        return impl::xml_memory::allocate;
-    }
-
-    PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
-    {
-        return impl::xml_memory::deallocate;
-    }
-}
-
-#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
-namespace std
-{
-    // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
-    PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const 
pugi::xml_node_iterator&) - { - return std::bidirectional_iterator_tag(); - } - - PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&) - { - return std::bidirectional_iterator_tag(); - } - - PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&) - { - return std::bidirectional_iterator_tag(); - } -} -#endif - -#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC) -namespace std -{ - // Workarounds for (non-standard) iterator category detection - PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&) - { - return std::bidirectional_iterator_tag(); - } - - PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&) - { - return std::bidirectional_iterator_tag(); - } - - PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&) - { - return std::bidirectional_iterator_tag(); - } -} -#endif - -#ifndef PUGIXML_NO_XPATH -// STL replacements -PUGI__NS_BEGIN - struct equal_to - { - template bool operator()(const T& lhs, const T& rhs) const - { - return lhs == rhs; - } - }; - - struct not_equal_to - { - template bool operator()(const T& lhs, const T& rhs) const - { - return lhs != rhs; - } - }; - - struct less - { - template bool operator()(const T& lhs, const T& rhs) const - { - return lhs < rhs; - } - }; - - struct less_equal - { - template bool operator()(const T& lhs, const T& rhs) const - { - return lhs <= rhs; - } - }; - - template void swap(T& lhs, T& rhs) - { - T temp = lhs; - lhs = rhs; - rhs = temp; - } - - template I min_element(I begin, I end, const Pred& pred) - { - I result = begin; - - for (I it = begin + 1; it != end; ++it) - if (pred(*it, *result)) - result = it; - - return result; - } - - template void reverse(I begin, I end) - { - while (end - begin > 1) swap(*begin++, *--end); - } - - template I unique(I begin, I end) - { - // fast skip head - while (end - begin > 1 && *begin != *(begin + 1)) begin++; - - if (begin == end) return begin; - - // last written element - I write = begin++; - - // merge unique elements - while (begin != end) - { - if (*begin != *write) - *++write = *begin++; - else - begin++; - } - - // past-the-end (write points to live element) - return write + 1; - } - - template void copy_backwards(I begin, I end, I target) - { - while (begin != end) *--target = *--end; - } - - template void insertion_sort(I begin, I end, const Pred& pred, T*) - { - assert(begin != end); - - for (I it = begin + 1; it != end; ++it) - { - T val = *it; - - if (pred(val, *begin)) - { - // move to front - copy_backwards(begin, it, it + 1); - *begin = val; - } - else - { - I hole = it; - - // move hole backwards - while (pred(val, *(hole - 1))) - { - *hole = *(hole - 1); - hole--; - } - - // fill hole with element - *hole = val; - } - } - } - - // std variant for elements with == - template void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg, I* out_eqend) - { - I eqbeg = middle, eqend = middle + 1; - - // expand equal range - while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg; - while (eqend != end && *eqend == *eqbeg) ++eqend; - - // process outer elements - I ltend = eqbeg, gtbeg = eqend; - - for (;;) - { - // find the element from the right side that belongs to the left one - for (; gtbeg != end; ++gtbeg) - if (!pred(*eqbeg, *gtbeg)) - { - if (*gtbeg == *eqbeg) swap(*gtbeg, *eqend++); - else break; - } - - // find the element from the left side that belongs to the right one - 
for (; ltend != begin; --ltend) - if (!pred(*(ltend - 1), *eqbeg)) - { - if (*eqbeg == *(ltend - 1)) swap(*(ltend - 1), *--eqbeg); - else break; - } - - // scanned all elements - if (gtbeg == end && ltend == begin) - { - *out_eqbeg = eqbeg; - *out_eqend = eqend; - return; - } - - // make room for elements by moving equal area - if (gtbeg == end) - { - if (--ltend != --eqbeg) swap(*ltend, *eqbeg); - swap(*eqbeg, *--eqend); - } - else if (ltend == begin) - { - if (eqend != gtbeg) swap(*eqbeg, *eqend); - ++eqend; - swap(*gtbeg++, *eqbeg++); - } - else swap(*gtbeg++, *--ltend); - } - } - - template void median3(I first, I middle, I last, const Pred& pred) - { - if (pred(*middle, *first)) swap(*middle, *first); - if (pred(*last, *middle)) swap(*last, *middle); - if (pred(*middle, *first)) swap(*middle, *first); - } - - template void median(I first, I middle, I last, const Pred& pred) - { - if (last - first <= 40) - { - // median of three for small chunks - median3(first, middle, last, pred); - } - else - { - // median of nine - size_t step = (last - first + 1) / 8; - - median3(first, first + step, first + 2 * step, pred); - median3(middle - step, middle, middle + step, pred); - median3(last - 2 * step, last - step, last, pred); - median3(first + step, middle, last - step, pred); - } - } - - template void sort(I begin, I end, const Pred& pred) - { - // sort large chunks - while (end - begin > 32) - { - // find median element - I middle = begin + (end - begin) / 2; - median(begin, middle, end - 1, pred); - - // partition in three chunks (< = >) - I eqbeg, eqend; - partition(begin, middle, end, pred, &eqbeg, &eqend); - - // loop on larger half - if (eqbeg - begin > end - eqend) - { - sort(eqend, end, pred); - end = eqbeg; - } - else - { - sort(begin, eqbeg, pred); - begin = eqend; - } - } - - // insertion sort small chunk - if (begin != end) insertion_sort(begin, end, pred, &*begin); - } -PUGI__NS_END - -// Allocator used for AST and evaluation stacks -PUGI__NS_BEGIN - static const size_t xpath_memory_page_size = - #ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE - PUGIXML_MEMORY_XPATH_PAGE_SIZE - #else - 4096 - #endif - ; - - static const uintptr_t xpath_memory_block_alignment = sizeof(double) > sizeof(void*) ? sizeof(double) : sizeof(void*); - - struct xpath_memory_block - { - xpath_memory_block* next; - size_t capacity; - - union - { - char data[xpath_memory_page_size]; - double alignment; - }; - }; - - class xpath_allocator - { - xpath_memory_block* _root; - size_t _root_size; - - public: - #ifdef PUGIXML_NO_EXCEPTIONS - jmp_buf* error_handler; - #endif - - xpath_allocator(xpath_memory_block* root, size_t root_size = 0): _root(root), _root_size(root_size) - { - #ifdef PUGIXML_NO_EXCEPTIONS - error_handler = 0; - #endif - } - - void* allocate_nothrow(size_t size) - { - // round size up to block alignment boundary - size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); - - if (_root_size + size <= _root->capacity) - { - void* buf = &_root->data[0] + _root_size; - _root_size += size; - return buf; - } - else - { - // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests - size_t block_capacity_base = sizeof(_root->data); - size_t block_capacity_req = size + block_capacity_base / 4; - size_t block_capacity = (block_capacity_base > block_capacity_req) ? 
block_capacity_base : block_capacity_req; - - size_t block_size = block_capacity + offsetof(xpath_memory_block, data); - - xpath_memory_block* block = static_cast(xml_memory::allocate(block_size)); - if (!block) return 0; - - block->next = _root; - block->capacity = block_capacity; - - _root = block; - _root_size = size; - - return block->data; - } - } - - void* allocate(size_t size) - { - void* result = allocate_nothrow(size); - - if (!result) - { - #ifdef PUGIXML_NO_EXCEPTIONS - assert(error_handler); - longjmp(*error_handler, 1); - #else - throw std::bad_alloc(); - #endif - } - - return result; - } - - void* reallocate(void* ptr, size_t old_size, size_t new_size) - { - // round size up to block alignment boundary - old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); - new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); - - // we can only reallocate the last object - assert(ptr == 0 || static_cast(ptr) + old_size == &_root->data[0] + _root_size); - - // adjust root size so that we have not allocated the object at all - bool only_object = (_root_size == old_size); - - if (ptr) _root_size -= old_size; - - // allocate a new version (this will obviously reuse the memory if possible) - void* result = allocate(new_size); - assert(result); - - // we have a new block - if (result != ptr && ptr) - { - // copy old data - assert(new_size >= old_size); - memcpy(result, ptr, old_size); - - // free the previous page if it had no other objects - if (only_object) - { - assert(_root->data == result); - assert(_root->next); - - xpath_memory_block* next = _root->next->next; - - if (next) - { - // deallocate the whole page, unless it was the first one - xml_memory::deallocate(_root->next); - _root->next = next; - } - } - } - - return result; - } - - void revert(const xpath_allocator& state) - { - // free all new pages - xpath_memory_block* cur = _root; - - while (cur != state._root) - { - xpath_memory_block* next = cur->next; - - xml_memory::deallocate(cur); - - cur = next; - } - - // restore state - _root = state._root; - _root_size = state._root_size; - } - - void release() - { - xpath_memory_block* cur = _root; - assert(cur); - - while (cur->next) - { - xpath_memory_block* next = cur->next; - - xml_memory::deallocate(cur); - - cur = next; - } - } - }; - - struct xpath_allocator_capture - { - xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc) - { - } - - ~xpath_allocator_capture() - { - _target->revert(_state); - } - - xpath_allocator* _target; - xpath_allocator _state; - }; - - struct xpath_stack - { - xpath_allocator* result; - xpath_allocator* temp; - }; - - struct xpath_stack_data - { - xpath_memory_block blocks[2]; - xpath_allocator result; - xpath_allocator temp; - xpath_stack stack; - - #ifdef PUGIXML_NO_EXCEPTIONS - jmp_buf error_handler; - #endif - - xpath_stack_data(): result(blocks + 0), temp(blocks + 1) - { - blocks[0].next = blocks[1].next = 0; - blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data); - - stack.result = &result; - stack.temp = &temp; - - #ifdef PUGIXML_NO_EXCEPTIONS - result.error_handler = temp.error_handler = &error_handler; - #endif - } - - ~xpath_stack_data() - { - result.release(); - temp.release(); - } - }; -PUGI__NS_END - -// String class -PUGI__NS_BEGIN - class xpath_string - { - const char_t* _buffer; - bool _uses_heap; - size_t _length_heap; - - static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc) - { 
- char_t* result = static_cast(alloc->allocate((length + 1) * sizeof(char_t))); - assert(result); - - memcpy(result, string, length * sizeof(char_t)); - result[length] = 0; - - return result; - } - - xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap) - { - } - - public: - static xpath_string from_const(const char_t* str) - { - return xpath_string(str, false, 0); - } - - static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end) - { - assert(begin <= end && *end == 0); - - return xpath_string(begin, true, static_cast(end - begin)); - } - - static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc) - { - assert(begin <= end); - - size_t length = static_cast(end - begin); - - return length == 0 ? xpath_string() : xpath_string(duplicate_string(begin, length, alloc), true, length); - } - - xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0) - { - } - - void append(const xpath_string& o, xpath_allocator* alloc) - { - // skip empty sources - if (!*o._buffer) return; - - // fast append for constant empty target and constant source - if (!*_buffer && !_uses_heap && !o._uses_heap) - { - _buffer = o._buffer; - } - else - { - // need to make heap copy - size_t target_length = length(); - size_t source_length = o.length(); - size_t result_length = target_length + source_length; - - // allocate new buffer - char_t* result = static_cast(alloc->reallocate(_uses_heap ? const_cast(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t))); - assert(result); - - // append first string to the new buffer in case there was no reallocation - if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t)); - - // append second string to the new buffer - memcpy(result + target_length, o._buffer, source_length * sizeof(char_t)); - result[result_length] = 0; - - // finalize - _buffer = result; - _uses_heap = true; - _length_heap = result_length; - } - } - - const char_t* c_str() const - { - return _buffer; - } - - size_t length() const - { - return _uses_heap ? _length_heap : strlength(_buffer); - } - - char_t* data(xpath_allocator* alloc) - { - // make private heap copy - if (!_uses_heap) - { - size_t length_ = strlength(_buffer); - - _buffer = duplicate_string(_buffer, length_, alloc); - _uses_heap = true; - _length_heap = length_; - } - - return const_cast(_buffer); - } - - bool empty() const - { - return *_buffer == 0; - } - - bool operator==(const xpath_string& o) const - { - return strequal(_buffer, o._buffer); - } - - bool operator!=(const xpath_string& o) const - { - return !strequal(_buffer, o._buffer); - } - - bool uses_heap() const - { - return _uses_heap; - } - }; -PUGI__NS_END - -PUGI__NS_BEGIN - PUGI__FN bool starts_with(const char_t* string, const char_t* pattern) - { - while (*pattern && *string == *pattern) - { - string++; - pattern++; - } - - return *pattern == 0; - } - - PUGI__FN const char_t* find_char(const char_t* s, char_t c) - { - #ifdef PUGIXML_WCHAR_MODE - return wcschr(s, c); - #else - return strchr(s, c); - #endif - } - - PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p) - { - #ifdef PUGIXML_WCHAR_MODE - // MSVC6 wcsstr bug workaround (if s is empty it always returns 0) - return (*p == 0) ? 
s : wcsstr(s, p); - #else - return strstr(s, p); - #endif - } - - // Converts symbol to lower case, if it is an ASCII one - PUGI__FN char_t tolower_ascii(char_t ch) - { - return static_cast(ch - 'A') < 26 ? static_cast(ch | ' ') : ch; - } - - PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc) - { - if (na.attribute()) - return xpath_string::from_const(na.attribute().value()); - else - { - xml_node n = na.node(); - - switch (n.type()) - { - case node_pcdata: - case node_cdata: - case node_comment: - case node_pi: - return xpath_string::from_const(n.value()); - - case node_document: - case node_element: - { - xpath_string result; - - // element nodes can have value if parse_embed_pcdata was used - if (n.value()[0]) - result.append(xpath_string::from_const(n.value()), alloc); - - xml_node cur = n.first_child(); - - while (cur && cur != n) - { - if (cur.type() == node_pcdata || cur.type() == node_cdata) - result.append(xpath_string::from_const(cur.value()), alloc); - - if (cur.first_child()) - cur = cur.first_child(); - else if (cur.next_sibling()) - cur = cur.next_sibling(); - else - { - while (!cur.next_sibling() && cur != n) - cur = cur.parent(); - - if (cur != n) cur = cur.next_sibling(); - } - } - - return result; - } - - default: - return xpath_string(); - } - } - } - - PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn) - { - assert(ln->parent == rn->parent); - - // there is no common ancestor (the shared parent is null), nodes are from different documents - if (!ln->parent) return ln < rn; - - // determine sibling order - xml_node_struct* ls = ln; - xml_node_struct* rs = rn; - - while (ls && rs) - { - if (ls == rn) return true; - if (rs == ln) return false; - - ls = ls->next_sibling; - rs = rs->next_sibling; - } - - // if rn sibling chain ended ln must be before rn - return !rs; - } - - PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn) - { - // find common ancestor at the same depth, if any - xml_node_struct* lp = ln; - xml_node_struct* rp = rn; - - while (lp && rp && lp->parent != rp->parent) - { - lp = lp->parent; - rp = rp->parent; - } - - // parents are the same! - if (lp && rp) return node_is_before_sibling(lp, rp); - - // nodes are at different depths, need to normalize heights - bool left_higher = !lp; - - while (lp) - { - lp = lp->parent; - ln = ln->parent; - } - - while (rp) - { - rp = rp->parent; - rn = rn->parent; - } - - // one node is the ancestor of the other - if (ln == rn) return left_higher; - - // find common ancestor... 
again - while (ln->parent != rn->parent) - { - ln = ln->parent; - rn = rn->parent; - } - - return node_is_before_sibling(ln, rn); - } - - PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node) - { - while (node && node != parent) node = node->parent; - - return parent && node == parent; - } - - PUGI__FN const void* document_buffer_order(const xpath_node& xnode) - { - xml_node_struct* node = xnode.node().internal_object(); - - if (node) - { - if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0) - { - if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name; - if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value; - } - - return 0; - } - - xml_attribute_struct* attr = xnode.attribute().internal_object(); - - if (attr) - { - if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0) - { - if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name; - if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value; - } - - return 0; - } - - return 0; - } - - struct document_order_comparator - { - bool operator()(const xpath_node& lhs, const xpath_node& rhs) const - { - // optimized document order based check - const void* lo = document_buffer_order(lhs); - const void* ro = document_buffer_order(rhs); - - if (lo && ro) return lo < ro; - - // slow comparison - xml_node ln = lhs.node(), rn = rhs.node(); - - // compare attributes - if (lhs.attribute() && rhs.attribute()) - { - // shared parent - if (lhs.parent() == rhs.parent()) - { - // determine sibling order - for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute()) - if (a == rhs.attribute()) - return true; - - return false; - } - - // compare attribute parents - ln = lhs.parent(); - rn = rhs.parent(); - } - else if (lhs.attribute()) - { - // attributes go after the parent element - if (lhs.parent() == rhs.node()) return false; - - ln = lhs.parent(); - } - else if (rhs.attribute()) - { - // attributes go after the parent element - if (rhs.parent() == lhs.node()) return true; - - rn = rhs.parent(); - } - - if (ln == rn) return false; - - if (!ln || !rn) return ln < rn; - - return node_is_before(ln.internal_object(), rn.internal_object()); - } - }; - - struct duplicate_comparator - { - bool operator()(const xpath_node& lhs, const xpath_node& rhs) const - { - if (lhs.attribute()) return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true; - else return rhs.attribute() ? 
false : lhs.node() < rhs.node(); - } - }; - - PUGI__FN double gen_nan() - { - #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24)) - PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t)); - typedef uint32_t UI; // BCC5 workaround - union { float f; UI i; } u; - u.i = 0x7fc00000; - return u.f; - #else - // fallback - const volatile double zero = 0.0; - return zero / zero; - #endif - } - - PUGI__FN bool is_nan(double value) - { - #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) - return !!_isnan(value); - #elif defined(fpclassify) && defined(FP_NAN) - return fpclassify(value) == FP_NAN; - #else - // fallback - const volatile double v = value; - return v != v; - #endif - } - - PUGI__FN const char_t* convert_number_to_string_special(double value) - { - #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) - if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0; - if (_isnan(value)) return PUGIXML_TEXT("NaN"); - return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); - #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO) - switch (fpclassify(value)) - { - case FP_NAN: - return PUGIXML_TEXT("NaN"); - - case FP_INFINITE: - return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); - - case FP_ZERO: - return PUGIXML_TEXT("0"); - - default: - return 0; - } - #else - // fallback - const volatile double v = value; - - if (v == 0) return PUGIXML_TEXT("0"); - if (v != v) return PUGIXML_TEXT("NaN"); - if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); - return 0; - #endif - } - - PUGI__FN bool convert_number_to_boolean(double value) - { - return (value != 0 && !is_nan(value)); - } - - PUGI__FN void truncate_zeros(char* begin, char* end) - { - while (begin != end && end[-1] == '0') end--; - - *end = 0; - } - - // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent -#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE) - PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent) - { - // get base values - int sign, exponent; - _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign); - - // truncate redundant zeros - truncate_zeros(buffer, buffer + strlen(buffer)); - - // fill results - *out_mantissa = buffer; - *out_exponent = exponent; - } -#else - PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer, size_t buffer_size, char** out_mantissa, int* out_exponent) - { - // get a scientific notation value with IEEE DBL_DIG decimals - sprintf(buffer, "%.*e", DBL_DIG, value); - assert(strlen(buffer) < buffer_size); - (void)!buffer_size; - - // get the exponent (possibly negative) - char* exponent_string = strchr(buffer, 'e'); - assert(exponent_string); - - int exponent = atoi(exponent_string + 1); - - // extract mantissa string: skip sign - char* mantissa = buffer[0] == '-' ? 
buffer + 1 : buffer; - assert(mantissa[0] != '0' && mantissa[1] == '.'); - - // divide mantissa by 10 to eliminate integer part - mantissa[1] = mantissa[0]; - mantissa++; - exponent++; - - // remove extra mantissa digits and zero-terminate mantissa - truncate_zeros(mantissa, exponent_string); - - // fill results - *out_mantissa = mantissa; - *out_exponent = exponent; - } -#endif - - PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc) - { - // try special number conversion - const char_t* special = convert_number_to_string_special(value); - if (special) return xpath_string::from_const(special); - - // get mantissa + exponent form - char mantissa_buffer[32] = {}; - char* mantissa = nullptr; - int exponent = 0; - - convert_number_to_mantissa_exponent(value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent); - - // allocate a buffer of suitable length for the number - size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4; - char_t* result = static_cast(alloc->allocate(sizeof(char_t) * result_size)); - assert(result); - - // make the number! - char_t* s = result; - - // sign - if (value < 0) *s++ = '-'; - - // integer part - if (exponent <= 0) - { - *s++ = '0'; - } - else - { - while (exponent > 0) - { - assert(*mantissa == 0 || static_cast(static_cast(*mantissa) - '0') <= 9); - *s++ = *mantissa ? *mantissa++ : '0'; - exponent--; - } - } - - // fractional part - if (*mantissa) - { - // decimal point - *s++ = '.'; - - // extra zeroes from negative exponent - while (exponent < 0) - { - *s++ = '0'; - exponent++; - } - - // extra mantissa digits - while (*mantissa) - { - assert(static_cast(*mantissa - '0') <= 9); - *s++ = *mantissa++; - } - } - - // zero-terminate - assert(s < result + result_size); - *s = 0; - - return xpath_string::from_heap_preallocated(result, s); - } - - PUGI__FN bool check_string_to_number_format(const char_t* string) - { - // parse leading whitespace - while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string; - - // parse sign - if (*string == '-') ++string; - - if (!*string) return false; - - // if there is no integer part, there should be a decimal part with at least one digit - if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' 
|| !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false; - - // parse integer part - while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string; - - // parse decimal part - if (*string == '.') - { - ++string; - - while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string; - } - - // parse trailing whitespace - while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string; - - return *string == 0; - } - - PUGI__FN double convert_string_to_number(const char_t* string) - { - // check string format - if (!check_string_to_number_format(string)) return gen_nan(); - - // parse string - #ifdef PUGIXML_WCHAR_MODE - return wcstod(string, 0); - #else - return strtod(string, 0); - #endif - } - - PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result) - { - size_t length = static_cast(end - begin); - char_t* scratch = buffer; - - if (length >= sizeof(buffer) / sizeof(buffer[0])) - { - // need to make dummy on-heap copy - scratch = static_cast(xml_memory::allocate((length + 1) * sizeof(char_t))); - if (!scratch) return false; - } - - // copy string to zero-terminated buffer and perform conversion - memcpy(scratch, begin, length * sizeof(char_t)); - scratch[length] = 0; - - *out_result = convert_string_to_number(scratch); - - // free dummy buffer - if (scratch != buffer) xml_memory::deallocate(scratch); - - return true; - } - - PUGI__FN double round_nearest(double value) - { - return floor(value + 0.5); - } - - PUGI__FN double round_nearest_nzero(double value) - { - // same as round_nearest, but returns -0 for [-0.5, -0] - // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0) - return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5); - } - - PUGI__FN const char_t* qualified_name(const xpath_node& node) - { - return node.attribute() ? node.attribute().name() : node.node().name(); - } - - PUGI__FN const char_t* local_name(const xpath_node& node) - { - const char_t* name = qualified_name(node); - const char_t* p = find_char(name, ':'); - - return p ? p + 1 : name; - } - - struct namespace_uri_predicate - { - const char_t* prefix; - size_t prefix_length; - - namespace_uri_predicate(const char_t* name) - { - const char_t* pos = find_char(name, ':'); - - prefix = pos ? name : 0; - prefix_length = pos ? static_cast(pos - name) : 0; - } - - bool operator()(xml_attribute a) const - { - const char_t* name = a.name(); - - if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false; - - return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0; - } - }; - - PUGI__FN const char_t* namespace_uri(xml_node node) - { - namespace_uri_predicate pred = node.name(); - - xml_node p = node; - - while (p) - { - xml_attribute a = p.find_attribute(pred); - - if (a) return a.value(); - - p = p.parent(); - } - - return PUGIXML_TEXT(""); - } - - PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent) - { - namespace_uri_predicate pred = attr.name(); - - // Default namespace does not apply to attributes - if (!pred.prefix) return PUGIXML_TEXT(""); - - xml_node p = parent; - - while (p) - { - xml_attribute a = p.find_attribute(pred); - - if (a) return a.value(); - - p = p.parent(); - } - - return PUGIXML_TEXT(""); - } - - PUGI__FN const char_t* namespace_uri(const xpath_node& node) - { - return node.attribute() ? 
namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
-    }
-
-    PUGI__FN char_t* normalize_space(char_t* buffer)
-    {
-        char_t* write = buffer;
-
-        for (char_t* it = buffer; *it; )
-        {
-            char_t ch = *it++;
-
-            if (PUGI__IS_CHARTYPE(ch, ct_space))
-            {
-                // replace whitespace sequence with single space
-                while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
-
-                // avoid leading spaces
-                if (write != buffer) *write++ = ' ';
-            }
-            else *write++ = ch;
-        }
-
-        // remove trailing space
-        if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
-
-        // zero-terminate
-        *write = 0;
-
-        return write;
-    }
-
-    PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
-    {
-        char_t* write = buffer;
-
-        while (*buffer)
-        {
-            PUGI__DMC_VOLATILE char_t ch = *buffer++;
-
-            const char_t* pos = find_char(from, ch);
-
-            if (!pos)
-                *write++ = ch; // do not process
-            else if (static_cast<size_t>(pos - from) < to_length)
-                *write++ = to[pos - from]; // replace
-        }
-
-        // zero-terminate
-        *write = 0;
-
-        return write;
-    }
-
-    PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
-    {
-        unsigned char table[128] = {0};
-
-        while (*from)
-        {
-            unsigned int fc = static_cast<unsigned int>(*from);
-            unsigned int tc = static_cast<unsigned int>(*to);
-
-            if (fc >= 128 || tc >= 128)
-                return 0;
-
-            // code=128 means "skip character"
-            if (!table[fc])
-                table[fc] = static_cast<unsigned char>(tc ? tc : 128);
-
-            from++;
-            if (tc) to++;
-        }
-
-        for (int i = 0; i < 128; ++i)
-            if (!table[i])
-                table[i] = static_cast<unsigned char>(i);
-
-        void* result = alloc->allocate_nothrow(sizeof(table));
-
-        if (result)
-        {
-            memcpy(result, table, sizeof(table));
-        }
-
-        return static_cast<unsigned char*>(result);
-    }
-
-    PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table)
-    {
-        char_t* write = buffer;
-
-        while (*buffer)
-        {
-            char_t ch = *buffer++;
-            unsigned int index = static_cast<unsigned int>(ch);
-
-            if (index < 128)
-            {
-                unsigned char code = table[index];
-
-                // code=128 means "skip character" (table size is 128 so 128 can be a special value)
-                // this code skips these characters without extra branches
-                *write = static_cast<char_t>(code);
-                write += 1 - (code >> 7);
-            }
-            else
-            {
-                *write++ = ch;
-            }
-        }
-
-        // zero-terminate
-        *write = 0;
-
-        return write;
-    }
-
-    inline bool is_xpath_attribute(const char_t* name)
-    {
-        return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':'));
-    }
-
-    struct xpath_variable_boolean: xpath_variable
-    {
-        xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false)
-        {
-        }
-
-        bool value;
-        char_t name[1];
-    };
-
-    struct xpath_variable_number: xpath_variable
-    {
-        xpath_variable_number(): xpath_variable(xpath_type_number), value(0)
-        {
-        }
-
-        double value;
-        char_t name[1];
-    };
-
-    struct xpath_variable_string: xpath_variable
-    {
-        xpath_variable_string(): xpath_variable(xpath_type_string), value(0)
-        {
-        }
-
-        ~xpath_variable_string()
-        {
-            if (value) xml_memory::deallocate(value);
-        }
-
-        char_t* value;
-        char_t name[1];
-    };
-
-    struct xpath_variable_node_set: xpath_variable
-    {
-        xpath_variable_node_set(): xpath_variable(xpath_type_node_set)
-        {
-        }
-
-        xpath_node_set value;
-        char_t name[1];
-    };
-
-    static const xpath_node_set dummy_node_set;
-
-    PUGI__FN unsigned int hash_string(const char_t* str)
-    {
-        // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
-        unsigned int result = 0;
-
-        while (*str)
-        {
-            result += static_cast<unsigned int>(*str++);
-            result += result << 10;
-            result ^= result >> 6;
-        }
-
-        result += result << 3;
-        result ^= result >> 11;
-        result += result << 15;
-
-        return result;
-    }
-
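For intuition, a standalone sketch of the one-at-a-time hash used above (plain char input here; the library version hashes char_t, and this hash is for fast lookup, not security):

    #include <iostream>

    unsigned int hash_string(const char* str)
    {
        unsigned int result = 0;

        while (*str)
        {
            // mix each byte into the running hash
            result += static_cast<unsigned int>(*str++);
            result += result << 10;
            result ^= result >> 6;
        }

        // final avalanche
        result += result << 3;
        result ^= result >> 11;
        result += result << 15;

        return result;
    }

    int main()
    {
        // equal strings hash equally
        std::cout << hash_string("query") << " " << hash_string("query") << "\n";
    }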
-    template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
-    {
-        size_t length = strlength(name);
-        if (length == 0) return 0; // empty variable names are invalid
-
-        // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
-        void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
-        if (!memory) return 0;
-
-        T* result = new (memory) T();
-
-        memcpy(result->name, name, (length + 1) * sizeof(char_t));
-
-        return result;
-    }
-
-    PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
-    {
-        switch (type)
-        {
-        case xpath_type_node_set:
-            return new_xpath_variable<xpath_variable_node_set>(name);
-
-        case xpath_type_number:
-            return new_xpath_variable<xpath_variable_number>(name);
-
-        case xpath_type_string:
-            return new_xpath_variable<xpath_variable_string>(name);
-
-        case xpath_type_boolean:
-            return new_xpath_variable<xpath_variable_boolean>(name);
-
-        default:
-            return 0;
-        }
-    }
-
-    template <typename T> PUGI__FN void delete_xpath_variable(T* var)
-    {
-        var->~T();
-        xml_memory::deallocate(var);
-    }
-
-    PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
-    {
-        switch (type)
-        {
-        case xpath_type_node_set:
-            delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
-            break;
-
-        case xpath_type_number:
-            delete_xpath_variable(static_cast<xpath_variable_number*>(var));
-            break;
-
-        case xpath_type_string:
-            delete_xpath_variable(static_cast<xpath_variable_string*>(var));
-            break;
-
-        case xpath_type_boolean:
-            delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
-            break;
-
-        default:
-            assert(false && "Invalid variable type");
-        }
-    }
-
-    PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
-    {
-        switch (rhs->type())
-        {
-        case xpath_type_node_set:
-            return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
-
-        case xpath_type_number:
-            return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
-
-        case xpath_type_string:
-            return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
-
-        case xpath_type_boolean:
-            return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
-
-        default:
-            assert(false && "Invalid variable type");
-            return false;
-        }
-    }
-
-    PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
-    {
-        size_t length = static_cast<size_t>(end - begin);
-        char_t* scratch = buffer;
-
-        if (length >= sizeof(buffer) / sizeof(buffer[0]))
-        {
-            // need to make dummy on-heap copy
-            scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
-            if (!scratch) return false;
-        }
-
-        // copy string to zero-terminated buffer and perform lookup
-        memcpy(scratch, begin, length * sizeof(char_t));
-        scratch[length] = 0;
-
-        *out_result = set->get(scratch);
-
-        // free dummy buffer
-        if (scratch != buffer) xml_memory::deallocate(scratch);
-
-        return true;
-    }
-PUGI__NS_END
-
-// Internal node set class
-PUGI__NS_BEGIN
-    PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
-    {
-        if (end - begin < 2)
-            return xpath_node_set::type_sorted;
-
-        document_order_comparator cmp;
-
-        bool first = cmp(begin[0], begin[1]);
-
-        for (const xpath_node* it = begin + 1; it + 1 < end; ++it)
-            if (cmp(it[0], it[1]) != first)
-                return xpath_node_set::type_unsorted;
-
-        return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse;
-    }
-
-    PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
-    {
-        xpath_node_set::type_t order = rev ? 
xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; - - if (type == xpath_node_set::type_unsorted) - { - xpath_node_set::type_t sorted = xpath_get_order(begin, end); - - if (sorted == xpath_node_set::type_unsorted) - { - sort(begin, end, document_order_comparator()); - - type = xpath_node_set::type_sorted; - } - else - type = sorted; - } - - if (type != order) reverse(begin, end); - - return order; - } - - PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type) - { - if (begin == end) return xpath_node(); - - switch (type) - { - case xpath_node_set::type_sorted: - return *begin; - - case xpath_node_set::type_sorted_reverse: - return *(end - 1); - - case xpath_node_set::type_unsorted: - return *min_element(begin, end, document_order_comparator()); - - default: - assert(false && "Invalid node set type"); - return xpath_node(); - } - } - - class xpath_node_set_raw - { - xpath_node_set::type_t _type; - - xpath_node* _begin; - xpath_node* _end; - xpath_node* _eos; - - public: - xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0) - { - } - - xpath_node* begin() const - { - return _begin; - } - - xpath_node* end() const - { - return _end; - } - - bool empty() const - { - return _begin == _end; - } - - size_t size() const - { - return static_cast(_end - _begin); - } - - xpath_node first() const - { - return xpath_first(_begin, _end, _type); - } - - void push_back_grow(const xpath_node& node, xpath_allocator* alloc); - - void push_back(const xpath_node& node, xpath_allocator* alloc) - { - if (_end != _eos) - *_end++ = node; - else - push_back_grow(node, alloc); - } - - void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc) - { - if (begin_ == end_) return; - - size_t size_ = static_cast(_end - _begin); - size_t capacity = static_cast(_eos - _begin); - size_t count = static_cast(end_ - begin_); - - if (size_ + count > capacity) - { - // reallocate the old array or allocate a new one - xpath_node* data = static_cast(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node))); - assert(data); - - // finalize - _begin = data; - _end = data + size_; - _eos = data + size_ + count; - } - - memcpy(_end, begin_, count * sizeof(xpath_node)); - _end += count; - } - - void sort_do() - { - _type = xpath_sort(_begin, _end, _type, false); - } - - void truncate(xpath_node* pos) - { - assert(_begin <= pos && pos <= _end); - - _end = pos; - } - - void remove_duplicates() - { - if (_type == xpath_node_set::type_unsorted) - sort(_begin, _end, duplicate_comparator()); - - _end = unique(_begin, _end); - } - - xpath_node_set::type_t type() const - { - return _type; - } - - void set_type(xpath_node_set::type_t value) - { - _type = value; - } - }; - - PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc) - { - size_t capacity = static_cast(_eos - _begin); - - // get new capacity (1.5x rule) - size_t new_capacity = capacity + capacity / 2 + 1; - - // reallocate the old array or allocate a new one - xpath_node* data = static_cast(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); - assert(data); - - // finalize - _begin = data; - _end = data + capacity; - _eos = data + new_capacity; - - // push - *_end++ = node; - } -PUGI__NS_END - -PUGI__NS_BEGIN - struct xpath_context - { - xpath_node n; - size_t position, size; - - xpath_context(const xpath_node& n_, size_t 
position_, size_t size_): n(n_), position(position_), size(size_) - { - } - }; - - enum lexeme_t - { - lex_none = 0, - lex_equal, - lex_not_equal, - lex_less, - lex_greater, - lex_less_or_equal, - lex_greater_or_equal, - lex_plus, - lex_minus, - lex_multiply, - lex_union, - lex_var_ref, - lex_open_brace, - lex_close_brace, - lex_quoted_string, - lex_number, - lex_slash, - lex_double_slash, - lex_open_square_brace, - lex_close_square_brace, - lex_string, - lex_comma, - lex_axis_attribute, - lex_dot, - lex_double_dot, - lex_double_colon, - lex_eof - }; - - struct xpath_lexer_string - { - const char_t* begin; - const char_t* end; - - xpath_lexer_string(): begin(0), end(0) - { - } - - bool operator==(const char_t* other) const - { - size_t length = static_cast(end - begin); - - return strequalrange(other, begin, length); - } - }; - - class xpath_lexer - { - const char_t* _cur; - const char_t* _cur_lexeme_pos; - xpath_lexer_string _cur_lexeme_contents; - - lexeme_t _cur_lexeme; - - public: - explicit xpath_lexer(const char_t* query): _cur(query) - { - next(); - } - - const char_t* state() const - { - return _cur; - } - - void next() - { - const char_t* cur = _cur; - - while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur; - - // save lexeme position for error reporting - _cur_lexeme_pos = cur; - - switch (*cur) - { - case 0: - _cur_lexeme = lex_eof; - break; - - case '>': - if (*(cur+1) == '=') - { - cur += 2; - _cur_lexeme = lex_greater_or_equal; - } - else - { - cur += 1; - _cur_lexeme = lex_greater; - } - break; - - case '<': - if (*(cur+1) == '=') - { - cur += 2; - _cur_lexeme = lex_less_or_equal; - } - else - { - cur += 1; - _cur_lexeme = lex_less; - } - break; - - case '!': - if (*(cur+1) == '=') - { - cur += 2; - _cur_lexeme = lex_not_equal; - } - else - { - _cur_lexeme = lex_none; - } - break; - - case '=': - cur += 1; - _cur_lexeme = lex_equal; - - break; - - case '+': - cur += 1; - _cur_lexeme = lex_plus; - - break; - - case '-': - cur += 1; - _cur_lexeme = lex_minus; - - break; - - case '*': - cur += 1; - _cur_lexeme = lex_multiply; - - break; - - case '|': - cur += 1; - _cur_lexeme = lex_union; - - break; - - case '$': - cur += 1; - - if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) - { - _cur_lexeme_contents.begin = cur; - - while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; - - if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname - { - cur++; // : - - while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; - } - - _cur_lexeme_contents.end = cur; - - _cur_lexeme = lex_var_ref; - } - else - { - _cur_lexeme = lex_none; - } - - break; - - case '(': - cur += 1; - _cur_lexeme = lex_open_brace; - - break; - - case ')': - cur += 1; - _cur_lexeme = lex_close_brace; - - break; - - case '[': - cur += 1; - _cur_lexeme = lex_open_square_brace; - - break; - - case ']': - cur += 1; - _cur_lexeme = lex_close_square_brace; - - break; - - case ',': - cur += 1; - _cur_lexeme = lex_comma; - - break; - - case '/': - if (*(cur+1) == '/') - { - cur += 2; - _cur_lexeme = lex_double_slash; - } - else - { - cur += 1; - _cur_lexeme = lex_slash; - } - break; - - case '.': - if (*(cur+1) == '.') - { - cur += 2; - _cur_lexeme = lex_double_dot; - } - else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit)) - { - _cur_lexeme_contents.begin = cur; // . 
- - ++cur; - - while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; - - _cur_lexeme_contents.end = cur; - - _cur_lexeme = lex_number; - } - else - { - cur += 1; - _cur_lexeme = lex_dot; - } - break; - - case '@': - cur += 1; - _cur_lexeme = lex_axis_attribute; - - break; - - case '"': - case '\'': - { - char_t terminator = *cur; - - ++cur; - - _cur_lexeme_contents.begin = cur; - while (*cur && *cur != terminator) cur++; - _cur_lexeme_contents.end = cur; - - if (!*cur) - _cur_lexeme = lex_none; - else - { - cur += 1; - _cur_lexeme = lex_quoted_string; - } - - break; - } - - case ':': - if (*(cur+1) == ':') - { - cur += 2; - _cur_lexeme = lex_double_colon; - } - else - { - _cur_lexeme = lex_none; - } - break; - - default: - if (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) - { - _cur_lexeme_contents.begin = cur; - - while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; - - if (*cur == '.') - { - cur++; - - while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; - } - - _cur_lexeme_contents.end = cur; - - _cur_lexeme = lex_number; - } - else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) - { - _cur_lexeme_contents.begin = cur; - - while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; - - if (cur[0] == ':') - { - if (cur[1] == '*') // namespace test ncname:* - { - cur += 2; // :* - } - else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname - { - cur++; // : - - while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; - } - } - - _cur_lexeme_contents.end = cur; - - _cur_lexeme = lex_string; - } - else - { - _cur_lexeme = lex_none; - } - } - - _cur = cur; - } - - lexeme_t current() const - { - return _cur_lexeme; - } - - const char_t* current_pos() const - { - return _cur_lexeme_pos; - } - - const xpath_lexer_string& contents() const - { - assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string); - - return _cur_lexeme_contents; - } - }; - - enum ast_type_t - { - ast_unknown, - ast_op_or, // left or right - ast_op_and, // left and right - ast_op_equal, // left = right - ast_op_not_equal, // left != right - ast_op_less, // left < right - ast_op_greater, // left > right - ast_op_less_or_equal, // left <= right - ast_op_greater_or_equal, // left >= right - ast_op_add, // left + right - ast_op_subtract, // left - right - ast_op_multiply, // left * right - ast_op_divide, // left / right - ast_op_mod, // left % right - ast_op_negate, // left - right - ast_op_union, // left | right - ast_predicate, // apply predicate to set; next points to next predicate - ast_filter, // select * from left where right - ast_string_constant, // string constant - ast_number_constant, // number constant - ast_variable, // variable - ast_func_last, // last() - ast_func_position, // position() - ast_func_count, // count(left) - ast_func_id, // id(left) - ast_func_local_name_0, // local-name() - ast_func_local_name_1, // local-name(left) - ast_func_namespace_uri_0, // namespace-uri() - ast_func_namespace_uri_1, // namespace-uri(left) - ast_func_name_0, // name() - ast_func_name_1, // name(left) - ast_func_string_0, // string() - ast_func_string_1, // string(left) - ast_func_concat, // concat(left, right, siblings) - ast_func_starts_with, // starts_with(left, right) - ast_func_contains, // contains(left, right) - ast_func_substring_before, // substring-before(left, right) - ast_func_substring_after, // substring-after(left, right) - ast_func_substring_2, // substring(left, right) - ast_func_substring_3, // substring(left, right, third) - 
ast_func_string_length_0, // string-length() - ast_func_string_length_1, // string-length(left) - ast_func_normalize_space_0, // normalize-space() - ast_func_normalize_space_1, // normalize-space(left) - ast_func_translate, // translate(left, right, third) - ast_func_boolean, // boolean(left) - ast_func_not, // not(left) - ast_func_true, // true() - ast_func_false, // false() - ast_func_lang, // lang(left) - ast_func_number_0, // number() - ast_func_number_1, // number(left) - ast_func_sum, // sum(left) - ast_func_floor, // floor(left) - ast_func_ceiling, // ceiling(left) - ast_func_round, // round(left) - ast_step, // process set left with step - ast_step_root, // select root node - - ast_opt_translate_table, // translate(left, right, third) where right/third are constants - ast_opt_compare_attribute // @name = 'string' - }; - - enum axis_t - { - axis_ancestor, - axis_ancestor_or_self, - axis_attribute, - axis_child, - axis_descendant, - axis_descendant_or_self, - axis_following, - axis_following_sibling, - axis_namespace, - axis_parent, - axis_preceding, - axis_preceding_sibling, - axis_self - }; - - enum nodetest_t - { - nodetest_none, - nodetest_name, - nodetest_type_node, - nodetest_type_comment, - nodetest_type_pi, - nodetest_type_text, - nodetest_pi, - nodetest_all, - nodetest_all_in_namespace - }; - - enum predicate_t - { - predicate_default, - predicate_posinv, - predicate_constant, - predicate_constant_one - }; - - enum nodeset_eval_t - { - nodeset_eval_all, - nodeset_eval_any, - nodeset_eval_first - }; - - template struct axis_to_type - { - static const axis_t axis; - }; - - template const axis_t axis_to_type::axis = N; - - class xpath_ast_node - { - private: - // node type - char _type; - char _rettype; - - // for ast_step - char _axis; - - // for ast_step/ast_predicate/ast_filter - char _test; - - // tree node structure - xpath_ast_node* _left; - xpath_ast_node* _right; - xpath_ast_node* _next; - - union - { - // value for ast_string_constant - const char_t* string; - // value for ast_number_constant - double number; - // variable for ast_variable - xpath_variable* variable; - // node test for ast_step (node name/namespace/node type/pi target) - const char_t* nodetest; - // table for ast_opt_translate_table - const unsigned char* table; - } _data; - - xpath_ast_node(const xpath_ast_node&); - xpath_ast_node& operator=(const xpath_ast_node&); - - template static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) - { - xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); - - if (lt != xpath_type_node_set && rt != xpath_type_node_set) - { - if (lt == xpath_type_boolean || rt == xpath_type_boolean) - return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack)); - else if (lt == xpath_type_number || rt == xpath_type_number) - return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack)); - else if (lt == xpath_type_string || rt == xpath_type_string) - { - xpath_allocator_capture cr(stack.result); - - xpath_string ls = lhs->eval_string(c, stack); - xpath_string rs = rhs->eval_string(c, stack); - - return comp(ls, rs); - } - } - else if (lt == xpath_type_node_set && rt == xpath_type_node_set) - { - xpath_allocator_capture cr(stack.result); - - xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); - xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); - - for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) - for (const xpath_node* ri = 
rs.begin(); ri != rs.end(); ++ri) - { - xpath_allocator_capture cri(stack.result); - - if (comp(string_value(*li, stack.result), string_value(*ri, stack.result))) - return true; - } - - return false; - } - else - { - if (lt == xpath_type_node_set) - { - swap(lhs, rhs); - swap(lt, rt); - } - - if (lt == xpath_type_boolean) - return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack)); - else if (lt == xpath_type_number) - { - xpath_allocator_capture cr(stack.result); - - double l = lhs->eval_number(c, stack); - xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); - - for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) - { - xpath_allocator_capture cri(stack.result); - - if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) - return true; - } - - return false; - } - else if (lt == xpath_type_string) - { - xpath_allocator_capture cr(stack.result); - - xpath_string l = lhs->eval_string(c, stack); - xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); - - for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) - { - xpath_allocator_capture cri(stack.result); - - if (comp(l, string_value(*ri, stack.result))) - return true; - } - - return false; - } - } - - assert(false && "Wrong types"); - return false; - } - - static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval) - { - return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any; - } - - template static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) - { - xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); - - if (lt != xpath_type_node_set && rt != xpath_type_node_set) - return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack)); - else if (lt == xpath_type_node_set && rt == xpath_type_node_set) - { - xpath_allocator_capture cr(stack.result); - - xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); - xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); - - for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) - { - xpath_allocator_capture cri(stack.result); - - double l = convert_string_to_number(string_value(*li, stack.result).c_str()); - - for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) - { - xpath_allocator_capture crii(stack.result); - - if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) - return true; - } - } - - return false; - } - else if (lt != xpath_type_node_set && rt == xpath_type_node_set) - { - xpath_allocator_capture cr(stack.result); - - double l = lhs->eval_number(c, stack); - xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); - - for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) - { - xpath_allocator_capture cri(stack.result); - - if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) - return true; - } - - return false; - } - else if (lt == xpath_type_node_set && rt != xpath_type_node_set) - { - xpath_allocator_capture cr(stack.result); - - xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); - double r = rhs->eval_number(c, stack); - - for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) - { - xpath_allocator_capture cri(stack.result); - - if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r)) - return true; - } - - return false; - } - else - { - assert(false && "Wrong types"); - return false; 
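// Comparisons involving node sets are existential in XPath 1.0: `ns < 3` holds if
// *any* node's string-value converts to a number less than 3, which is why the
// branches above loop only until the first witness. The same idea over plain
// doubles (stand-ins for the converted string-values):

#include <vector>

bool any_less(const std::vector<double>& values, double r)
{
    for (std::size_t i = 0; i < values.size(); ++i)
        if (values[i] < r)
            return true; // one witness suffices

    return false;
}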
- } - } - - static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) - { - assert(ns.size() >= first); - assert(expr->rettype() != xpath_type_number); - - size_t i = 1; - size_t size = ns.size() - first; - - xpath_node* last = ns.begin() + first; - - // remove_if... or well, sort of - for (xpath_node* it = last; it != ns.end(); ++it, ++i) - { - xpath_context c(*it, i, size); - - if (expr->eval_boolean(c, stack)) - { - *last++ = *it; - - if (once) break; - } - } - - ns.truncate(last); - } - - static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) - { - assert(ns.size() >= first); - assert(expr->rettype() == xpath_type_number); - - size_t i = 1; - size_t size = ns.size() - first; - - xpath_node* last = ns.begin() + first; - - // remove_if... or well, sort of - for (xpath_node* it = last; it != ns.end(); ++it, ++i) - { - xpath_context c(*it, i, size); - - if (expr->eval_number(c, stack) == i) - { - *last++ = *it; - - if (once) break; - } - } - - ns.truncate(last); - } - - static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) - { - assert(ns.size() >= first); - assert(expr->rettype() == xpath_type_number); - - size_t size = ns.size() - first; - - xpath_node* last = ns.begin() + first; - - xpath_context c(xpath_node(), 1, size); - - double er = expr->eval_number(c, stack); - - if (er >= 1.0 && er <= size) - { - size_t eri = static_cast(er); - - if (er == eri) - { - xpath_node r = last[eri - 1]; - - *last++ = r; - } - } - - ns.truncate(last); - } - - void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once) - { - if (ns.size() == first) return; - - assert(_type == ast_filter || _type == ast_predicate); - - if (_test == predicate_constant || _test == predicate_constant_one) - apply_predicate_number_const(ns, first, _right, stack); - else if (_right->rettype() == xpath_type_number) - apply_predicate_number(ns, first, _right, stack, once); - else - apply_predicate_boolean(ns, first, _right, stack, once); - } - - void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval) - { - if (ns.size() == first) return; - - bool last_once = eval_once(ns.type(), eval); - - for (xpath_ast_node* pred = _right; pred; pred = pred->_next) - pred->apply_predicate(ns, first, stack, !pred->_next && last_once); - } - - bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc) - { - assert(a); - - const char_t* name = a->name ? 
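// apply_predicate_boolean/number above filter the node set in place with a write
// cursor (a stable remove_if), handing each candidate its 1-based position and
// the set size. A sketch with a generic predicate (names hypothetical):

#include <vector>

template <typename Pred>
void filter_in_place(std::vector<int>& v, std::size_t first, Pred keep)
{
    std::size_t last = first;
    std::size_t size = v.size() - first;

    for (std::size_t i = first; i < v.size(); ++i)
        if (keep(v[i], i - first + 1, size)) // (value, position, size)
            v[last++] = v[i];

    v.resize(last);
}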
a->name + 0 : PUGIXML_TEXT(""); - - switch (_test) - { - case nodetest_name: - if (strequal(name, _data.nodetest) && is_xpath_attribute(name)) - { - ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); - return true; - } - break; - - case nodetest_type_node: - case nodetest_all: - if (is_xpath_attribute(name)) - { - ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); - return true; - } - break; - - case nodetest_all_in_namespace: - if (starts_with(name, _data.nodetest) && is_xpath_attribute(name)) - { - ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); - return true; - } - break; - - default: - ; - } - - return false; - } - - bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc) - { - assert(n); - - xml_node_type type = PUGI__NODETYPE(n); - - switch (_test) - { - case nodetest_name: - if (type == node_element && n->name && strequal(n->name, _data.nodetest)) - { - ns.push_back(xml_node(n), alloc); - return true; - } - break; - - case nodetest_type_node: - ns.push_back(xml_node(n), alloc); - return true; - - case nodetest_type_comment: - if (type == node_comment) - { - ns.push_back(xml_node(n), alloc); - return true; - } - break; - - case nodetest_type_text: - if (type == node_pcdata || type == node_cdata) - { - ns.push_back(xml_node(n), alloc); - return true; - } - break; - - case nodetest_type_pi: - if (type == node_pi) - { - ns.push_back(xml_node(n), alloc); - return true; - } - break; - - case nodetest_pi: - if (type == node_pi && n->name && strequal(n->name, _data.nodetest)) - { - ns.push_back(xml_node(n), alloc); - return true; - } - break; - - case nodetest_all: - if (type == node_element) - { - ns.push_back(xml_node(n), alloc); - return true; - } - break; - - case nodetest_all_in_namespace: - if (type == node_element && n->name && starts_with(n->name, _data.nodetest)) - { - ns.push_back(xml_node(n), alloc); - return true; - } - break; - - default: - assert(false && "Unknown axis"); - } - - return false; - } - - template void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T) - { - const axis_t axis = T::axis; - - switch (axis) - { - case axis_attribute: - { - for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute) - if (step_push(ns, a, n, alloc) & once) - return; - - break; - } - - case axis_child: - { - for (xml_node_struct* c = n->first_child; c; c = c->next_sibling) - if (step_push(ns, c, alloc) & once) - return; - - break; - } - - case axis_descendant: - case axis_descendant_or_self: - { - if (axis == axis_descendant_or_self) - if (step_push(ns, n, alloc) & once) - return; - - xml_node_struct* cur = n->first_child; - - while (cur) - { - if (step_push(ns, cur, alloc) & once) - return; - - if (cur->first_child) - cur = cur->first_child; - else - { - while (!cur->next_sibling) - { - cur = cur->parent; - - if (cur == n) return; - } - - cur = cur->next_sibling; - } - } - - break; - } - - case axis_following_sibling: - { - for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling) - if (step_push(ns, c, alloc) & once) - return; - - break; - } - - case axis_preceding_sibling: - { - for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c) - if (step_push(ns, c, alloc) & once) - return; - - break; - } - - case axis_following: - { - xml_node_struct* cur = n; - - // exit from this node so that we don't include descendants - while (!cur->next_sibling) - { - cur = cur->parent; - - if (!cur) return; - } - - cur = 
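// The descendant axis above is walked iteratively: descend via first_child,
// backtrack via parent until a next_sibling exists, and stop once the walk climbs
// back to the start node. Self-contained sketch (the node layout mirrors the
// first_child/next_sibling/parent fields used above):

struct node { node* first_child; node* next_sibling; node* parent; };

template <typename F> void for_each_descendant(node* n, F visit)
{
    node* cur = n->first_child;

    while (cur)
    {
        visit(cur);

        if (cur->first_child)
            cur = cur->first_child;      // go deeper
        else
        {
            while (!cur->next_sibling)   // backtrack until a sibling exists
            {
                cur = cur->parent;
                if (cur == n) return;    // climbed back to the start node
            }

            cur = cur->next_sibling;
        }
    }
}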
cur->next_sibling; - - while (cur) - { - if (step_push(ns, cur, alloc) & once) - return; - - if (cur->first_child) - cur = cur->first_child; - else - { - while (!cur->next_sibling) - { - cur = cur->parent; - - if (!cur) return; - } - - cur = cur->next_sibling; - } - } - - break; - } - - case axis_preceding: - { - xml_node_struct* cur = n; - - // exit from this node so that we don't include descendants - while (!cur->prev_sibling_c->next_sibling) - { - cur = cur->parent; - - if (!cur) return; - } - - cur = cur->prev_sibling_c; - - while (cur) - { - if (cur->first_child) - cur = cur->first_child->prev_sibling_c; - else - { - // leaf node, can't be ancestor - if (step_push(ns, cur, alloc) & once) - return; - - while (!cur->prev_sibling_c->next_sibling) - { - cur = cur->parent; - - if (!cur) return; - - if (!node_is_ancestor(cur, n)) - if (step_push(ns, cur, alloc) & once) - return; - } - - cur = cur->prev_sibling_c; - } - } - - break; - } - - case axis_ancestor: - case axis_ancestor_or_self: - { - if (axis == axis_ancestor_or_self) - if (step_push(ns, n, alloc) & once) - return; - - xml_node_struct* cur = n->parent; - - while (cur) - { - if (step_push(ns, cur, alloc) & once) - return; - - cur = cur->parent; - } - - break; - } - - case axis_self: - { - step_push(ns, n, alloc); - - break; - } - - case axis_parent: - { - if (n->parent) - step_push(ns, n->parent, alloc); - - break; - } - - default: - assert(false && "Unimplemented axis"); - } - } - - template void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v) - { - const axis_t axis = T::axis; - - switch (axis) - { - case axis_ancestor: - case axis_ancestor_or_self: - { - if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test - if (step_push(ns, a, p, alloc) & once) - return; - - xml_node_struct* cur = p; - - while (cur) - { - if (step_push(ns, cur, alloc) & once) - return; - - cur = cur->parent; - } - - break; - } - - case axis_descendant_or_self: - case axis_self: - { - if (_test == nodetest_type_node) // reject attributes based on principal node type test - step_push(ns, a, p, alloc); - - break; - } - - case axis_following: - { - xml_node_struct* cur = p; - - while (cur) - { - if (cur->first_child) - cur = cur->first_child; - else - { - while (!cur->next_sibling) - { - cur = cur->parent; - - if (!cur) return; - } - - cur = cur->next_sibling; - } - - if (step_push(ns, cur, alloc) & once) - return; - } - - break; - } - - case axis_parent: - { - step_push(ns, p, alloc); - - break; - } - - case axis_preceding: - { - // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding - step_fill(ns, p, alloc, once, v); - break; - } - - default: - assert(false && "Unimplemented axis"); - } - } - - template void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v) - { - const axis_t axis = T::axis; - const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self); - - if (xn.node()) - step_fill(ns, xn.node().internal_object(), alloc, once, v); - else if (axis_has_attributes && xn.attribute() && xn.parent()) - step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v); - } - - template 
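// Note `step_push(...) & once` throughout the traversals above: bitwise `&`
// (rather than `&&`) makes the push unconditional and only gates the early
// return, equivalent to:
//
//     bool pushed = step_push(ns, cur, alloc);
//     if (pushed && once) return;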
xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v) - { - const axis_t axis = T::axis; - const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling); - const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; - - bool once = - (axis == axis_attribute && _test == nodetest_name) || - (!_right && eval_once(axis_type, eval)) || - (_right && !_right->_next && _right->_test == predicate_constant_one); - - xpath_node_set_raw ns; - ns.set_type(axis_type); - - if (_left) - { - xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all); - - // self axis preserves the original order - if (axis == axis_self) ns.set_type(s.type()); - - for (const xpath_node* it = s.begin(); it != s.end(); ++it) - { - size_t size = ns.size(); - - // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes - if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted); - - step_fill(ns, *it, stack.result, once, v); - if (_right) apply_predicates(ns, size, stack, eval); - } - } - else - { - step_fill(ns, c.n, stack.result, once, v); - if (_right) apply_predicates(ns, 0, stack, eval); - } - - // child, attribute and self axes always generate unique set of nodes - // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice - if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted) - ns.remove_duplicates(); - - return ns; - } - - public: - xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value): - _type(static_cast(type)), _rettype(static_cast(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) - { - assert(type == ast_string_constant); - _data.string = value; - } - - xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value): - _type(static_cast(type)), _rettype(static_cast(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) - { - assert(type == ast_number_constant); - _data.number = value; - } - - xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value): - _type(static_cast(type)), _rettype(static_cast(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) - { - assert(type == ast_variable); - _data.variable = value; - } - - xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0): - _type(static_cast(type)), _rettype(static_cast(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0) - { - } - - xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents): - _type(static_cast(type)), _rettype(xpath_type_node_set), _axis(static_cast(axis)), _test(static_cast(test)), _left(left), _right(0), _next(0) - { - assert(type == ast_step); - _data.nodetest = contents; - } - - xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test): - _type(static_cast(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast(test)), _left(left), _right(right), _next(0) - { - assert(type == ast_filter || type == ast_predicate); - } - - void set_next(xpath_ast_node* value) - { - _next = value; - } - - void set_right(xpath_ast_node* value) - { - _right = value; - } - - bool 
eval_boolean(const xpath_context& c, const xpath_stack& stack) - { - switch (_type) - { - case ast_op_or: - return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack); - - case ast_op_and: - return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack); - - case ast_op_equal: - return compare_eq(_left, _right, c, stack, equal_to()); - - case ast_op_not_equal: - return compare_eq(_left, _right, c, stack, not_equal_to()); - - case ast_op_less: - return compare_rel(_left, _right, c, stack, less()); - - case ast_op_greater: - return compare_rel(_right, _left, c, stack, less()); - - case ast_op_less_or_equal: - return compare_rel(_left, _right, c, stack, less_equal()); - - case ast_op_greater_or_equal: - return compare_rel(_right, _left, c, stack, less_equal()); - - case ast_func_starts_with: - { - xpath_allocator_capture cr(stack.result); - - xpath_string lr = _left->eval_string(c, stack); - xpath_string rr = _right->eval_string(c, stack); - - return starts_with(lr.c_str(), rr.c_str()); - } - - case ast_func_contains: - { - xpath_allocator_capture cr(stack.result); - - xpath_string lr = _left->eval_string(c, stack); - xpath_string rr = _right->eval_string(c, stack); - - return find_substring(lr.c_str(), rr.c_str()) != 0; - } - - case ast_func_boolean: - return _left->eval_boolean(c, stack); - - case ast_func_not: - return !_left->eval_boolean(c, stack); - - case ast_func_true: - return true; - - case ast_func_false: - return false; - - case ast_func_lang: - { - if (c.n.attribute()) return false; - - xpath_allocator_capture cr(stack.result); - - xpath_string lang = _left->eval_string(c, stack); - - for (xml_node n = c.n.node(); n; n = n.parent()) - { - xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang")); - - if (a) - { - const char_t* value = a.value(); - - // strnicmp / strncasecmp is not portable - for (const char_t* lit = lang.c_str(); *lit; ++lit) - { - if (tolower_ascii(*lit) != tolower_ascii(*value)) return false; - ++value; - } - - return *value == 0 || *value == '-'; - } - } - - return false; - } - - case ast_opt_compare_attribute: - { - const char_t* value = (_right->_type == ast_string_constant) ? 
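// lang('en') above matches xml:lang="en" and xml:lang="en-US" but not "english":
// the attribute must match the argument case-insensitively in full, or continue
// with a '-' subtag separator. The check in isolation:

#include <cctype>

bool lang_matches(const char* value, const char* lang)
{
    for (; *lang; ++lang, ++value)
        if (std::tolower((unsigned char)*lang) != std::tolower((unsigned char)*value))
            return false;

    return *value == 0 || *value == '-';
}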
_right->_data.string : _right->_data.variable->get_string(); - - xml_attribute attr = c.n.node().attribute(_left->_data.nodetest); - - return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name()); - } - - case ast_variable: - { - assert(_rettype == _data.variable->type()); - - if (_rettype == xpath_type_boolean) - return _data.variable->get_boolean(); - - // fallthrough to type conversion - } - - default: - { - switch (_rettype) - { - case xpath_type_number: - return convert_number_to_boolean(eval_number(c, stack)); - - case xpath_type_string: - { - xpath_allocator_capture cr(stack.result); - - return !eval_string(c, stack).empty(); - } - - case xpath_type_node_set: - { - xpath_allocator_capture cr(stack.result); - - return !eval_node_set(c, stack, nodeset_eval_any).empty(); - } - - default: - assert(false && "Wrong expression for return type boolean"); - return false; - } - } - } - } - - double eval_number(const xpath_context& c, const xpath_stack& stack) - { - switch (_type) - { - case ast_op_add: - return _left->eval_number(c, stack) + _right->eval_number(c, stack); - - case ast_op_subtract: - return _left->eval_number(c, stack) - _right->eval_number(c, stack); - - case ast_op_multiply: - return _left->eval_number(c, stack) * _right->eval_number(c, stack); - - case ast_op_divide: - return _left->eval_number(c, stack) / _right->eval_number(c, stack); - - case ast_op_mod: - return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack)); - - case ast_op_negate: - return -_left->eval_number(c, stack); - - case ast_number_constant: - return _data.number; - - case ast_func_last: - return static_cast(c.size); - - case ast_func_position: - return static_cast(c.position); - - case ast_func_count: - { - xpath_allocator_capture cr(stack.result); - - return static_cast(_left->eval_node_set(c, stack, nodeset_eval_all).size()); - } - - case ast_func_string_length_0: - { - xpath_allocator_capture cr(stack.result); - - return static_cast(string_value(c.n, stack.result).length()); - } - - case ast_func_string_length_1: - { - xpath_allocator_capture cr(stack.result); - - return static_cast(_left->eval_string(c, stack).length()); - } - - case ast_func_number_0: - { - xpath_allocator_capture cr(stack.result); - - return convert_string_to_number(string_value(c.n, stack.result).c_str()); - } - - case ast_func_number_1: - return _left->eval_number(c, stack); - - case ast_func_sum: - { - xpath_allocator_capture cr(stack.result); - - double r = 0; - - xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all); - - for (const xpath_node* it = ns.begin(); it != ns.end(); ++it) - { - xpath_allocator_capture cri(stack.result); - - r += convert_string_to_number(string_value(*it, stack.result).c_str()); - } - - return r; - } - - case ast_func_floor: - { - double r = _left->eval_number(c, stack); - - return r == r ? floor(r) : r; - } - - case ast_func_ceiling: - { - double r = _left->eval_number(c, stack); - - return r == r ? ceil(r) : r; - } - - case ast_func_round: - return round_nearest_nzero(_left->eval_number(c, stack)); - - case ast_variable: - { - assert(_rettype == _data.variable->type()); - - if (_rettype == xpath_type_number) - return _data.variable->get_number(); - - // fallthrough to type conversion - } - - default: - { - switch (_rettype) - { - case xpath_type_boolean: - return eval_boolean(c, stack) ? 
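// floor()/ceiling() above use `r == r ? floor(r) : r`: NaN is the only double for
// which r == r is false, so NaN propagates unchanged without calling floor/ceil,
// as the XPath number functions require. Spelled out:

#include <cmath>

double xpath_floor(double r)
{
    return r != r ? r : std::floor(r); // r != r  <=>  isnan(r)
}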
1 : 0; - - case xpath_type_string: - { - xpath_allocator_capture cr(stack.result); - - return convert_string_to_number(eval_string(c, stack).c_str()); - } - - case xpath_type_node_set: - { - xpath_allocator_capture cr(stack.result); - - return convert_string_to_number(eval_string(c, stack).c_str()); - } - - default: - assert(false && "Wrong expression for return type number"); - return 0; - } - - } - } - } - - xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack) - { - assert(_type == ast_func_concat); - - xpath_allocator_capture ct(stack.temp); - - // count the string number - size_t count = 1; - for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++; - - // gather all strings - xpath_string static_buffer[4]; - xpath_string* buffer = static_buffer; - - // allocate on-heap for large concats - if (count > sizeof(static_buffer) / sizeof(static_buffer[0])) - { - buffer = static_cast(stack.temp->allocate(count * sizeof(xpath_string))); - assert(buffer); - } - - // evaluate all strings to temporary stack - xpath_stack swapped_stack = {stack.temp, stack.result}; - - buffer[0] = _left->eval_string(c, swapped_stack); - - size_t pos = 1; - for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack); - assert(pos == count); - - // get total length - size_t length = 0; - for (size_t i = 0; i < count; ++i) length += buffer[i].length(); - - // create final string - char_t* result = static_cast(stack.result->allocate((length + 1) * sizeof(char_t))); - assert(result); - - char_t* ri = result; - - for (size_t j = 0; j < count; ++j) - for (const char_t* bi = buffer[j].c_str(); *bi; ++bi) - *ri++ = *bi; - - *ri = 0; - - return xpath_string::from_heap_preallocated(result, ri); - } - - xpath_string eval_string(const xpath_context& c, const xpath_stack& stack) - { - switch (_type) - { - case ast_string_constant: - return xpath_string::from_const(_data.string); - - case ast_func_local_name_0: - { - xpath_node na = c.n; - - return xpath_string::from_const(local_name(na)); - } - - case ast_func_local_name_1: - { - xpath_allocator_capture cr(stack.result); - - xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); - xpath_node na = ns.first(); - - return xpath_string::from_const(local_name(na)); - } - - case ast_func_name_0: - { - xpath_node na = c.n; - - return xpath_string::from_const(qualified_name(na)); - } - - case ast_func_name_1: - { - xpath_allocator_capture cr(stack.result); - - xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); - xpath_node na = ns.first(); - - return xpath_string::from_const(qualified_name(na)); - } - - case ast_func_namespace_uri_0: - { - xpath_node na = c.n; - - return xpath_string::from_const(namespace_uri(na)); - } - - case ast_func_namespace_uri_1: - { - xpath_allocator_capture cr(stack.result); - - xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); - xpath_node na = ns.first(); - - return xpath_string::from_const(namespace_uri(na)); - } - - case ast_func_string_0: - return string_value(c.n, stack.result); - - case ast_func_string_1: - return _left->eval_string(c, stack); - - case ast_func_concat: - return eval_string_concat(c, stack); - - case ast_func_substring_before: - { - xpath_allocator_capture cr(stack.temp); - - xpath_stack swapped_stack = {stack.temp, stack.result}; - - xpath_string s = _left->eval_string(c, swapped_stack); - xpath_string p = _right->eval_string(c, swapped_stack); - - const char_t* pos = 
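// eval_string_concat above evaluates the pieces with a *swapped* stack
// ({stack.temp, stack.result}), so intermediate strings land in the temporary
// allocator and are released by the xpath_allocator_capture, while only the
// final concatenated string is built in the result allocator.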
find_substring(s.c_str(), p.c_str()); - - return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string(); - } - - case ast_func_substring_after: - { - xpath_allocator_capture cr(stack.temp); - - xpath_stack swapped_stack = {stack.temp, stack.result}; - - xpath_string s = _left->eval_string(c, swapped_stack); - xpath_string p = _right->eval_string(c, swapped_stack); - - const char_t* pos = find_substring(s.c_str(), p.c_str()); - if (!pos) return xpath_string(); - - const char_t* rbegin = pos + p.length(); - const char_t* rend = s.c_str() + s.length(); - - return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); - } - - case ast_func_substring_2: - { - xpath_allocator_capture cr(stack.temp); - - xpath_stack swapped_stack = {stack.temp, stack.result}; - - xpath_string s = _left->eval_string(c, swapped_stack); - size_t s_length = s.length(); - - double first = round_nearest(_right->eval_number(c, stack)); - - if (is_nan(first)) return xpath_string(); // NaN - else if (first >= s_length + 1) return xpath_string(); - - size_t pos = first < 1 ? 1 : static_cast(first); - assert(1 <= pos && pos <= s_length + 1); - - const char_t* rbegin = s.c_str() + (pos - 1); - const char_t* rend = s.c_str() + s.length(); - - return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); - } - - case ast_func_substring_3: - { - xpath_allocator_capture cr(stack.temp); - - xpath_stack swapped_stack = {stack.temp, stack.result}; - - xpath_string s = _left->eval_string(c, swapped_stack); - size_t s_length = s.length(); - - double first = round_nearest(_right->eval_number(c, stack)); - double last = first + round_nearest(_right->_next->eval_number(c, stack)); - - if (is_nan(first) || is_nan(last)) return xpath_string(); - else if (first >= s_length + 1) return xpath_string(); - else if (first >= last) return xpath_string(); - else if (last < 1) return xpath_string(); - - size_t pos = first < 1 ? 1 : static_cast(first); - size_t end = last >= s_length + 1 ? s_length + 1 : static_cast(last); - - assert(1 <= pos && pos <= end && end <= s_length + 1); - const char_t* rbegin = s.c_str() + (pos - 1); - const char_t* rend = s.c_str() + (end - 1); - - return (end == s_length + 1 && !s.uses_heap()) ? 
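// XPath substring() is 1-based and rounds both arguments: substring("12345", 1.5, 2.6)
// yields "234", since round(1.5) = 2 and 2 + round(2.6) = 5 bound the half-open
// position range [2, 5). A sketch of the same clamping as above, assuming
// round-half-up semantics for round_nearest:

#include <cmath>
#include <string>

std::string xpath_substring(const std::string& s, double first, double count)
{
    double f = std::floor(first + 0.5);     // round_nearest
    double l = f + std::floor(count + 0.5); // one past the last kept position

    if (f != f || l != l) return std::string(); // NaN arguments
    if (f >= s.length() + 1 || f >= l || l < 1) return std::string();

    std::size_t pos = f < 1 ? 1 : static_cast<std::size_t>(f);
    std::size_t end = l >= s.length() + 1 ? s.length() + 1 : static_cast<std::size_t>(l);

    return s.substr(pos - 1, end - pos);
}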
xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result); - } - - case ast_func_normalize_space_0: - { - xpath_string s = string_value(c.n, stack.result); - - char_t* begin = s.data(stack.result); - char_t* end = normalize_space(begin); - - return xpath_string::from_heap_preallocated(begin, end); - } - - case ast_func_normalize_space_1: - { - xpath_string s = _left->eval_string(c, stack); - - char_t* begin = s.data(stack.result); - char_t* end = normalize_space(begin); - - return xpath_string::from_heap_preallocated(begin, end); - } - - case ast_func_translate: - { - xpath_allocator_capture cr(stack.temp); - - xpath_stack swapped_stack = {stack.temp, stack.result}; - - xpath_string s = _left->eval_string(c, stack); - xpath_string from = _right->eval_string(c, swapped_stack); - xpath_string to = _right->_next->eval_string(c, swapped_stack); - - char_t* begin = s.data(stack.result); - char_t* end = translate(begin, from.c_str(), to.c_str(), to.length()); - - return xpath_string::from_heap_preallocated(begin, end); - } - - case ast_opt_translate_table: - { - xpath_string s = _left->eval_string(c, stack); - - char_t* begin = s.data(stack.result); - char_t* end = translate_table(begin, _data.table); - - return xpath_string::from_heap_preallocated(begin, end); - } - - case ast_variable: - { - assert(_rettype == _data.variable->type()); - - if (_rettype == xpath_type_string) - return xpath_string::from_const(_data.variable->get_string()); - - // fallthrough to type conversion - } - - default: - { - switch (_rettype) - { - case xpath_type_boolean: - return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); - - case xpath_type_number: - return convert_number_to_string(eval_number(c, stack), stack.result); - - case xpath_type_node_set: - { - xpath_allocator_capture cr(stack.temp); - - xpath_stack swapped_stack = {stack.temp, stack.result}; - - xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first); - return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result); - } - - default: - assert(false && "Wrong expression for return type string"); - return xpath_string(); - } - } - } - } - - xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval) - { - switch (_type) - { - case ast_op_union: - { - xpath_allocator_capture cr(stack.temp); - - xpath_stack swapped_stack = {stack.temp, stack.result}; - - xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval); - xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval); - - // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother - rs.set_type(xpath_node_set::type_unsorted); - - rs.append(ls.begin(), ls.end(), stack.result); - rs.remove_duplicates(); - - return rs; - } - - case ast_filter: - { - xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? 
nodeset_eval_first : nodeset_eval_all); - - // either expression is a number or it contains position() call; sort by document order - if (_test != predicate_posinv) set.sort_do(); - - bool once = eval_once(set.type(), eval); - - apply_predicate(set, 0, stack, once); - - return set; - } - - case ast_func_id: - return xpath_node_set_raw(); - - case ast_step: - { - switch (_axis) - { - case axis_ancestor: - return step_do(c, stack, eval, axis_to_type()); - - case axis_ancestor_or_self: - return step_do(c, stack, eval, axis_to_type()); - - case axis_attribute: - return step_do(c, stack, eval, axis_to_type()); - - case axis_child: - return step_do(c, stack, eval, axis_to_type()); - - case axis_descendant: - return step_do(c, stack, eval, axis_to_type()); - - case axis_descendant_or_self: - return step_do(c, stack, eval, axis_to_type()); - - case axis_following: - return step_do(c, stack, eval, axis_to_type()); - - case axis_following_sibling: - return step_do(c, stack, eval, axis_to_type()); - - case axis_namespace: - // namespaced axis is not supported - return xpath_node_set_raw(); - - case axis_parent: - return step_do(c, stack, eval, axis_to_type()); - - case axis_preceding: - return step_do(c, stack, eval, axis_to_type()); - - case axis_preceding_sibling: - return step_do(c, stack, eval, axis_to_type()); - - case axis_self: - return step_do(c, stack, eval, axis_to_type()); - - default: - assert(false && "Unknown axis"); - return xpath_node_set_raw(); - } - } - - case ast_step_root: - { - assert(!_right); // root step can't have any predicates - - xpath_node_set_raw ns; - - ns.set_type(xpath_node_set::type_sorted); - - if (c.n.node()) ns.push_back(c.n.node().root(), stack.result); - else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result); - - return ns; - } - - case ast_variable: - { - assert(_rettype == _data.variable->type()); - - if (_rettype == xpath_type_node_set) - { - const xpath_node_set& s = _data.variable->get_node_set(); - - xpath_node_set_raw ns; - - ns.set_type(s.type()); - ns.append(s.begin(), s.end(), stack.result); - - return ns; - } - - // fallthrough to type conversion - } - - default: - assert(false && "Wrong expression for return type node set"); - return xpath_node_set_raw(); - } - } - - void optimize(xpath_allocator* alloc) - { - if (_left) _left->optimize(alloc); - if (_right) _right->optimize(alloc); - if (_next) _next->optimize(alloc); - - optimize_self(alloc); - } - - void optimize_self(xpath_allocator* alloc) - { - // Rewrite [position()=expr] with [expr] - // Note that this step has to go before classification to recognize [position()=1] - if ((_type == ast_filter || _type == ast_predicate) && - _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number) - { - _right = _right->_right; - } - - // Classify filter/predicate ops to perform various optimizations during evaluation - if (_type == ast_filter || _type == ast_predicate) - { - assert(_test == predicate_default); - - if (_right->_type == ast_number_constant && _right->_data.number == 1.0) - _test = predicate_constant_one; - else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last)) - _test = predicate_constant; - else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr()) - _test = predicate_posinv; - } - - // Rewrite descendant-or-self::node()/child::foo with descendant::foo - // The former is a full form of 
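// step_do above receives the axis as a compile-time constant via a tag type
// parameterized on the axis (axis_to_type, instantiated per axis in the switch),
// so the axis dispatch inside step_fill is resolved per instantiation rather than
// at runtime. The idiom in isolation (names illustrative):

template <int N> struct int_to_type { static const int value = N; };

template <typename T> int dispatch(T) { return T::value; }

// dispatch(int_to_type<3>()) == 3; each N produces its own specialization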
//foo, the latter is much faster since it executes the node test immediately - // Do a similar kind of rewrite for self/descendant/descendant-or-self axes - // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1]) - if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && _left && - _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right && - is_posinv_step()) - { - if (_axis == axis_child || _axis == axis_descendant) - _axis = axis_descendant; - else - _axis = axis_descendant_or_self; - - _left = _left->_left; - } - - // Use optimized lookup table implementation for translate() with constant arguments - if (_type == ast_func_translate && _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant) - { - unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string); - - if (table) - { - _type = ast_opt_translate_table; - _data.table = table; - } - } - - // Use optimized path for @attr = 'value' or @attr = $value - if (_type == ast_op_equal && - _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right && - (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string))) - { - _type = ast_opt_compare_attribute; - } - } - - bool is_posinv_expr() const - { - switch (_type) - { - case ast_func_position: - case ast_func_last: - return false; - - case ast_string_constant: - case ast_number_constant: - case ast_variable: - return true; - - case ast_step: - case ast_step_root: - return true; - - case ast_predicate: - case ast_filter: - return true; - - default: - if (_left && !_left->is_posinv_expr()) return false; - - for (xpath_ast_node* n = _right; n; n = n->_next) - if (!n->is_posinv_expr()) return false; - - return true; - } - } - - bool is_posinv_step() const - { - assert(_type == ast_step); - - for (xpath_ast_node* n = _right; n; n = n->_next) - { - assert(n->_type == ast_predicate); - - if (n->_test != predicate_posinv) - return false; - } - - return true; - } - - xpath_value_type rettype() const - { - return static_cast(_rettype); - } - }; - - struct xpath_parser - { - xpath_allocator* _alloc; - xpath_lexer _lexer; - - const char_t* _query; - xpath_variable_set* _variables; - - xpath_parse_result* _result; - - char_t _scratch[32]; - - #ifdef PUGIXML_NO_EXCEPTIONS - jmp_buf _error_handler; - #endif - - void throw_error(const char* message) - { - _result->error = message; - _result->offset = _lexer.current_pos() - _query; - - #ifdef PUGIXML_NO_EXCEPTIONS - longjmp(_error_handler, 1); - #else - throw xpath_exception(*_result); - #endif - } - - void throw_error_oom() - { - #ifdef PUGIXML_NO_EXCEPTIONS - throw_error("Out of memory"); - #else - throw std::bad_alloc(); - #endif - } - - void* alloc_node() - { - void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node)); - - if (!result) throw_error_oom(); - - return result; - } - - const char_t* alloc_string(const xpath_lexer_string& value) - { - if (value.begin) - { - size_t length = static_cast(value.end - value.begin); - - char_t* c = static_cast(_alloc->allocate_nothrow((length + 1) * sizeof(char_t))); - if (!c) throw_error_oom(); - assert(c); // workaround for clang static analysis - - memcpy(c, value.begin, length * sizeof(char_t)); - c[length] = 0; - - return c; - } - 
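// The rewrite above turns descendant-or-self::node()/child::foo into
// descendant::foo, skipping the intermediate node set entirely. The
// positional-invariance check is essential: //foo[1] selects the first foo child
// of *each* element, while /descendant::foo[1] selects the single first foo in
// the document, so steps with positional predicates must not be rewritten.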
else return 0; - } - - xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1, size_t argc, xpath_ast_node* args[2]) - { - assert(argc <= 1); - - if (argc == 1 && args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); - - return new (alloc_node()) xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]); - } - - xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) - { - switch (name.begin[0]) - { - case 'b': - if (name == PUGIXML_TEXT("boolean") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]); - - break; - - case 'c': - if (name == PUGIXML_TEXT("count") && argc == 1) - { - if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); - return new (alloc_node()) xpath_ast_node(ast_func_count, xpath_type_number, args[0]); - } - else if (name == PUGIXML_TEXT("contains") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_contains, xpath_type_boolean, args[0], args[1]); - else if (name == PUGIXML_TEXT("concat") && argc >= 2) - return new (alloc_node()) xpath_ast_node(ast_func_concat, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("ceiling") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]); - - break; - - case 'f': - if (name == PUGIXML_TEXT("false") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_false, xpath_type_boolean); - else if (name == PUGIXML_TEXT("floor") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_floor, xpath_type_number, args[0]); - - break; - - case 'i': - if (name == PUGIXML_TEXT("id") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]); - - break; - - case 'l': - if (name == PUGIXML_TEXT("last") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_last, xpath_type_number); - else if (name == PUGIXML_TEXT("lang") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]); - else if (name == PUGIXML_TEXT("local-name") && argc <= 1) - return parse_function_helper(ast_func_local_name_0, ast_func_local_name_1, argc, args); - - break; - - case 'n': - if (name == PUGIXML_TEXT("name") && argc <= 1) - return parse_function_helper(ast_func_name_0, ast_func_name_1, argc, args); - else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1) - return parse_function_helper(ast_func_namespace_uri_0, ast_func_namespace_uri_1, argc, args); - else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("not") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]); - else if (name == PUGIXML_TEXT("number") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? 
ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); - - break; - - case 'p': - if (name == PUGIXML_TEXT("position") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_position, xpath_type_number); - - break; - - case 'r': - if (name == PUGIXML_TEXT("round") && argc == 1) - return new (alloc_node()) xpath_ast_node(ast_func_round, xpath_type_number, args[0]); - - break; - - case 's': - if (name == PUGIXML_TEXT("string") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); - else if (name == PUGIXML_TEXT("string-length") && argc <= 1) - return new (alloc_node()) xpath_ast_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]); - else if (name == PUGIXML_TEXT("starts-with") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); - else if (name == PUGIXML_TEXT("substring-before") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("substring-after") && argc == 2) - return new (alloc_node()) xpath_ast_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3)) - return new (alloc_node()) xpath_ast_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("sum") && argc == 1) - { - if (args[0]->rettype() != xpath_type_node_set) throw_error("Function has to be applied to node set"); - return new (alloc_node()) xpath_ast_node(ast_func_sum, xpath_type_number, args[0]); - } - - break; - - case 't': - if (name == PUGIXML_TEXT("translate") && argc == 3) - return new (alloc_node()) xpath_ast_node(ast_func_translate, xpath_type_string, args[0], args[1]); - else if (name == PUGIXML_TEXT("true") && argc == 0) - return new (alloc_node()) xpath_ast_node(ast_func_true, xpath_type_boolean); - - break; - - default: - break; - } - - throw_error("Unrecognized function or wrong parameter count"); - - return 0; - } - - axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) - { - specified = true; - - switch (name.begin[0]) - { - case 'a': - if (name == PUGIXML_TEXT("ancestor")) - return axis_ancestor; - else if (name == PUGIXML_TEXT("ancestor-or-self")) - return axis_ancestor_or_self; - else if (name == PUGIXML_TEXT("attribute")) - return axis_attribute; - - break; - - case 'c': - if (name == PUGIXML_TEXT("child")) - return axis_child; - - break; - - case 'd': - if (name == PUGIXML_TEXT("descendant")) - return axis_descendant; - else if (name == PUGIXML_TEXT("descendant-or-self")) - return axis_descendant_or_self; - - break; - - case 'f': - if (name == PUGIXML_TEXT("following")) - return axis_following; - else if (name == PUGIXML_TEXT("following-sibling")) - return axis_following_sibling; - - break; - - case 'n': - if (name == PUGIXML_TEXT("namespace")) - return axis_namespace; - - break; - - case 'p': - if (name == PUGIXML_TEXT("parent")) - return axis_parent; - else if (name == PUGIXML_TEXT("preceding")) - return axis_preceding; - else if (name == PUGIXML_TEXT("preceding-sibling")) - return axis_preceding_sibling; - - break; - - case 's': - if (name == PUGIXML_TEXT("self")) - return axis_self; - - break; - - default: - break; - } - - specified = false; - return axis_child; - } - - nodetest_t parse_node_test_type(const 
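// parse_function and parse_axis_name above switch on the first character before
// doing full string comparisons, so each lookup touches only a handful of
// candidates. The pattern in isolation (codes hypothetical):

#include <cstring>

int axis_code(const char* name)
{
    switch (name[0])
    {
    case 'c':
        if (std::strcmp(name, "child") == 0) return 1;
        break;

    case 's':
        if (std::strcmp(name, "self") == 0) return 2;
        break;
    }

    return 0; // unknown
}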
xpath_lexer_string& name) - { - switch (name.begin[0]) - { - case 'c': - if (name == PUGIXML_TEXT("comment")) - return nodetest_type_comment; - - break; - - case 'n': - if (name == PUGIXML_TEXT("node")) - return nodetest_type_node; - - break; - - case 'p': - if (name == PUGIXML_TEXT("processing-instruction")) - return nodetest_type_pi; - - break; - - case 't': - if (name == PUGIXML_TEXT("text")) - return nodetest_type_text; - - break; - - default: - break; - } - - return nodetest_none; - } - - // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall - xpath_ast_node* parse_primary_expression() - { - switch (_lexer.current()) - { - case lex_var_ref: - { - xpath_lexer_string name = _lexer.contents(); - - if (!_variables) - throw_error("Unknown variable: variable set is not provided"); - - xpath_variable* var = 0; - if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var)) - throw_error_oom(); - - if (!var) - throw_error("Unknown variable: variable set does not contain the given name"); - - _lexer.next(); - - return new (alloc_node()) xpath_ast_node(ast_variable, var->type(), var); - } - - case lex_open_brace: - { - _lexer.next(); - - xpath_ast_node* n = parse_expression(); - - if (_lexer.current() != lex_close_brace) - throw_error("Unmatched braces"); - - _lexer.next(); - - return n; - } - - case lex_quoted_string: - { - const char_t* value = alloc_string(_lexer.contents()); - - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_string_constant, xpath_type_string, value); - _lexer.next(); - - return n; - } - - case lex_number: - { - double value = 0; - - if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value)) - throw_error_oom(); - - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_number_constant, xpath_type_number, value); - _lexer.next(); - - return n; - } - - case lex_string: - { - xpath_ast_node* args[2] = {0}; - size_t argc = 0; - - xpath_lexer_string function = _lexer.contents(); - _lexer.next(); - - xpath_ast_node* last_arg = 0; - - if (_lexer.current() != lex_open_brace) - throw_error("Unrecognized function call"); - _lexer.next(); - - if (_lexer.current() != lex_close_brace) - args[argc++] = parse_expression(); - - while (_lexer.current() != lex_close_brace) - { - if (_lexer.current() != lex_comma) - throw_error("No comma between function arguments"); - _lexer.next(); - - xpath_ast_node* n = parse_expression(); - - if (argc < 2) args[argc] = n; - else last_arg->set_next(n); - - argc++; - last_arg = n; - } - - _lexer.next(); - - return parse_function(function, argc, args); - } - - default: - throw_error("Unrecognizable primary expression"); - - return 0; - } - } - - // FilterExpr ::= PrimaryExpr | FilterExpr Predicate - // Predicate ::= '[' PredicateExpr ']' - // PredicateExpr ::= Expr - xpath_ast_node* parse_filter_expression() - { - xpath_ast_node* n = parse_primary_expression(); - - while (_lexer.current() == lex_open_square_brace) - { - _lexer.next(); - - xpath_ast_node* expr = parse_expression(); - - if (n->rettype() != xpath_type_node_set) throw_error("Predicate has to be applied to node set"); - - n = new (alloc_node()) xpath_ast_node(ast_filter, n, expr, predicate_default); - - if (_lexer.current() != lex_close_square_brace) - throw_error("Unmatched square brace"); - - _lexer.next(); - } - - return n; - } - - // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep - // AxisSpecifier ::= AxisName '::' | '@'? 
- // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')' - // NameTest ::= '*' | NCName ':' '*' | QName - // AbbreviatedStep ::= '.' | '..' - xpath_ast_node* parse_step(xpath_ast_node* set) - { - if (set && set->rettype() != xpath_type_node_set) - throw_error("Step has to be applied to node set"); - - bool axis_specified = false; - axis_t axis = axis_child; // implied child axis - - if (_lexer.current() == lex_axis_attribute) - { - axis = axis_attribute; - axis_specified = true; - - _lexer.next(); - } - else if (_lexer.current() == lex_dot) - { - _lexer.next(); - - return new (alloc_node()) xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0); - } - else if (_lexer.current() == lex_double_dot) - { - _lexer.next(); - - return new (alloc_node()) xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0); - } - - nodetest_t nt_type = nodetest_none; - xpath_lexer_string nt_name; - - if (_lexer.current() == lex_string) - { - // node name test - nt_name = _lexer.contents(); - _lexer.next(); - - // was it an axis name? - if (_lexer.current() == lex_double_colon) - { - // parse axis name - if (axis_specified) throw_error("Two axis specifiers in one step"); - - axis = parse_axis_name(nt_name, axis_specified); - - if (!axis_specified) throw_error("Unknown axis"); - - // read actual node test - _lexer.next(); - - if (_lexer.current() == lex_multiply) - { - nt_type = nodetest_all; - nt_name = xpath_lexer_string(); - _lexer.next(); - } - else if (_lexer.current() == lex_string) - { - nt_name = _lexer.contents(); - _lexer.next(); - } - else throw_error("Unrecognized node test"); - } - - if (nt_type == nodetest_none) - { - // node type test or processing-instruction - if (_lexer.current() == lex_open_brace) - { - _lexer.next(); - - if (_lexer.current() == lex_close_brace) - { - _lexer.next(); - - nt_type = parse_node_test_type(nt_name); - - if (nt_type == nodetest_none) throw_error("Unrecognized node type"); - - nt_name = xpath_lexer_string(); - } - else if (nt_name == PUGIXML_TEXT("processing-instruction")) - { - if (_lexer.current() != lex_quoted_string) - throw_error("Only literals are allowed as arguments to processing-instruction()"); - - nt_type = nodetest_pi; - nt_name = _lexer.contents(); - _lexer.next(); - - if (_lexer.current() != lex_close_brace) - throw_error("Unmatched brace near processing-instruction()"); - _lexer.next(); - } - else - throw_error("Unmatched brace near node type test"); - - } - // QName or NCName:* - else - { - if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:* - { - nt_name.end--; // erase * - - nt_type = nodetest_all_in_namespace; - } - else nt_type = nodetest_name; - } - } - } - else if (_lexer.current() == lex_multiply) - { - nt_type = nodetest_all; - _lexer.next(); - } - else throw_error("Unrecognized node test"); - - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step, set, axis, nt_type, alloc_string(nt_name)); - - xpath_ast_node* last = 0; - - while (_lexer.current() == lex_open_square_brace) - { - _lexer.next(); - - xpath_ast_node* expr = parse_expression(); - - xpath_ast_node* pred = new (alloc_node()) xpath_ast_node(ast_predicate, 0, expr, predicate_default); - - if (_lexer.current() != lex_close_square_brace) - throw_error("Unmatched square brace"); - _lexer.next(); - - if (last) last->set_next(pred); - else n->set_right(pred); - - last = pred; - } - - return n; - } - - // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | 
RelativeLocationPath '//' Step - xpath_ast_node* parse_relative_location_path(xpath_ast_node* set) - { - xpath_ast_node* n = parse_step(set); - - while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) - { - lexeme_t l = _lexer.current(); - _lexer.next(); - - if (l == lex_double_slash) - n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); - - n = parse_step(n); - } - - return n; - } - - // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath - // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath - xpath_ast_node* parse_location_path() - { - if (_lexer.current() == lex_slash) - { - _lexer.next(); - - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set); - - // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path - lexeme_t l = _lexer.current(); - - if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply) - return parse_relative_location_path(n); - else - return n; - } - else if (_lexer.current() == lex_double_slash) - { - _lexer.next(); - - xpath_ast_node* n = new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set); - n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); - - return parse_relative_location_path(n); - } - - // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1 - return parse_relative_location_path(0); - } - - // PathExpr ::= LocationPath - // | FilterExpr - // | FilterExpr '/' RelativeLocationPath - // | FilterExpr '//' RelativeLocationPath - // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr - // UnaryExpr ::= UnionExpr | '-' UnaryExpr - xpath_ast_node* parse_path_or_unary_expression() - { - // Clarification. - // PathExpr begins with either LocationPath or FilterExpr. - // FilterExpr begins with PrimaryExpr - // PrimaryExpr begins with '$' in case of it being a variable reference, - // '(' in case of it being an expression, string literal, number constant or - // function call. - - if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace || - _lexer.current() == lex_quoted_string || _lexer.current() == lex_number || - _lexer.current() == lex_string) - { - if (_lexer.current() == lex_string) - { - // This is either a function call, or not - if not, we shall proceed with location path - const char_t* state = _lexer.state(); - - while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state; - - if (*state != '(') return parse_location_path(); - - // This looks like a function call; however this still can be a node-test. Check it. 
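// Example of the ambiguity being resolved here: in "comment()" the '(' makes the
// token look like a function call, but comment/text/node/processing-instruction
// are node tests, so parse_node_test_type is consulted first and the parser falls
// back to a location path for them.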
- if (parse_node_test_type(_lexer.contents()) != nodetest_none) return parse_location_path(); - } - - xpath_ast_node* n = parse_filter_expression(); - - if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) - { - lexeme_t l = _lexer.current(); - _lexer.next(); - - if (l == lex_double_slash) - { - if (n->rettype() != xpath_type_node_set) throw_error("Step has to be applied to node set"); - - n = new (alloc_node()) xpath_ast_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); - } - - // select from location path - return parse_relative_location_path(n); - } - - return n; - } - else if (_lexer.current() == lex_minus) - { - _lexer.next(); - - // precedence 7+ - only parses union expressions - xpath_ast_node* expr = parse_expression_rec(parse_path_or_unary_expression(), 7); - - return new (alloc_node()) xpath_ast_node(ast_op_negate, xpath_type_number, expr); - } - else - return parse_location_path(); - } - - struct binary_op_t - { - ast_type_t asttype; - xpath_value_type rettype; - int precedence; - - binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0) - { - } - - binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_) - { - } - - static binary_op_t parse(xpath_lexer& lexer) - { - switch (lexer.current()) - { - case lex_string: - if (lexer.contents() == PUGIXML_TEXT("or")) - return binary_op_t(ast_op_or, xpath_type_boolean, 1); - else if (lexer.contents() == PUGIXML_TEXT("and")) - return binary_op_t(ast_op_and, xpath_type_boolean, 2); - else if (lexer.contents() == PUGIXML_TEXT("div")) - return binary_op_t(ast_op_divide, xpath_type_number, 6); - else if (lexer.contents() == PUGIXML_TEXT("mod")) - return binary_op_t(ast_op_mod, xpath_type_number, 6); - else - return binary_op_t(); - - case lex_equal: - return binary_op_t(ast_op_equal, xpath_type_boolean, 3); - - case lex_not_equal: - return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3); - - case lex_less: - return binary_op_t(ast_op_less, xpath_type_boolean, 4); - - case lex_greater: - return binary_op_t(ast_op_greater, xpath_type_boolean, 4); - - case lex_less_or_equal: - return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4); - - case lex_greater_or_equal: - return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4); - - case lex_plus: - return binary_op_t(ast_op_add, xpath_type_number, 5); - - case lex_minus: - return binary_op_t(ast_op_subtract, xpath_type_number, 5); - - case lex_multiply: - return binary_op_t(ast_op_multiply, xpath_type_number, 6); - - case lex_union: - return binary_op_t(ast_op_union, xpath_type_node_set, 7); - - default: - return binary_op_t(); - } - } - }; - - xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit) - { - binary_op_t op = binary_op_t::parse(_lexer); - - while (op.asttype != ast_unknown && op.precedence >= limit) - { - _lexer.next(); - - xpath_ast_node* rhs = parse_path_or_unary_expression(); - - binary_op_t nextop = binary_op_t::parse(_lexer); - - while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence) - { - rhs = parse_expression_rec(rhs, nextop.precedence); - - nextop = binary_op_t::parse(_lexer); - } - - if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set)) - throw_error("Union operator has to be applied to node sets"); - - lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs); - - op = binary_op_t::parse(_lexer); - } - - 
return lhs; - } - - // Expr ::= OrExpr - // OrExpr ::= AndExpr | OrExpr 'or' AndExpr - // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr - // EqualityExpr ::= RelationalExpr - // | EqualityExpr '=' RelationalExpr - // | EqualityExpr '!=' RelationalExpr - // RelationalExpr ::= AdditiveExpr - // | RelationalExpr '<' AdditiveExpr - // | RelationalExpr '>' AdditiveExpr - // | RelationalExpr '<=' AdditiveExpr - // | RelationalExpr '>=' AdditiveExpr - // AdditiveExpr ::= MultiplicativeExpr - // | AdditiveExpr '+' MultiplicativeExpr - // | AdditiveExpr '-' MultiplicativeExpr - // MultiplicativeExpr ::= UnaryExpr - // | MultiplicativeExpr '*' UnaryExpr - // | MultiplicativeExpr 'div' UnaryExpr - // | MultiplicativeExpr 'mod' UnaryExpr - xpath_ast_node* parse_expression() - { - return parse_expression_rec(parse_path_or_unary_expression(), 0); - } - - xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result) - { - } - - xpath_ast_node* parse() - { - xpath_ast_node* result = parse_expression(); - - if (_lexer.current() != lex_eof) - { - // there are still unparsed tokens left, error - throw_error("Incorrect query"); - } - - return result; - } - - static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result) - { - xpath_parser parser(query, variables, alloc, result); - - #ifdef PUGIXML_NO_EXCEPTIONS - int error = setjmp(parser._error_handler); - - return (error == 0) ? parser.parse() : 0; - #else - return parser.parse(); - #endif - } - }; - - struct xpath_query_impl - { - static xpath_query_impl* create() - { - void* memory = xml_memory::allocate(sizeof(xpath_query_impl)); - if (!memory) return 0; - - return new (memory) xpath_query_impl(); - } - - static void destroy(xpath_query_impl* impl) - { - // free all allocated pages - impl->alloc.release(); - - // free allocator memory (with the first page) - xml_memory::deallocate(impl); - } - - xpath_query_impl(): root(0), alloc(&block) - { - block.next = 0; - block.capacity = sizeof(block.data); - } - - xpath_ast_node* root; - xpath_allocator alloc; - xpath_memory_block block; - }; - - PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl, const xpath_node& n, xpath_stack_data& sd) - { - if (!impl) return xpath_string(); - - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return xpath_string(); - #endif - - xpath_context c(n, 1, 1); - - return impl->root->eval_string(c, sd.stack); - } - - PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl) - { - if (!impl) return 0; - - if (impl->root->rettype() != xpath_type_node_set) - { - #ifdef PUGIXML_NO_EXCEPTIONS - return 0; - #else - xpath_parse_result res; - res.error = "Expression does not evaluate to node set"; - - throw xpath_exception(res); - #endif - } - - return impl->root; - } -PUGI__NS_END - -namespace pugi -{ -#ifndef PUGIXML_NO_EXCEPTIONS - PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_) - { - assert(_result.error); - } - - PUGI__FN const char* xpath_exception::what() const throw() - { - return _result.error; - } - - PUGI__FN const xpath_parse_result& xpath_exception::result() const - { - return _result; - } -#endif - - PUGI__FN xpath_node::xpath_node() - { - } - - PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_) - { - } - - PUGI__FN xpath_node::xpath_node(const 
xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_) - { - } - - PUGI__FN xml_node xpath_node::node() const - { - return _attribute ? xml_node() : _node; - } - - PUGI__FN xml_attribute xpath_node::attribute() const - { - return _attribute; - } - - PUGI__FN xml_node xpath_node::parent() const - { - return _attribute ? _node : _node.parent(); - } - - PUGI__FN static void unspecified_bool_xpath_node(xpath_node***) - { - } - - PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const - { - return (_node || _attribute) ? unspecified_bool_xpath_node : 0; - } - - PUGI__FN bool xpath_node::operator!() const - { - return !(_node || _attribute); - } - - PUGI__FN bool xpath_node::operator==(const xpath_node& n) const - { - return _node == n._node && _attribute == n._attribute; - } - - PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const - { - return _node != n._node || _attribute != n._attribute; - } - -#ifdef __BORLANDC__ - PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs) - { - return (bool)lhs && rhs; - } - - PUGI__FN bool operator||(const xpath_node& lhs, bool rhs) - { - return (bool)lhs || rhs; - } -#endif - - PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_) - { - assert(begin_ <= end_); - - size_t size_ = static_cast<size_t>(end_ - begin_); - - if (size_ <= 1) - { - // deallocate old buffer - if (_begin != &_storage) impl::xml_memory::deallocate(_begin); - - // use internal buffer - if (begin_ != end_) _storage = *begin_; - - _begin = &_storage; - _end = &_storage + size_; - _type = type_; - } - else - { - // make heap copy - xpath_node* storage = static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node))); - - if (!storage) - { - #ifdef PUGIXML_NO_EXCEPTIONS - return; - #else - throw std::bad_alloc(); - #endif - } - - memcpy(storage, begin_, size_ * sizeof(xpath_node)); - - // deallocate old buffer - if (_begin != &_storage) impl::xml_memory::deallocate(_begin); - - // finalize - _begin = storage; - _end = storage + size_; - _type = type_; - } - } - -#if __cplusplus >= 201103 - PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) - { - _type = rhs._type; - _storage = rhs._storage; - _begin = (rhs._begin == &rhs._storage) ? 
&_storage : rhs._begin; - _end = _begin + (rhs._end - rhs._begin); - - rhs._type = type_unsorted; - rhs._begin = &rhs._storage; - rhs._end = rhs._begin; - } -#endif - - PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(&_storage), _end(&_storage) - { - } - - PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(&_storage), _end(&_storage) - { - _assign(begin_, end_, type_); - } - - PUGI__FN xpath_node_set::~xpath_node_set() - { - if (_begin != &_storage) - impl::xml_memory::deallocate(_begin); - } - - PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(&_storage), _end(&_storage) - { - _assign(ns._begin, ns._end, ns._type); - } - - PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns) - { - if (this == &ns) return *this; - - _assign(ns._begin, ns._end, ns._type); - - return *this; - } - -#if __cplusplus >= 201103 - PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs): _type(type_unsorted), _begin(&_storage), _end(&_storage) - { - _move(rhs); - } - - PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) - { - if (this == &rhs) return *this; - - if (_begin != &_storage) - impl::xml_memory::deallocate(_begin); - - _move(rhs); - - return *this; - } -#endif - - PUGI__FN xpath_node_set::type_t xpath_node_set::type() const - { - return _type; - } - - PUGI__FN size_t xpath_node_set::size() const - { - return _end - _begin; - } - - PUGI__FN bool xpath_node_set::empty() const - { - return _begin == _end; - } - - PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const - { - assert(index < size()); - return _begin[index]; - } - - PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const - { - return _begin; - } - - PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const - { - return _end; - } - - PUGI__FN void xpath_node_set::sort(bool reverse) - { - _type = impl::xpath_sort(_begin, _end, _type, reverse); - } - - PUGI__FN xpath_node xpath_node_set::first() const - { - return impl::xpath_first(_begin, _end, _type); - } - - PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0) - { - } - - PUGI__FN xpath_parse_result::operator bool() const - { - return error == 0; - } - - PUGI__FN const char* xpath_parse_result::description() const - { - return error ? error : "No error"; - } - - PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0) - { - } - - PUGI__FN const char_t* xpath_variable::name() const - { - switch (_type) - { - case xpath_type_node_set: - return static_cast<const impl::xpath_variable_node_set*>(this)->name; - - case xpath_type_number: - return static_cast<const impl::xpath_variable_number*>(this)->name; - - case xpath_type_string: - return static_cast<const impl::xpath_variable_string*>(this)->name; - - case xpath_type_boolean: - return static_cast<const impl::xpath_variable_boolean*>(this)->name; - - default: - assert(false && "Invalid variable type"); - return 0; - } - } - - PUGI__FN xpath_value_type xpath_variable::type() const - { - return _type; - } - - PUGI__FN bool xpath_variable::get_boolean() const - { - return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false; - } - - PUGI__FN double xpath_variable::get_number() const - { - return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan(); - } - - PUGI__FN const char_t* xpath_variable::get_string() const - { - const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0; - return value ? 
value : PUGIXML_TEXT(""); - } - - PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const - { - return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set; - } - - PUGI__FN bool xpath_variable::set(bool value) - { - if (_type != xpath_type_boolean) return false; - - static_cast<impl::xpath_variable_boolean*>(this)->value = value; - return true; - } - - PUGI__FN bool xpath_variable::set(double value) - { - if (_type != xpath_type_number) return false; - - static_cast<impl::xpath_variable_number*>(this)->value = value; - return true; - } - - PUGI__FN bool xpath_variable::set(const char_t* value) - { - if (_type != xpath_type_string) return false; - - impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this); - - // duplicate string - size_t size = (impl::strlength(value) + 1) * sizeof(char_t); - - char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size)); - if (!copy) return false; - - memcpy(copy, value, size); - - // replace old string - if (var->value) impl::xml_memory::deallocate(var->value); - var->value = copy; - - return true; - } - - PUGI__FN bool xpath_variable::set(const xpath_node_set& value) - { - if (_type != xpath_type_node_set) return false; - - static_cast<impl::xpath_variable_node_set*>(this)->value = value; - return true; - } - - PUGI__FN xpath_variable_set::xpath_variable_set() - { - for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) - _data[i] = 0; - } - - PUGI__FN xpath_variable_set::~xpath_variable_set() - { - for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) - _destroy(_data[i]); - } - - PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs) - { - for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) - _data[i] = 0; - - _assign(rhs); - } - - PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs) - { - if (this == &rhs) return *this; - - _assign(rhs); - - return *this; - } - -#if __cplusplus >= 201103 - PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) - { - for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) - { - _data[i] = rhs._data[i]; - rhs._data[i] = 0; - } - } - - PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) - { - for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) - { - _destroy(_data[i]); - - _data[i] = rhs._data[i]; - rhs._data[i] = 0; - } - - return *this; - } -#endif - - PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs) - { - xpath_variable_set temp; - - for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) - if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i])) - return; - - _swap(temp); - } - - PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs) - { - for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) - { - xpath_variable* chain = _data[i]; - - _data[i] = rhs._data[i]; - rhs._data[i] = chain; - } - } - - PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const - { - if (!name) { - return 0; - } - const size_t hash_size = sizeof(_data) / sizeof(_data[0]); - size_t hash = impl::hash_string(name) % hash_size; - - // look for existing variable - for (xpath_variable* var = _data[hash]; var; var = var->_next) - if (var->name() && impl::strequal(var->name(), name)) - return var; - - return 0; - } - - PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result) - { - xpath_variable* last = 0; - - while (var) - { - // allocate storage for new variable - xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name()); - if (!nvar) 
return false; - - // link the variable to the result immediately to handle failures gracefully - if (last) - last->_next = nvar; - else - *out_result = nvar; - - last = nvar; - - // copy the value; this can fail due to out-of-memory conditions - if (!impl::copy_xpath_variable(nvar, var)) return false; - - var = var->_next; - } - - return true; - } - - PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var) - { - while (var) - { - xpath_variable* next = var->_next; - - impl::delete_xpath_variable(var->_type, var); - - var = next; - } - } - - PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type) - { - if (!name) { - return nullptr; - } - const size_t hash_size = sizeof(_data) / sizeof(_data[0]); - size_t hash = impl::hash_string(name) % hash_size; - - // look for existing variable - for (xpath_variable* var = _data[hash]; var; var = var->_next) - if (var->name() && impl::strequal(var->name(), name)) - return var->type() == type ? var : 0; - - // add new variable - xpath_variable* result = impl::new_xpath_variable(type, name); - - if (result) - { - result->_next = _data[hash]; - - _data[hash] = result; - } - - return result; - } - - PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value) - { - xpath_variable* var = add(name, xpath_type_boolean); - return var ? var->set(value) : false; - } - - PUGI__FN bool xpath_variable_set::set(const char_t* name, double value) - { - xpath_variable* var = add(name, xpath_type_number); - return var ? var->set(value) : false; - } - - PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value) - { - xpath_variable* var = add(name, xpath_type_string); - return var ? var->set(value) : false; - } - - PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value) - { - xpath_variable* var = add(name, xpath_type_node_set); - return var ? 
var->set(value) : false; - } - - PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name) - { - return _find(name); - } - - PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const - { - return _find(name); - } - - PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0) - { - impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create(); - - if (!qimpl) - { - #ifdef PUGIXML_NO_EXCEPTIONS - _result.error = "Out of memory"; - #else - throw std::bad_alloc(); - #endif - } - else - { - using impl::auto_deleter; // MSVC7 workaround - auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy); - - qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result); - - if (qimpl->root) - { - qimpl->root->optimize(&qimpl->alloc); - - _impl = impl.release(); - _result.error = 0; - } - } - } - - PUGI__FN xpath_query::xpath_query(): _impl(0) - { - } - - PUGI__FN xpath_query::~xpath_query() - { - if (_impl) - impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl)); - } - -#if __cplusplus >= 201103 - PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) - { - _impl = rhs._impl; - _result = rhs._result; - rhs._impl = 0; - rhs._result = xpath_parse_result(); - } - - PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) - { - if (this == &rhs) return *this; - - if (_impl) - impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl)); - - _impl = rhs._impl; - _result = rhs._result; - rhs._impl = 0; - rhs._result = xpath_parse_result(); - - return *this; - } -#endif - - PUGI__FN xpath_value_type xpath_query::return_type() const - { - if (!_impl) return xpath_type_none; - - return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype(); - } - - PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const - { - if (!_impl) return false; - - impl::xpath_context c(n, 1, 1); - impl::xpath_stack_data sd; - - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return false; - #endif - - return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack); - } - - PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const - { - if (!_impl) return impl::gen_nan(); - - impl::xpath_context c(n, 1, 1); - impl::xpath_stack_data sd; - - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return impl::gen_nan(); - #endif - - return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack); - } - -#ifndef PUGIXML_NO_STL - PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const - { - impl::xpath_stack_data sd; - - impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd); - - return string_t(r.c_str(), r.length()); - } -#endif - - PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const - { - impl::xpath_stack_data sd; - - impl::xpath_string r = impl::evaluate_string_impl(static_cast<impl::xpath_query_impl*>(_impl), n, sd); - - size_t full_size = r.length() + 1; - - if (capacity > 0) - { - size_t size = (full_size < capacity) ? 
full_size : capacity; - assert(size > 0); - - memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t)); - buffer[size - 1] = 0; - } - - return full_size; - } - - PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const - { - impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl)); - if (!root) return xpath_node_set(); - - impl::xpath_context c(n, 1, 1); - impl::xpath_stack_data sd; - - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return xpath_node_set(); - #endif - - impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all); - - return xpath_node_set(r.begin(), r.end(), r.type()); - } - - PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const - { - impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl)); - if (!root) return xpath_node(); - - impl::xpath_context c(n, 1, 1); - impl::xpath_stack_data sd; - - #ifdef PUGIXML_NO_EXCEPTIONS - if (setjmp(sd.error_handler)) return xpath_node(); - #endif - - impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first); - - return r.first(); - } - - PUGI__FN const xpath_parse_result& xpath_query::result() const - { - return _result; - } - - PUGI__FN static void unspecified_bool_xpath_query(xpath_query***) - { - } - - PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const - { - return _impl ? unspecified_bool_xpath_query : 0; - } - - PUGI__FN bool xpath_query::operator!() const - { - return !_impl; - } - - PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const - { - xpath_query q(query, variables); - return select_node(q); - } - - PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const - { - return query.evaluate_node(*this); - } - - PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const - { - xpath_query q(query, variables); - return select_nodes(q); - } - - PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const - { - return query.evaluate_node_set(*this); - } - - PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const - { - xpath_query q(query, variables); - return select_single_node(q); - } - - PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const - { - return query.evaluate_node(*this); - } -} - -#endif - -#ifdef __BORLANDC__ -# pragma option pop -#endif - -// Intel C++ does not properly keep warning state for function templates, -// so popping warning state at the end of translation unit leads to warnings in the middle. 
-#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) -# pragma warning(pop) -#endif - -// Undefine all local macros (makes sure we're not leaking macros in header-only mode) -#undef PUGI__NO_INLINE -#undef PUGI__UNLIKELY -#undef PUGI__STATIC_ASSERT -#undef PUGI__DMC_VOLATILE -#undef PUGI__MSVC_CRT_VERSION -#undef PUGI__NS_BEGIN -#undef PUGI__NS_END -#undef PUGI__FN -#undef PUGI__FN_NO_INLINE -#undef PUGI__GETHEADER_IMPL -#undef PUGI__GETPAGE_IMPL -#undef PUGI__GETPAGE -#undef PUGI__NODETYPE -#undef PUGI__IS_CHARTYPE_IMPL -#undef PUGI__IS_CHARTYPE -#undef PUGI__IS_CHARTYPEX -#undef PUGI__ENDSWITH -#undef PUGI__SKIPWS -#undef PUGI__OPTSET -#undef PUGI__PUSHNODE -#undef PUGI__POPNODE -#undef PUGI__SCANFOR -#undef PUGI__SCANWHILE -#undef PUGI__SCANWHILE_UNROLL -#undef PUGI__ENDSEG -#undef PUGI__THROW_ERROR -#undef PUGI__CHECK_ERROR - -#endif - -/** - * Copyright (c) 2006-2016 Arseny Kapoulkine - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ diff --git a/inference-engine/samples/validation_app/pugixml/pugixml.hpp b/inference-engine/samples/validation_app/pugixml/pugixml.hpp deleted file mode 100644 index fd3067fc838ade..00000000000000 --- a/inference-engine/samples/validation_app/pugixml/pugixml.hpp +++ /dev/null @@ -1,1404 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#ifndef PUGIXML_VERSION -// Define version macro; evaluates to major * 100 + minor so that it's safe to use in less-than comparisons -# define PUGIXML_VERSION 170 -#endif - -// Include user configuration file (this can define various configuration macros) -#include "pugiconfig.hpp" - -#ifndef HEADER_PUGIXML_HPP -#define HEADER_PUGIXML_HPP - -// Include stddef.h for size_t and ptrdiff_t -#include <stddef.h> - -// Include exception header for XPath -#if !defined(PUGIXML_NO_XPATH) && !defined(PUGIXML_NO_EXCEPTIONS) -# include <exception> -#endif - -// Include STL headers -#ifndef PUGIXML_NO_STL -# include <iterator> -# include <iostream> -# include <string> -#endif - -// Macro for deprecated features -#ifndef PUGIXML_DEPRECATED -# if defined(__GNUC__) -# define PUGIXML_DEPRECATED __attribute__((deprecated)) -# elif defined(_MSC_VER) && _MSC_VER >= 1300 -# define PUGIXML_DEPRECATED __declspec(deprecated) -# else -# define PUGIXML_DEPRECATED -# endif -#endif - -// If no API is defined, assume default -#ifndef PUGIXML_API -# define PUGIXML_API -#endif - -// If no API for classes is defined, assume default -#ifndef PUGIXML_CLASS -# define PUGIXML_CLASS PUGIXML_API -#endif - -// If no API for functions is defined, assume default -#ifndef PUGIXML_FUNCTION -# define PUGIXML_FUNCTION PUGIXML_API -#endif - -// If the platform is known to have long long support, enable long long functions -#ifndef PUGIXML_HAS_LONG_LONG -# if __cplusplus >= 201103 -# define PUGIXML_HAS_LONG_LONG -# elif defined(_MSC_VER) && _MSC_VER >= 1400 -# define PUGIXML_HAS_LONG_LONG -# endif -#endif - -// Character interface macros -#ifdef PUGIXML_WCHAR_MODE -# define PUGIXML_TEXT(t) L ## t -# define PUGIXML_CHAR wchar_t -#else -# define PUGIXML_TEXT(t) t -# define PUGIXML_CHAR char -#endif - -namespace pugi -{ - // Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE - typedef PUGIXML_CHAR char_t; - -#ifndef PUGIXML_NO_STL - // String type used for operations that work with STL string; depends on PUGIXML_WCHAR_MODE - typedef std::basic_string<PUGIXML_CHAR, std::char_traits<PUGIXML_CHAR>, std::allocator<PUGIXML_CHAR> > string_t; -#endif -} - -// The PugiXML namespace -namespace pugi -{ - // Tree node types - enum xml_node_type - { - node_null, // Empty (null) node handle - node_document, // A document tree's absolute root - node_element, // Element tag, i.e. '<node/>' - node_pcdata, // Plain character data, i.e. 'text' - node_cdata, // Character data, i.e. '<![CDATA[text]]>' - node_comment, // Comment tag, i.e. '<!-- text -->' - node_pi, // Processing instruction, i.e. '<?name?>' - node_declaration, // Document declaration, i.e. '<?xml version="1.0"?>' - node_doctype // Document type declaration, i.e. '<!DOCTYPE doc>' - }; - - // Parsing options - - // Minimal parsing mode (equivalent to turning all other flags off). - // Only elements and PCDATA sections are added to the DOM tree, no text conversions are performed. - const unsigned int parse_minimal = 0x0000; - - // This flag determines if processing instructions (node_pi) are added to the DOM tree. This flag is off by default. - const unsigned int parse_pi = 0x0001; - - // This flag determines if comments (node_comment) are added to the DOM tree. This flag is off by default. 
- const unsigned int parse_comments = 0x0002; - - // This flag determines if CDATA sections (node_cdata) are added to the DOM tree. This flag is on by default. - const unsigned int parse_cdata = 0x0004; - - // This flag determines if plain character data (node_pcdata) that consist only of whitespace are added to the DOM tree. - // This flag is off by default; turning it on usually results in slower parsing and more memory consumption. - const unsigned int parse_ws_pcdata = 0x0008; - - // This flag determines if character and entity references are expanded during parsing. This flag is on by default. - const unsigned int parse_escapes = 0x0010; - - // This flag determines if EOL characters are normalized (converted to #xA) during parsing. This flag is on by default. - const unsigned int parse_eol = 0x0020; - - // This flag determines if attribute values are normalized using CDATA normalization rules during parsing. This flag is on by default. - const unsigned int parse_wconv_attribute = 0x0040; - - // This flag determines if attribute values are normalized using NMTOKENS normalization rules during parsing. This flag is off by default. - const unsigned int parse_wnorm_attribute = 0x0080; - - // This flag determines if document declaration (node_declaration) is added to the DOM tree. This flag is off by default. - const unsigned int parse_declaration = 0x0100; - - // This flag determines if document type declaration (node_doctype) is added to the DOM tree. This flag is off by default. - const unsigned int parse_doctype = 0x0200; - - // This flag determines if plain character data (node_pcdata) that is the only child of the parent node and that consists only - // of whitespace is added to the DOM tree. - // This flag is off by default; turning it on may result in slower parsing and more memory consumption. - const unsigned int parse_ws_pcdata_single = 0x0400; - - // This flag determines if leading and trailing whitespace is to be removed from plain character data. This flag is off by default. - const unsigned int parse_trim_pcdata = 0x0800; - - // This flag determines if plain character data that does not have a parent node is added to the DOM tree, and if an empty document - // is a valid document. This flag is off by default. - const unsigned int parse_fragment = 0x1000; - - // This flag determines if plain character data is be stored in the parent element's value. This significantly changes the structure of - // the document; this flag is only recommended for parsing documents with many PCDATA nodes in memory-constrained environments. - // This flag is off by default. - const unsigned int parse_embed_pcdata = 0x2000; - - // The default parsing mode. - // Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded, - // End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules. - const unsigned int parse_default = parse_cdata | parse_escapes | parse_wconv_attribute | parse_eol; - - // The full parsing mode. - // Nodes of all types are added to the DOM tree, character/reference entities are expanded, - // End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules. 
- const unsigned int parse_full = parse_default | parse_pi | parse_comments | parse_declaration | parse_doctype; - - // These flags determine the encoding of input data for XML document - enum xml_encoding - { - encoding_auto, // Auto-detect input encoding using BOM or '<' / '<?' detection; use UTF8 if BOM is not found - encoding_utf8, // UTF8 encoding - encoding_utf16_le, // Little-endian UTF16 - encoding_utf16_be, // Big-endian UTF16 - encoding_utf16, // UTF16 with native endianness - encoding_utf32_le, // Little-endian UTF32 - encoding_utf32_be, // Big-endian UTF32 - encoding_utf32, // UTF32 with native endianness - encoding_wchar, // The same encoding wchar_t has (either UTF16 or UTF32) - encoding_latin1 - }; - - // Formatting flags - - // Indent the nodes that are written to output stream with as many indentation strings as deep the node is in DOM tree. This flag is on by default. - const unsigned int format_indent = 0x01; - - // Write encoding-specific BOM to the output stream. This flag is off by default. - const unsigned int format_write_bom = 0x02; - - // Use raw output mode (no indentation and no line breaks are written). This flag is off by default. - const unsigned int format_raw = 0x04; - - // Omit default XML declaration even if there is no declaration in the document. This flag is off by default. - const unsigned int format_no_declaration = 0x08; - - // Don't escape attribute values and PCDATA contents. This flag is off by default. - const unsigned int format_no_escapes = 0x10; - - // Open file using text mode in xml_document::save_file. This enables special character (i.e. new-line) conversions on some systems. This flag is off by default. - const unsigned int format_save_file_text = 0x20; - - // Write every attribute on a new line with appropriate indentation. This flag is off by default. - const unsigned int format_indent_attributes = 0x40; - - // The default set of formatting flags. - // Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none. - const unsigned int format_default = format_indent; - - // Forward declarations - struct xml_attribute_struct; - struct xml_node_struct; - - class xml_node_iterator; - class xml_attribute_iterator; - class xml_named_node_iterator; - - class xml_tree_walker; - - struct xml_parse_result; - - class xml_node; - - class xml_text; - - #ifndef PUGIXML_NO_XPATH - class xpath_node; - class xpath_node_set; - class xpath_query; - class xpath_variable_set; - #endif - - // Range-based for loop support - template <typename It> class xml_object_range - { - public: - typedef It const_iterator; - typedef It iterator; - - xml_object_range(It b, It e): _begin(b), _end(e) - { - } - - It begin() const { return _begin; } - It end() const { return _end; } - - private: - It _begin, _end; - }; - - // Writer interface for node printing (see xml_node::print) - class PUGIXML_CLASS xml_writer - { - public: - virtual ~xml_writer() {} - - // Write memory chunk into stream/file/whatever - virtual void write(const void* data, size_t size) = 0; - }; - - // xml_writer implementation for FILE* - class PUGIXML_CLASS xml_writer_file: public xml_writer - { - public: - // Construct writer from a FILE* object; void* is used to avoid header dependencies on stdio - xml_writer_file(void* file); - - virtual void write(const void* data, size_t size); - - private: - void* file; - }; - - #ifndef PUGIXML_NO_STL - // xml_writer implementation for streams - class PUGIXML_CLASS xml_writer_stream: public xml_writer - { - public: - // Construct writer from an output stream object - xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream); - xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream); - - virtual void write(const void* data, size_t size); - - private: - std::basic_ostream<char, std::char_traits<char> >* narrow_stream; - std::basic_ostream<wchar_t, std::char_traits<wchar_t> >* wide_stream; - }; - #endif - - // A light-weight handle for manipulating attributes in DOM tree - class PUGIXML_CLASS xml_attribute - { - friend class xml_attribute_iterator; - friend class xml_node; - - private: - xml_attribute_struct* _attr; - - typedef void (*unspecified_bool_type)(xml_attribute***); - - public: - // Default constructor. Constructs an empty attribute. 
- xml_attribute(); - - // Constructs attribute from internal pointer - explicit xml_attribute(xml_attribute_struct* attr); - - // Safe bool conversion operator - operator unspecified_bool_type() const; - - // Borland C++ workaround - bool operator!() const; - - // Comparison operators (compares wrapped attribute pointers) - bool operator==(const xml_attribute& r) const; - bool operator!=(const xml_attribute& r) const; - bool operator<(const xml_attribute& r) const; - bool operator>(const xml_attribute& r) const; - bool operator<=(const xml_attribute& r) const; - bool operator>=(const xml_attribute& r) const; - - // Check if attribute is empty - bool empty() const; - - // Get attribute name/value, or "" if attribute is empty - const char_t* name() const; - const char_t* value() const; - - // Get attribute value, or the default value if attribute is empty - const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const; - - // Get attribute value as a number, or the default value if conversion did not succeed or attribute is empty - int as_int(int def = 0) const; - unsigned int as_uint(unsigned int def = 0) const; - double as_double(double def = 0) const; - float as_float(float def = 0) const; - - #ifdef PUGIXML_HAS_LONG_LONG - long long as_llong(long long def = 0) const; - unsigned long long as_ullong(unsigned long long def = 0) const; - #endif - - // Get attribute value as bool (returns true if first character is in '1tTyY' set), or the default value if attribute is empty - bool as_bool(bool def = false) const; - - // Set attribute name/value (returns false if attribute is empty or there is not enough memory) - bool set_name(const char_t* rhs); - bool set_value(const char_t* rhs); - - // Set attribute value with type conversion (numbers are converted to strings, boolean is converted to "true"/"false") - bool set_value(int rhs); - bool set_value(unsigned int rhs); - bool set_value(long rhs); - bool set_value(unsigned long rhs); - bool set_value(double rhs); - bool set_value(float rhs); - bool set_value(bool rhs); - - #ifdef PUGIXML_HAS_LONG_LONG - bool set_value(long long rhs); - bool set_value(unsigned long long rhs); - #endif - - // Set attribute value (equivalent to set_value without error checking) - xml_attribute& operator=(const char_t* rhs); - xml_attribute& operator=(int rhs); - xml_attribute& operator=(unsigned int rhs); - xml_attribute& operator=(long rhs); - xml_attribute& operator=(unsigned long rhs); - xml_attribute& operator=(double rhs); - xml_attribute& operator=(float rhs); - xml_attribute& operator=(bool rhs); - - #ifdef PUGIXML_HAS_LONG_LONG - xml_attribute& operator=(long long rhs); - xml_attribute& operator=(unsigned long long rhs); - #endif - - // Get next/previous attribute in the attribute list of the parent node - xml_attribute next_attribute() const; - xml_attribute previous_attribute() const; - - // Get hash value (unique for handles to the same object) - size_t hash_value() const; - - // Get internal pointer - xml_attribute_struct* internal_object() const; - }; - -#ifdef __BORLANDC__ - // Borland C++ workaround - bool PUGIXML_FUNCTION operator&&(const xml_attribute& lhs, bool rhs); - bool PUGIXML_FUNCTION operator||(const xml_attribute& lhs, bool rhs); -#endif - - // A light-weight handle for manipulating nodes in DOM tree - class PUGIXML_CLASS xml_node - { - friend class xml_attribute_iterator; - friend class xml_node_iterator; - friend class xml_named_node_iterator; - - protected: - xml_node_struct* _root; - - typedef void 
(*unspecified_bool_type)(xml_node***); - - public: - // Default constructor. Constructs an empty node. - xml_node(); - - // Constructs node from internal pointer - explicit xml_node(xml_node_struct* p); - - // Safe bool conversion operator - operator unspecified_bool_type() const; - - // Borland C++ workaround - bool operator!() const; - - // Comparison operators (compares wrapped node pointers) - bool operator==(const xml_node& r) const; - bool operator!=(const xml_node& r) const; - bool operator<(const xml_node& r) const; - bool operator>(const xml_node& r) const; - bool operator<=(const xml_node& r) const; - bool operator>=(const xml_node& r) const; - - // Check if node is empty. - bool empty() const; - - // Get node type - xml_node_type type() const; - - // Get node name, or "" if node is empty or it has no name - const char_t* name() const; - - // Get node value, or "" if node is empty or it has no value - // Note: For text node.value() does not return "text"! Use child_value() or text() methods to access text inside nodes. - const char_t* value() const; - - // Get attribute list - xml_attribute first_attribute() const; - xml_attribute last_attribute() const; - - // Get children list - xml_node first_child() const; - xml_node last_child() const; - - // Get next/previous sibling in the children list of the parent node - xml_node next_sibling() const; - xml_node previous_sibling() const; - - // Get parent node - xml_node parent() const; - - // Get root of DOM tree this node belongs to - xml_node root() const; - - // Get text object for the current node - xml_text text() const; - - // Get child, attribute or next/previous sibling with the specified name - xml_node child(const char_t* name) const; - xml_attribute attribute(const char_t* name) const; - xml_node next_sibling(const char_t* name) const; - xml_node previous_sibling(const char_t* name) const; - - // Get attribute, starting the search from a hint (and updating hint so that searching for a sequence of attributes is fast) - xml_attribute attribute(const char_t* name, xml_attribute& hint) const; - - // Get child value of current node; that is, value of the first child node of type PCDATA/CDATA - const char_t* child_value() const; - - // Get child value of child with specified name. Equivalent to child(name).child_value(). - const char_t* child_value(const char_t* name) const; - - // Set node name/value (returns false if node is empty, there is not enough memory, or node can not have name/value) - bool set_name(const char_t* rhs); - bool set_value(const char_t* rhs); - - // Add attribute with specified name. Returns added attribute, or empty attribute on errors. - xml_attribute append_attribute(const char_t* name); - xml_attribute prepend_attribute(const char_t* name); - xml_attribute insert_attribute_after(const char_t* name, const xml_attribute& attr); - xml_attribute insert_attribute_before(const char_t* name, const xml_attribute& attr); - - // Add a copy of the specified attribute. Returns added attribute, or empty attribute on errors. - xml_attribute append_copy(const xml_attribute& proto); - xml_attribute prepend_copy(const xml_attribute& proto); - xml_attribute insert_copy_after(const xml_attribute& proto, const xml_attribute& attr); - xml_attribute insert_copy_before(const xml_attribute& proto, const xml_attribute& attr); - - // Add child node with specified type. Returns added node, or empty node on errors. 
- xml_node append_child(xml_node_type type = node_element); - xml_node prepend_child(xml_node_type type = node_element); - xml_node insert_child_after(xml_node_type type, const xml_node& node); - xml_node insert_child_before(xml_node_type type, const xml_node& node); - - // Add child element with specified name. Returns added node, or empty node on errors. - xml_node append_child(const char_t* name); - xml_node prepend_child(const char_t* name); - xml_node insert_child_after(const char_t* name, const xml_node& node); - xml_node insert_child_before(const char_t* name, const xml_node& node); - - // Add a copy of the specified node as a child. Returns added node, or empty node on errors. - xml_node append_copy(const xml_node& proto); - xml_node prepend_copy(const xml_node& proto); - xml_node insert_copy_after(const xml_node& proto, const xml_node& node); - xml_node insert_copy_before(const xml_node& proto, const xml_node& node); - - // Move the specified node to become a child of this node. Returns moved node, or empty node on errors. - xml_node append_move(const xml_node& moved); - xml_node prepend_move(const xml_node& moved); - xml_node insert_move_after(const xml_node& moved, const xml_node& node); - xml_node insert_move_before(const xml_node& moved, const xml_node& node); - - // Remove specified attribute - bool remove_attribute(const xml_attribute& a); - bool remove_attribute(const char_t* name); - - // Remove specified child - bool remove_child(const xml_node& n); - bool remove_child(const char_t* name); - - // Parses buffer as an XML document fragment and appends all nodes as children of the current node. - // Copies/converts the buffer, so it may be deleted or changed after the function returns. - // Note: append_buffer allocates memory that has the lifetime of the owning document; removing the appended nodes does not immediately reclaim that memory. - xml_parse_result append_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - - // Find attribute using predicate. Returns first attribute for which predicate returned true. - template <typename Predicate> xml_attribute find_attribute(Predicate pred) const - { - if (!_root) return xml_attribute(); - - for (xml_attribute attrib = first_attribute(); attrib; attrib = attrib.next_attribute()) - if (pred(attrib)) - return attrib; - - return xml_attribute(); - } - - // Find child node using predicate. Returns first child for which predicate returned true. - template <typename Predicate> xml_node find_child(Predicate pred) const - { - if (!_root) return xml_node(); - - for (xml_node node = first_child(); node; node = node.next_sibling()) - if (pred(node)) - return node; - - return xml_node(); - } - - // Find node from subtree using predicate. Returns first node from subtree (depth-first), for which predicate returned true. 
- template <typename Predicate> xml_node find_node(Predicate pred) const - { - if (!_root) return xml_node(); - - xml_node cur = first_child(); - - while (cur._root && cur._root != _root) - { - if (pred(cur)) return cur; - - if (cur.first_child()) cur = cur.first_child(); - else if (cur.next_sibling()) cur = cur.next_sibling(); - else - { - while (!cur.next_sibling() && cur._root != _root) cur = cur.parent(); - - if (cur._root != _root) cur = cur.next_sibling(); - } - } - - return xml_node(); - } - - // Find child node by attribute name/value - xml_node find_child_by_attribute(const char_t* name, const char_t* attr_name, const char_t* attr_value) const; - xml_node find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const; - - #ifndef PUGIXML_NO_STL - // Get the absolute node path from root as a text string. - string_t path(char_t delimiter = '/') const; - #endif - - // Search for a node by path consisting of node names and . or .. elements. - xml_node first_element_by_path(const char_t* path, char_t delimiter = '/') const; - - // Recursively traverse subtree with xml_tree_walker - bool traverse(xml_tree_walker& walker); - - #ifndef PUGIXML_NO_XPATH - // Select single node by evaluating XPath query. Returns first node from the resulting node set. - xpath_node select_node(const char_t* query, xpath_variable_set* variables = 0) const; - xpath_node select_node(const xpath_query& query) const; - - // Select node set by evaluating XPath query - xpath_node_set select_nodes(const char_t* query, xpath_variable_set* variables = 0) const; - xpath_node_set select_nodes(const xpath_query& query) const; - - // (deprecated: use select_node instead) Select single node by evaluating XPath query. - xpath_node select_single_node(const char_t* query, xpath_variable_set* variables = 0) const; - xpath_node select_single_node(const xpath_query& query) const; - - #endif - - // Print subtree using a writer object - void print(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; - - #ifndef PUGIXML_NO_STL - // Print subtree to stream - void print(std::basic_ostream<char, std::char_traits<char> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto, unsigned int depth = 0) const; - void print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& os, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, unsigned int depth = 0) const; - #endif - - // Child nodes iterators - typedef xml_node_iterator iterator; - - iterator begin() const; - iterator end() const; - - // Attribute iterators - typedef xml_attribute_iterator attribute_iterator; - - attribute_iterator attributes_begin() const; - attribute_iterator attributes_end() const; - - // Range-based for support - xml_object_range<xml_node_iterator> children() const; - xml_object_range<xml_named_node_iterator> children(const char_t* name) const; - xml_object_range<xml_attribute_iterator> attributes() const; - - // Get node offset in parsed file/string (in char_t units) for debugging purposes - ptrdiff_t offset_debug() const; - - // Get hash value (unique for handles to the same object) - size_t hash_value() const; - - // Get internal pointer - xml_node_struct* internal_object() const; - }; - -#ifdef __BORLANDC__ - // Borland C++ workaround - bool PUGIXML_FUNCTION operator&&(const xml_node& lhs, bool rhs); - bool PUGIXML_FUNCTION operator||(const xml_node& lhs, bool rhs); -#endif - - // A helper for working with text inside PCDATA nodes - class PUGIXML_CLASS xml_text - { - friend class 
xml_node; - - xml_node_struct* _root; - - typedef void (*unspecified_bool_type)(xml_text***); - - explicit xml_text(xml_node_struct* root); - - xml_node_struct* _data_new(); - xml_node_struct* _data() const; - - public: - // Default constructor. Constructs an empty object. - xml_text(); - - // Safe bool conversion operator - operator unspecified_bool_type() const; - - // Borland C++ workaround - bool operator!() const; - - // Check if text object is empty - bool empty() const; - - // Get text, or "" if object is empty - const char_t* get() const; - - // Get text, or the default value if object is empty - const char_t* as_string(const char_t* def = PUGIXML_TEXT("")) const; - - // Get text as a number, or the default value if conversion did not succeed or object is empty - int as_int(int def = 0) const; - unsigned int as_uint(unsigned int def = 0) const; - double as_double(double def = 0) const; - float as_float(float def = 0) const; - - #ifdef PUGIXML_HAS_LONG_LONG - long long as_llong(long long def = 0) const; - unsigned long long as_ullong(unsigned long long def = 0) const; - #endif - - // Get text as bool (returns true if first character is in '1tTyY' set), or the default value if object is empty - bool as_bool(bool def = false) const; - - // Set text (returns false if object is empty or there is not enough memory) - bool set(const char_t* rhs); - - // Set text with type conversion (numbers are converted to strings, boolean is converted to "true"/"false") - bool set(int rhs); - bool set(unsigned int rhs); - bool set(long rhs); - bool set(unsigned long rhs); - bool set(double rhs); - bool set(float rhs); - bool set(bool rhs); - - #ifdef PUGIXML_HAS_LONG_LONG - bool set(long long rhs); - bool set(unsigned long long rhs); - #endif - - // Set text (equivalent to set without error checking) - xml_text& operator=(const char_t* rhs); - xml_text& operator=(int rhs); - xml_text& operator=(unsigned int rhs); - xml_text& operator=(long rhs); - xml_text& operator=(unsigned long rhs); - xml_text& operator=(double rhs); - xml_text& operator=(float rhs); - xml_text& operator=(bool rhs); - - #ifdef PUGIXML_HAS_LONG_LONG - xml_text& operator=(long long rhs); - xml_text& operator=(unsigned long long rhs); - #endif - - // Get the data node (node_pcdata or node_cdata) for this object - xml_node data() const; - }; - -#ifdef __BORLANDC__ - // Borland C++ workaround - bool PUGIXML_FUNCTION operator&&(const xml_text& lhs, bool rhs); - bool PUGIXML_FUNCTION operator||(const xml_text& lhs, bool rhs); -#endif - - // Child node iterator (a bidirectional iterator over a collection of xml_node) - class PUGIXML_CLASS xml_node_iterator - { - friend class xml_node; - - private: - mutable xml_node _wrap; - xml_node _parent; - - xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent); - - public: - // Iterator traits - typedef ptrdiff_t difference_type; - typedef xml_node value_type; - typedef xml_node* pointer; - typedef xml_node& reference; - - #ifndef PUGIXML_NO_STL - typedef std::bidirectional_iterator_tag iterator_category; - #endif - - // Default constructor - xml_node_iterator(); - - // Construct an iterator which points to the specified node - xml_node_iterator(const xml_node& node); - - // Iterator operators - bool operator==(const xml_node_iterator& rhs) const; - bool operator!=(const xml_node_iterator& rhs) const; - - xml_node& operator*() const; - xml_node* operator->() const; - - const xml_node_iterator& operator++(); - xml_node_iterator operator++(int); - - const xml_node_iterator& operator--(); - 
xml_node_iterator operator--(int); - }; - - // Attribute iterator (a bidirectional iterator over a collection of xml_attribute) - class PUGIXML_CLASS xml_attribute_iterator - { - friend class xml_node; - - private: - mutable xml_attribute _wrap; - xml_node _parent; - - xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent); - - public: - // Iterator traits - typedef ptrdiff_t difference_type; - typedef xml_attribute value_type; - typedef xml_attribute* pointer; - typedef xml_attribute& reference; - - #ifndef PUGIXML_NO_STL - typedef std::bidirectional_iterator_tag iterator_category; - #endif - - // Default constructor - xml_attribute_iterator(); - - // Construct an iterator which points to the specified attribute - xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent); - - // Iterator operators - bool operator==(const xml_attribute_iterator& rhs) const; - bool operator!=(const xml_attribute_iterator& rhs) const; - - xml_attribute& operator*() const; - xml_attribute* operator->() const; - - const xml_attribute_iterator& operator++(); - xml_attribute_iterator operator++(int); - - const xml_attribute_iterator& operator--(); - xml_attribute_iterator operator--(int); - }; - - // Named node range helper - class PUGIXML_CLASS xml_named_node_iterator - { - friend class xml_node; - - public: - // Iterator traits - typedef ptrdiff_t difference_type; - typedef xml_node value_type; - typedef xml_node* pointer; - typedef xml_node& reference; - - #ifndef PUGIXML_NO_STL - typedef std::bidirectional_iterator_tag iterator_category; - #endif - - // Default constructor - xml_named_node_iterator(); - - // Construct an iterator which points to the specified node - xml_named_node_iterator(const xml_node& node, const char_t* name); - - // Iterator operators - bool operator==(const xml_named_node_iterator& rhs) const; - bool operator!=(const xml_named_node_iterator& rhs) const; - - xml_node& operator*() const; - xml_node* operator->() const; - - const xml_named_node_iterator& operator++(); - xml_named_node_iterator operator++(int); - - const xml_named_node_iterator& operator--(); - xml_named_node_iterator operator--(int); - - private: - mutable xml_node _wrap; - xml_node _parent; - const char_t* _name; - - xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name); - }; - - // Abstract tree walker class (see xml_node::traverse) - class PUGIXML_CLASS xml_tree_walker - { - friend class xml_node; - - private: - int _depth; - - protected: - // Get current traversal depth - int depth() const; - - public: - xml_tree_walker(); - virtual ~xml_tree_walker(); - - // Callback that is called when traversal begins - virtual bool begin(xml_node& node); - - // Callback that is called for each node traversed - virtual bool for_each(xml_node& node) = 0; - - // Callback that is called when traversal ends - virtual bool end(xml_node& node); - }; - - // Parsing status, returned as part of xml_parse_result object - enum xml_parse_status - { - status_ok = 0, // No error - - status_file_not_found, // File was not found during load_file() - status_io_error, // Error reading from file/stream - status_out_of_memory, // Could not allocate memory - status_internal_error, // Internal error occurred - - status_unrecognized_tag, // Parser could not determine tag type - - status_bad_pi, // Parsing error occurred while parsing document declaration/processing instruction - status_bad_comment, // Parsing error occurred while parsing comment - status_bad_cdata, // Parsing error 
occurred while parsing CDATA section - status_bad_doctype, // Parsing error occurred while parsing document type declaration - status_bad_pcdata, // Parsing error occurred while parsing PCDATA section - status_bad_start_element, // Parsing error occurred while parsing start element tag - status_bad_attribute, // Parsing error occurred while parsing element attribute - status_bad_end_element, // Parsing error occurred while parsing end element tag - status_end_element_mismatch,// There was a mismatch of start-end tags (closing tag had incorrect name, some tag was not closed or there was an excessive closing tag) - - status_append_invalid_root, // Unable to append nodes since root type is not node_element or node_document (exclusive to xml_node::append_buffer) - - status_no_document_element // Parsing resulted in a document without element nodes - }; - - // Parsing result - struct PUGIXML_CLASS xml_parse_result - { - // Parsing status (see xml_parse_status) - xml_parse_status status; - - // Last parsed offset (in char_t units from start of input data) - ptrdiff_t offset; - - // Source document encoding - xml_encoding encoding; - - // Default constructor, initializes object to failed state - xml_parse_result(); - - // Cast to bool operator - operator bool() const; - - // Get error description - const char* description() const; - }; - - // Document class (DOM tree root) - class PUGIXML_CLASS xml_document: public xml_node - { - private: - char_t* _buffer; - - char _memory[192]; - - // Non-copyable semantics - xml_document(const xml_document&); - xml_document& operator=(const xml_document&); - - void create(); - void destroy(); - - public: - // Default constructor, makes empty document - xml_document(); - - // Destructor, invalidates all node/attribute handles to this document - ~xml_document(); - - // Removes all nodes, leaving the empty document - void reset(); - - // Removes all nodes, then copies the entire contents of the specified document - void reset(const xml_document& proto); - - #ifndef PUGIXML_NO_STL - // Load document from stream. - xml_parse_result load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - xml_parse_result load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options = parse_default); - #endif - - // (deprecated: use load_string instead) Load document from zero-terminated string. No encoding conversions are applied. - xml_parse_result load(const char_t* contents, unsigned int options = parse_default); - - // Load document from zero-terminated string. No encoding conversions are applied. - xml_parse_result load_string(const char_t* contents, unsigned int options = parse_default); - - // Load document from file - xml_parse_result load_file(const char* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - xml_parse_result load_file(const wchar_t* path, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - - // Load document from buffer. Copies/converts the buffer, so it may be deleted or changed after the function returns. - xml_parse_result load_buffer(const void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - - // Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data). - // You should ensure that buffer data will persist throughout the document's lifetime, and free the buffer memory manually once document is destroyed. 
- xml_parse_result load_buffer_inplace(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - - // Load document from buffer, using the buffer for in-place parsing (the buffer is modified and used for storage of document data). - // You should allocate the buffer with pugixml allocation function; document will free the buffer when it is no longer needed (you can't use it anymore). - xml_parse_result load_buffer_inplace_own(void* contents, size_t size, unsigned int options = parse_default, xml_encoding encoding = encoding_auto); - - // Save XML document to writer (semantics is slightly different from xml_node::print, see documentation for details). - void save(xml_writer& writer, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - - #ifndef PUGIXML_NO_STL - // Save XML document to stream (semantics is slightly different from xml_node::print, see documentation for details). - void save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - void save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default) const; - #endif - - // Save XML to file - bool save_file(const char* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - bool save_file(const wchar_t* path, const char_t* indent = PUGIXML_TEXT("\t"), unsigned int flags = format_default, xml_encoding encoding = encoding_auto) const; - - // Get document element - xml_node document_element() const; - }; - -#ifndef PUGIXML_NO_XPATH - // XPath query return type - enum xpath_value_type - { - xpath_type_none, // Unknown type (query failed to compile) - xpath_type_node_set, // Node set (xpath_node_set) - xpath_type_number, // Number - xpath_type_string, // String - xpath_type_boolean // Boolean - }; - - // XPath parsing result - struct PUGIXML_CLASS xpath_parse_result - { - // Error message (0 if no error) - const char* error; - - // Last parsed offset (in char_t units from string start) - ptrdiff_t offset; - - // Default constructor, initializes object to failed state - xpath_parse_result(); - - // Cast to bool operator - operator bool() const; - - // Get error description - const char* description() const; - }; - - // A single XPath variable - class PUGIXML_CLASS xpath_variable - { - friend class xpath_variable_set; - - protected: - xpath_value_type _type; - xpath_variable* _next; - - xpath_variable(xpath_value_type type); - - // Non-copyable semantics - xpath_variable(const xpath_variable&); - xpath_variable& operator=(const xpath_variable&); - - public: - // Get variable name - const char_t* name() const; - - // Get variable type - xpath_value_type type() const; - - // Get variable value; no type conversion is performed, default value (false, NaN, empty string, empty node set) is returned on type mismatch error - bool get_boolean() const; - double get_number() const; - const char_t* get_string() const; - const xpath_node_set& get_node_set() const; - - // Set variable value; no type conversion is performed, false is returned on type mismatch error - bool set(bool value); - bool set(double value); - bool set(const char_t* value); - bool set(const xpath_node_set& value); - }; - - // A set of XPath variables - class PUGIXML_CLASS xpath_variable_set - { - private: - xpath_variable*
_data[64]; - - void _assign(const xpath_variable_set& rhs); - void _swap(xpath_variable_set& rhs); - - xpath_variable* _find(const char_t* name) const; - - static bool _clone(xpath_variable* var, xpath_variable** out_result); - static void _destroy(xpath_variable* var); - - public: - // Default constructor/destructor - xpath_variable_set(); - ~xpath_variable_set(); - - // Copy constructor/assignment operator - xpath_variable_set(const xpath_variable_set& rhs); - xpath_variable_set& operator=(const xpath_variable_set& rhs); - - #if __cplusplus >= 201103 - // Move semantics support - xpath_variable_set(xpath_variable_set&& rhs); - xpath_variable_set& operator=(xpath_variable_set&& rhs); - #endif - - // Add a new variable or get the existing one, if the types match - xpath_variable* add(const char_t* name, xpath_value_type type); - - // Set value of an existing variable; no type conversion is performed, false is returned if there is no such variable or if types mismatch - bool set(const char_t* name, bool value); - bool set(const char_t* name, double value); - bool set(const char_t* name, const char_t* value); - bool set(const char_t* name, const xpath_node_set& value); - - // Get existing variable by name - xpath_variable* get(const char_t* name); - const xpath_variable* get(const char_t* name) const; - }; - - // A compiled XPath query object - class PUGIXML_CLASS xpath_query - { - private: - void* _impl; - xpath_parse_result _result; - - typedef void (*unspecified_bool_type)(xpath_query***); - - // Non-copyable semantics - xpath_query(const xpath_query&); - xpath_query& operator=(const xpath_query&); - - public: - // Construct a compiled object from XPath expression. - // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on compilation errors. - explicit xpath_query(const char_t* query, xpath_variable_set* variables = 0); - - // Constructor - xpath_query(); - - // Destructor - ~xpath_query(); - - #if __cplusplus >= 201103 - // Move semantics support - xpath_query(xpath_query&& rhs); - xpath_query& operator=(xpath_query&& rhs); - #endif - - // Get query expression return type - xpath_value_type return_type() const; - - // Evaluate expression as boolean value in the specified context; performs type conversion if necessary. - // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors. - bool evaluate_boolean(const xpath_node& n) const; - - // Evaluate expression as double value in the specified context; performs type conversion if necessary. - // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors. - double evaluate_number(const xpath_node& n) const; - - #ifndef PUGIXML_NO_STL - // Evaluate expression as string value in the specified context; performs type conversion if necessary. - // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors. - string_t evaluate_string(const xpath_node& n) const; - #endif - - // Evaluate expression as string value in the specified context; performs type conversion if necessary. - // At most capacity characters are written to the destination buffer, full result size is returned (includes terminating zero). - // If PUGIXML_NO_EXCEPTIONS is not defined, throws std::bad_alloc on out of memory errors. - // If PUGIXML_NO_EXCEPTIONS is defined, returns empty set instead. - size_t evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const; - - // Evaluate expression as node set in the specified context. 
- // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on type mismatch and std::bad_alloc on out of memory errors. - // If PUGIXML_NO_EXCEPTIONS is defined, returns empty node set instead. - xpath_node_set evaluate_node_set(const xpath_node& n) const; - - // Evaluate expression as node set in the specified context. - // Return first node in document order, or empty node if node set is empty. - // If PUGIXML_NO_EXCEPTIONS is not defined, throws xpath_exception on type mismatch and std::bad_alloc on out of memory errors. - // If PUGIXML_NO_EXCEPTIONS is defined, returns empty node instead. - xpath_node evaluate_node(const xpath_node& n) const; - - // Get parsing result (used to get compilation errors in PUGIXML_NO_EXCEPTIONS mode) - const xpath_parse_result& result() const; - - // Safe bool conversion operator - operator unspecified_bool_type() const; - - // Borland C++ workaround - bool operator!() const; - }; - - #ifndef PUGIXML_NO_EXCEPTIONS - // XPath exception class - class PUGIXML_CLASS xpath_exception: public std::exception - { - private: - xpath_parse_result _result; - - public: - // Construct exception from parse result - explicit xpath_exception(const xpath_parse_result& result); - - // Get error message - virtual const char* what() const throw(); - - // Get parse result - const xpath_parse_result& result() const; - }; - #endif - - // XPath node class (either xml_node or xml_attribute) - class PUGIXML_CLASS xpath_node - { - private: - xml_node _node; - xml_attribute _attribute; - - typedef void (*unspecified_bool_type)(xpath_node***); - - public: - // Default constructor; constructs empty XPath node - xpath_node(); - - // Construct XPath node from XML node/attribute - xpath_node(const xml_node& node); - xpath_node(const xml_attribute& attribute, const xml_node& parent); - - // Get node/attribute, if any - xml_node node() const; - xml_attribute attribute() const; - - // Get parent of contained node/attribute - xml_node parent() const; - - // Safe bool conversion operator - operator unspecified_bool_type() const; - - // Borland C++ workaround - bool operator!() const; - - // Comparison operators - bool operator==(const xpath_node& n) const; - bool operator!=(const xpath_node& n) const; - }; - -#ifdef __BORLANDC__ - // Borland C++ workaround - bool PUGIXML_FUNCTION operator&&(const xpath_node& lhs, bool rhs); - bool PUGIXML_FUNCTION operator||(const xpath_node& lhs, bool rhs); -#endif - - // A fixed-size collection of XPath nodes - class PUGIXML_CLASS xpath_node_set - { - public: - // Collection type - enum type_t - { - type_unsorted, // Not ordered - type_sorted, // Sorted by document order (ascending) - type_sorted_reverse // Sorted by document order (descending) - }; - - // Constant iterator type - typedef const xpath_node* const_iterator; - - // We define non-constant iterator to be the same as constant iterator so that various generic algorithms (i.e. boost foreach) work - typedef const xpath_node* iterator; - - // Default constructor. Constructs empty set. 
- xpath_node_set(); - - // Constructs a set from iterator range; data is not checked for duplicates and is not sorted according to provided type, so be careful - xpath_node_set(const_iterator begin, const_iterator end, type_t type = type_unsorted); - - // Destructor - ~xpath_node_set(); - - // Copy constructor/assignment operator - xpath_node_set(const xpath_node_set& ns); - xpath_node_set& operator=(const xpath_node_set& ns); - - #if __cplusplus >= 201103 - // Move semantics support - xpath_node_set(xpath_node_set&& rhs); - xpath_node_set& operator=(xpath_node_set&& rhs); - #endif - - // Get collection type - type_t type() const; - - // Get collection size - size_t size() const; - - // Indexing operator - const xpath_node& operator[](size_t index) const; - - // Collection iterators - const_iterator begin() const; - const_iterator end() const; - - // Sort the collection in ascending/descending order by document order - void sort(bool reverse = false); - - // Get first node in the collection by document order - xpath_node first() const; - - // Check if collection is empty - bool empty() const; - - private: - type_t _type; - - xpath_node _storage; - - xpath_node* _begin; - xpath_node* _end; - - void _assign(const_iterator begin, const_iterator end, type_t type); - void _move(xpath_node_set& rhs); - }; -#endif - -#ifndef PUGIXML_NO_STL - // Convert wide string to UTF8 - std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const wchar_t* str); - std::basic_string<char, std::char_traits<char>, std::allocator<char> > PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >& str); - - // Convert UTF8 to wide string - std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const char* str); - std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > PUGIXML_FUNCTION as_wide(const std::basic_string<char, std::char_traits<char>, std::allocator<char> >& str); -#endif - - // Memory allocation function interface; returns pointer to allocated memory or NULL on failure - typedef void* (*allocation_function)(size_t size); - - // Memory deallocation function interface - typedef void (*deallocation_function)(void* ptr); - - // Override default memory management functions. All subsequent allocations/deallocations will be performed via supplied functions.
- void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate); - - // Get current memory management functions - allocation_function PUGIXML_FUNCTION get_memory_allocation_function(); - deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function(); -} - -#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC)) -namespace std -{ - // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier) - std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_node_iterator&); - std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_attribute_iterator&); - std::bidirectional_iterator_tag PUGIXML_FUNCTION _Iter_cat(const pugi::xml_named_node_iterator&); -} -#endif - -#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC) -namespace std -{ - // Workarounds for (non-standard) iterator category detection - std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_node_iterator&); - std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_attribute_iterator&); - std::bidirectional_iterator_tag PUGIXML_FUNCTION __iterator_category(const pugi::xml_named_node_iterator&); -} -#endif - -#endif - -// Make sure implementation is included in header-only mode -// Use macro expansion in #include to work around QMake (QTBUG-11923) -#if defined(PUGIXML_HEADER_ONLY) && !defined(PUGIXML_SOURCE) -# define PUGIXML_SOURCE "pugixml.cpp" -# include PUGIXML_SOURCE -#endif - -/** - * Copyright (c) 2006-2016 Arseny Kapoulkine - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ diff --git a/inference-engine/scripts/cpplint.py b/inference-engine/scripts/cpplint.py new file mode 100644 index 00000000000000..da4da58076b00f --- /dev/null +++ b/inference-engine/scripts/cpplint.py @@ -0,0 +1,6127 @@ +#!/usr/bin/env python +# +# Copyright (c) 2009 Google Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Does google-lint on c++ files. + +The goal of this script is to identify places in the code that *may* +be in non-compliance with google style. It does not attempt to fix +up these problems -- the point is to educate. It also does not +attempt to find all problems, or to ensure that everything it does +find is legitimately a problem. + +In particular, we can get very confused by /* and // inside strings! +We do a small hack, which is to ignore //'s with "'s after them on the +same line, but it is far from perfect (in either direction). +""" + +import codecs +import copy +import getopt +import math # for log +import os +import re +import sre_compile +import string +import sys +import unicodedata + + +_USAGE = """ +Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...] + [--counting=total|toplevel|detailed] [--root=subdir] + [--linelength=digits] + [file] ... + + The style guidelines this tries to follow are those in + https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml + + Every problem is given a confidence score from 1-5, with 5 meaning we are + certain of the problem, and 1 meaning it could be a legitimate construct. + This will miss some errors, and is not a substitute for a code review. + + To suppress false-positive errors of a certain category, add a + 'NOLINT(category)' comment to the line. NOLINT or NOLINT(*) + suppresses errors of all categories on that line. + + The files passed in will be linted; at least one file must be provided. + Default linted extensions are .cc, .cpp, .cu, .cuh and .h. Change the + extensions with the --extensions flag. + + Flags: + + output=vs7 + By default, the output is formatted to ease emacs parsing. Visual Studio + compatible output (vs7) may also be used. Other formats are unsupported. + + verbose=# + Specify a number 0-5 to restrict errors to certain verbosity levels. + + filter=-x,+y,... + Specify a comma-separated list of category-filters to apply: only + error messages whose category names pass the filters will be printed. + (Category names are printed with the message and look like + "[whitespace/indent]".) Filters are evaluated left to right. + "-FOO" and "FOO" mean "do not print categories that start with FOO". + "+FOO" means "do print categories that start with FOO".
+ + Examples: --filter=-whitespace,+whitespace/braces + --filter=whitespace,runtime/printf,+runtime/printf_format + --filter=-,+build/include_what_you_use + + To see a list of all the categories used in cpplint, pass no arg: + --filter= + + counting=total|toplevel|detailed + The total number of errors found is always printed. If + 'toplevel' is provided, then the count of errors in each of + the top-level categories like 'build' and 'whitespace' will + also be printed. If 'detailed' is provided, then a count + is provided for each category like 'build/class'. + + root=subdir + The root directory used for deriving the header guard CPP variable. + By default, the header guard CPP variable is calculated as the relative + path to the directory that contains .git, .hg, or .svn. When this flag + is specified, the relative path is calculated from the specified + directory. If the specified directory does not exist, this flag is + ignored. + + Examples: + Assuming that src/.git exists, the header guard CPP variables for + src/chrome/browser/ui/browser.h are: + + No flag => CHROME_BROWSER_UI_BROWSER_H_ + --root=chrome => BROWSER_UI_BROWSER_H_ + --root=chrome/browser => UI_BROWSER_H_ + + linelength=digits + This is the allowed line length for the project. The default value is + 80 characters. + + Examples: + --linelength=120 + + extensions=extension,extension,... + The allowed file extensions that cpplint will check. + + Examples: + --extensions=hpp,cpp + + cpplint.py supports per-directory configurations specified in CPPLINT.cfg + files. A CPPLINT.cfg file can contain a number of key=value pairs. + Currently the following options are supported: + + set noparent + filter=+filter1,-filter2,... + exclude_files=regex + linelength=80 + root=subdir + + The "set noparent" option prevents cpplint from traversing the directory tree + upwards looking for more .cfg files in parent directories. This option + is usually placed in the top-level project directory. + + The "filter" option is similar in function to the --filter flag. It specifies + message filters in addition to the |_DEFAULT_FILTERS| and those specified + through the --filter command-line flag. + + "exclude_files" allows specifying a regular expression to be matched against + a file name. If the expression matches, the file is skipped and not run + through the linter. + + "linelength" allows specifying the allowed line length for the project. + + The "root" option is similar in function to the --root flag (see example + above). + + CPPLINT.cfg has an effect on files in the same directory and all + sub-directories, unless overridden by a nested configuration file. + + Example file: + filter=-build/include_order,+build/include_alpha + exclude_files=.*\.cc + + The above example disables the build/include_order warning and enables + build/include_alpha, and excludes all .cc files from being + processed by the linter, in the current directory (where the .cfg + file is located) and all sub-directories. +""" + +# We categorize each error message we print. Here are the categories. +# We want an explicit list so we can list them all in cpplint --filter=. +# If you add a new error message with a new category, add it to the list +# here! cpplint_unittest.py should tell you if you forget to do this.
+_ERROR_CATEGORIES = [ + 'build/class', + 'build/c++11', + 'build/c++14', + 'build/c++tr1', + 'build/deprecated', + 'build/endif_comment', + 'build/explicit_make_pair', + 'build/forward_decl', + 'build/header_guard', + 'build/include', + 'build/include_alpha', + 'build/include_order', + 'build/include_what_you_use', + 'build/namespaces', + 'build/printf_format', + 'build/storage_class', + 'legal/copyright', + 'readability/alt_tokens', + 'readability/braces', + 'readability/casting', + 'readability/check', + 'readability/constructors', + 'readability/fn_size', + 'readability/inheritance', + 'readability/multiline_comment', + 'readability/multiline_string', + 'readability/namespace', + 'readability/nolint', + 'readability/nul', + 'readability/strings', + 'readability/todo', + 'readability/utf8', + 'runtime/arrays', + 'runtime/casting', + 'runtime/explicit', + 'runtime/int', + 'runtime/init', + 'runtime/invalid_increment', + 'runtime/member_string_references', + 'runtime/memset', + 'runtime/indentation_namespace', + 'runtime/operator', + 'runtime/printf', + 'runtime/printf_format', + 'runtime/references', + 'runtime/string', + 'runtime/threadsafe_fn', + 'runtime/vlog', + 'whitespace/blank_line', + 'whitespace/braces', + 'whitespace/comma', + 'whitespace/comments', + 'whitespace/empty_conditional_body', + 'whitespace/empty_if_body', + 'whitespace/empty_loop_body', + 'whitespace/end_of_line', + 'whitespace/ending_newline', + 'whitespace/forcolon', + 'whitespace/indent', + 'whitespace/line_length', + 'whitespace/newline', + 'whitespace/operators', + 'whitespace/parens', + 'whitespace/semicolon', + 'whitespace/tab', + 'whitespace/todo', + ] + +# These error categories are no longer enforced by cpplint, but for backwards- +# compatibility they may still appear in NOLINT comments. +_LEGACY_ERROR_CATEGORIES = [ + 'readability/streams', + 'readability/function', + ] + +# The default state of the category filter. This is overridden by the --filter= +# flag. By default all errors are on, so only add here categories that should be +# off by default (i.e., categories that must be enabled by the --filter= flags). +# All entries here should start with a '-' or '+', as in the --filter= flag. +_DEFAULT_FILTERS = ['-build/include_alpha'] + +# The default list of categories suppressed for C (not C++) files. +_DEFAULT_C_SUPPRESSED_CATEGORIES = [ + 'readability/casting', + ] + +# The default list of categories suppressed for Linux Kernel files. +_DEFAULT_KERNEL_SUPPRESSED_CATEGORIES = [ + 'whitespace/tab', + ] + +# We used to check for high-bit characters, but after much discussion we +# decided those were OK, as long as they were in UTF-8 and didn't represent +# hard-coded international strings, which belong in a separate i18n file. 
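A note on the filter semantics documented in _USAGE above: filters are evaluated left to right, and the last matching '+'/'-' entry wins. A minimal self-contained sketch of that evaluation (the helper name passes_filters is illustrative only; inside cpplint the equivalent logic lives in _ShouldPrintError further down):

def passes_filters(category, filters):
    # Evaluate left to right; the last '+'/'-' entry whose body is a
    # prefix of the category decides whether the message is filtered.
    is_filtered = False
    for one_filter in filters:
        if one_filter.startswith('-') and category.startswith(one_filter[1:]):
            is_filtered = True
        elif one_filter.startswith('+') and category.startswith(one_filter[1:]):
            is_filtered = False
    return not is_filtered

# '-build/include_alpha' is off by default (_DEFAULT_FILTERS below), but a
# later '+build' entry re-enables everything under 'build':
print(passes_filters('build/include_alpha', ['-build/include_alpha']))            # False
print(passes_filters('build/include_alpha', ['-build/include_alpha', '+build']))  # True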
+ +# C++ headers +_CPP_HEADERS = frozenset([ + # Legacy + 'algobase.h', + 'algo.h', + 'alloc.h', + 'builtinbuf.h', + 'bvector.h', + 'complex.h', + 'defalloc.h', + 'deque.h', + 'editbuf.h', + 'fstream.h', + 'function.h', + 'hash_map', + 'hash_map.h', + 'hash_set', + 'hash_set.h', + 'hashtable.h', + 'heap.h', + 'indstream.h', + 'iomanip.h', + 'iostream.h', + 'istream.h', + 'iterator.h', + 'list.h', + 'map.h', + 'multimap.h', + 'multiset.h', + 'ostream.h', + 'pair.h', + 'parsestream.h', + 'pfstream.h', + 'procbuf.h', + 'pthread_alloc', + 'pthread_alloc.h', + 'rope', + 'rope.h', + 'ropeimpl.h', + 'set.h', + 'slist', + 'slist.h', + 'stack.h', + 'stdiostream.h', + 'stl_alloc.h', + 'stl_relops.h', + 'streambuf.h', + 'stream.h', + 'strfile.h', + 'strstream.h', + 'tempbuf.h', + 'tree.h', + 'type_traits.h', + 'vector.h', + # 17.6.1.2 C++ library headers + 'algorithm', + 'array', + 'atomic', + 'bitset', + 'chrono', + 'codecvt', + 'complex', + 'condition_variable', + 'deque', + 'exception', + 'forward_list', + 'fstream', + 'functional', + 'future', + 'initializer_list', + 'iomanip', + 'ios', + 'iosfwd', + 'iostream', + 'istream', + 'iterator', + 'limits', + 'list', + 'locale', + 'map', + 'memory', + 'mutex', + 'new', + 'numeric', + 'ostream', + 'queue', + 'random', + 'ratio', + 'regex', + 'scoped_allocator', + 'set', + 'sstream', + 'stack', + 'stdexcept', + 'streambuf', + 'string', + 'strstream', + 'system_error', + 'thread', + 'tuple', + 'typeindex', + 'typeinfo', + 'type_traits', + 'unordered_map', + 'unordered_set', + 'utility', + 'valarray', + 'vector', + # 17.6.1.2 C++ headers for C library facilities + 'cassert', + 'ccomplex', + 'cctype', + 'cerrno', + 'cfenv', + 'cfloat', + 'cinttypes', + 'ciso646', + 'climits', + 'clocale', + 'cmath', + 'csetjmp', + 'csignal', + 'cstdalign', + 'cstdarg', + 'cstdbool', + 'cstddef', + 'cstdint', + 'cstdio', + 'cstdlib', + 'cstring', + 'ctgmath', + 'ctime', + 'cuchar', + 'cwchar', + 'cwctype', + ]) + +# Type names +_TYPES = re.compile( + r'^(?:' + # [dcl.type.simple] + r'(char(16_t|32_t)?)|wchar_t|' + r'bool|short|int|long|signed|unsigned|float|double|' + # [support.types] + r'(ptrdiff_t|size_t|max_align_t|nullptr_t)|' + # [cstdint.syn] + r'(u?int(_fast|_least)?(8|16|32|64)_t)|' + r'(u?int(max|ptr)_t)|' + r')$') + + +# These headers are excluded from [build/include] and [build/include_order] +# checks: +# - Anything not following google file name conventions (containing an +# uppercase character, such as Python.h or nsStringAPI.h, for example). +# - Lua headers. +_THIRD_PARTY_HEADERS_PATTERN = re.compile( + r'^(?:[^/]*[A-Z][^/]*\.h|lua\.h|lauxlib\.h|lualib\.h)$') + +# Pattern for matching FileInfo.BaseName() against test file name +_TEST_FILE_SUFFIX = r'(_test|_unittest|_regtest)$' + +# Pattern that matches only complete whitespace, possibly across multiple lines. +_EMPTY_CONDITIONAL_BODY_PATTERN = re.compile(r'^\s*$', re.DOTALL) + +# Assertion macros. These are defined in base/logging.h and +# testing/base/public/gunit.h. 
+_CHECK_MACROS = [ + 'DCHECK', 'CHECK', + 'EXPECT_TRUE', 'ASSERT_TRUE', + 'EXPECT_FALSE', 'ASSERT_FALSE', + ] + +# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE +_CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS]) + +for op, replacement in [('==', 'EQ'), ('!=', 'NE'), + ('>=', 'GE'), ('>', 'GT'), + ('<=', 'LE'), ('<', 'LT')]: + _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement + _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement + _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement + _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement + +for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'), + ('>=', 'LT'), ('>', 'LE'), + ('<=', 'GT'), ('<', 'GE')]: + _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement + _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement + +# Alternative tokens and their replacements. For full list, see section 2.5 +# Alternative tokens [lex.digraph] in the C++ standard. +# +# Digraphs (such as '%:') are not included here since it's a mess to +# match those on a word boundary. +_ALT_TOKEN_REPLACEMENT = { + 'and': '&&', + 'bitor': '|', + 'or': '||', + 'xor': '^', + 'compl': '~', + 'bitand': '&', + 'and_eq': '&=', + 'or_eq': '|=', + 'xor_eq': '^=', + 'not': '!', + 'not_eq': '!=' + } + +# Compile regular expression that matches all the above keywords. The "[ =()]" +# bit is meant to avoid matching these keywords outside of boolean expressions. +# +# False positives include C-style multi-line comments and multi-line strings +# but those have always been troublesome for cpplint. +_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile( + r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)') + + +# These constants define types of headers for use with +# _IncludeState.CheckNextIncludeOrder(). +_C_SYS_HEADER = 1 +_CPP_SYS_HEADER = 2 +_LIKELY_MY_HEADER = 3 +_POSSIBLE_MY_HEADER = 4 +_OTHER_HEADER = 5 + +# These constants define the current inline assembly state +_NO_ASM = 0 # Outside of inline assembly block +_INSIDE_ASM = 1 # Inside inline assembly block +_END_ASM = 2 # Last line of inline assembly block +_BLOCK_ASM = 3 # The whole block is an inline assembly block + +# Match start of assembly blocks +_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)' + r'(?:\s+(volatile|__volatile__))?' + r'\s*[{(]') + +# Match strings that indicate we're working on a C (not C++) file. +_SEARCH_C_FILE = re.compile(r'\b(?:LINT_C_FILE|' + r'vim?:\s*.*(\s*|:)filetype=c(\s*|:|$))') + +# Match string that indicates we're working on a Linux Kernel file. +_SEARCH_KERNEL_FILE = re.compile(r'\b(?:LINT_KERNEL_FILE)') + +_regexp_compile_cache = {} + +# {str, set(int)}: a map from error categories to sets of linenumbers +# on which those errors are expected and should be suppressed. +_error_suppressions = {} + +# The root directory used for deriving header guard CPP variable. +# This is set by --root flag. +_root = None + +# The allowed line length of files. +# This is set by --linelength flag. +_line_length = 80 + +# The allowed extensions for file names +# This is set by --extensions flag. +_valid_extensions = set(['cc', 'h', 'cpp', 'cu', 'cuh', 'hpp', 'c']) + +# {str, bool}: a map from error categories to booleans which indicate if the +# category should be suppressed for every line. +_global_error_suppressions = {} + + +def ParseNolintSuppressions(filename, raw_line, linenum, error): + """Updates the global list of line error-suppressions. 
+ + Parses any NOLINT comments on the current line, updating the global + error_suppressions store. Reports an error if the NOLINT comment + was malformed. + + Args: + filename: str, the name of the input file. + raw_line: str, the line of input text, with comments. + linenum: int, the number of the current line. + error: function, an error handler. + """ + matched = Search(r'\bNOLINT(NEXTLINE)?\b(\([^)]+\))?', raw_line) + if matched: + if matched.group(1): + suppressed_line = linenum + 1 + else: + suppressed_line = linenum + category = matched.group(2) + if category in (None, '(*)'): # => "suppress all" + _error_suppressions.setdefault(None, set()).add(suppressed_line) + else: + if category.startswith('(') and category.endswith(')'): + category = category[1:-1] + if category in _ERROR_CATEGORIES: + _error_suppressions.setdefault(category, set()).add(suppressed_line) + elif category not in _LEGACY_ERROR_CATEGORIES: + error(filename, linenum, 'readability/nolint', 5, + 'Unknown NOLINT error category: %s' % category) + + +def ProcessGlobalSuppresions(lines): + """Updates the list of global error suppressions. + + Parses any lint directives in the file that have global effect. + + Args: + lines: An array of strings, each representing a line of the file, with the + last element being empty if the file is terminated with a newline. + """ + for line in lines: + if _SEARCH_C_FILE.search(line): + for category in _DEFAULT_C_SUPPRESSED_CATEGORIES: + _global_error_suppressions[category] = True + if _SEARCH_KERNEL_FILE.search(line): + for category in _DEFAULT_KERNEL_SUPPRESSED_CATEGORIES: + _global_error_suppressions[category] = True + + +def ResetNolintSuppressions(): + """Resets the set of NOLINT suppressions to empty.""" + _error_suppressions.clear() + _global_error_suppressions.clear() + + +def IsErrorSuppressedByNolint(category, linenum): + """Returns true if the specified error category is suppressed on this line. + + Consults the global error_suppressions map populated by + ParseNolintSuppressions/ProcessGlobalSuppresions/ResetNolintSuppressions. + + Args: + category: str, the category of the error. + linenum: int, the current line number. + Returns: + bool, True iff the error should be suppressed due to a NOLINT comment or + global suppression. + """ + return (_global_error_suppressions.get(category, False) or + linenum in _error_suppressions.get(category, set()) or + linenum in _error_suppressions.get(None, set())) + + +def Match(pattern, s): + """Matches the string with the pattern, caching the compiled regexp.""" + # The regexp compilation caching is inlined in both Match and Search for + # performance reasons; factoring it out into a separate function turns out + # to be noticeably expensive. + if pattern not in _regexp_compile_cache: + _regexp_compile_cache[pattern] = sre_compile.compile(pattern) + return _regexp_compile_cache[pattern].match(s) + + +def ReplaceAll(pattern, rep, s): + """Replaces instances of pattern in a string with a replacement. + + The compiled regex is kept in a cache shared by Match and Search. 
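As a usage sketch of the NOLINT machinery above (hypothetical file name demo.cc and a no-op error handler; this vintage of cpplint is Python 2 code, so run the sketch accordingly):

import cpplint

def noop_error(filename, linenum, category, confidence, message):
    pass  # a real handler would report the message

# NOLINT(category) suppresses a single category on that line:
cpplint.ParseNolintSuppressions('demo.cc', 'int x;  // NOLINT(runtime/int)', 7, noop_error)
print(cpplint.IsErrorSuppressedByNolint('runtime/int', 7))     # True
print(cpplint.IsErrorSuppressedByNolint('whitespace/tab', 7))  # False

# NOLINTNEXTLINE(*) suppresses every category on the following line:
cpplint.ParseNolintSuppressions('demo.cc', '// NOLINTNEXTLINE(*)', 10, noop_error)
print(cpplint.IsErrorSuppressedByNolint('whitespace/tab', 11))  # True
cpplint.ResetNolintSuppressions()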
+ + Args: + pattern: regex pattern + rep: replacement text + s: search string + + Returns: + string with replacements made (or original string if no replacements) + """ + if pattern not in _regexp_compile_cache: + _regexp_compile_cache[pattern] = sre_compile.compile(pattern) + return _regexp_compile_cache[pattern].sub(rep, s) + + +def Search(pattern, s): + """Searches the string for the pattern, caching the compiled regexp.""" + if pattern not in _regexp_compile_cache: + _regexp_compile_cache[pattern] = sre_compile.compile(pattern) + return _regexp_compile_cache[pattern].search(s) + + +def _IsSourceExtension(s): + """File extension (excluding dot) matches a source file extension.""" + return s in ('c', 'cc', 'cpp', 'cxx') + + +class _IncludeState(object): + """Tracks line numbers for includes, and the order in which includes appear. + + include_list contains list of lists of (header, line number) pairs. + It's a list of lists rather than just one flat list to make it + easier to update across preprocessor boundaries. + + Call CheckNextIncludeOrder() once for each header in the file, passing + in the type constants defined above. Calls in an illegal order will + raise an _IncludeError with an appropriate error message. + + """ + # self._section will move monotonically through this set. If it ever + # needs to move backwards, CheckNextIncludeOrder will raise an error. + _INITIAL_SECTION = 0 + _MY_H_SECTION = 1 + _C_SECTION = 2 + _CPP_SECTION = 3 + _OTHER_H_SECTION = 4 + + _TYPE_NAMES = { + _C_SYS_HEADER: 'C system header', + _CPP_SYS_HEADER: 'C++ system header', + _LIKELY_MY_HEADER: 'header this file implements', + _POSSIBLE_MY_HEADER: 'header this file may implement', + _OTHER_HEADER: 'other header', + } + _SECTION_NAMES = { + _INITIAL_SECTION: "... nothing. (This can't be an error.)", + _MY_H_SECTION: 'a header this file implements', + _C_SECTION: 'C system header', + _CPP_SECTION: 'C++ system header', + _OTHER_H_SECTION: 'other header', + } + + def __init__(self): + self.include_list = [[]] + self.ResetSection('') + + def FindHeader(self, header): + """Check if a header has already been included. + + Args: + header: header to check. + Returns: + Line number of previous occurrence, or -1 if the header has not + been seen before. + """ + for section_list in self.include_list: + for f in section_list: + if f[0] == header: + return f[1] + return -1 + + def ResetSection(self, directive): + """Reset section checking for preprocessor directive. + + Args: + directive: preprocessor directive (e.g. "if", "else"). + """ + # The name of the current section. + self._section = self._INITIAL_SECTION + # The path of the last found header. + self._last_header = '' + + # Update list of includes. Note that we never pop from the + # include list. + if directive in ('if', 'ifdef', 'ifndef'): + self.include_list.append([]) + elif directive in ('else', 'elif'): + self.include_list[-1] = [] + + def SetLastHeader(self, header_path): + self._last_header = header_path + + def CanonicalizeAlphabeticalOrder(self, header_path): + """Returns a path canonicalized for alphabetical comparison. + + - replaces "-" with "_" so they both cmp the same. + - removes '-inl' since we don't require them to be after the main header. + - lowercase everything, just in case. + + Args: + header_path: Path to be canonicalized. + + Returns: + Canonicalized path.
+ """ + return header_path.replace('-inl.h', '.h').replace('-', '_').lower() + + def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path): + """Check if a header is in alphabetical order with the previous header. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + header_path: Canonicalized header to be checked. + + Returns: + Returns true if the header is in alphabetical order. + """ + # If previous section is different from current section, _last_header will + # be reset to empty string, so it's always less than current header. + # + # If previous line was a blank line, assume that the headers are + # intentionally sorted the way they are. + if (self._last_header > header_path and + Match(r'^\s*#\s*include\b', clean_lines.elided[linenum - 1])): + return False + return True + + def CheckNextIncludeOrder(self, header_type): + """Returns a non-empty error message if the next header is out of order. + + This function also updates the internal state to be ready to check + the next include. + + Args: + header_type: One of the _XXX_HEADER constants defined above. + + Returns: + The empty string if the header is in the right order, or an + error message describing what's wrong. + + """ + error_message = ('Found %s after %s' % + (self._TYPE_NAMES[header_type], + self._SECTION_NAMES[self._section])) + + last_section = self._section + + if header_type == _C_SYS_HEADER: + if self._section <= self._C_SECTION: + self._section = self._C_SECTION + else: + self._last_header = '' + #return error_message + elif header_type == _CPP_SYS_HEADER: + if self._section <= self._CPP_SECTION: + self._section = self._CPP_SECTION + else: + self._last_header = '' + #return error_message + elif header_type == _LIKELY_MY_HEADER: + if self._section <= self._MY_H_SECTION: + self._section = self._MY_H_SECTION + else: + self._section = self._OTHER_H_SECTION + elif header_type == _POSSIBLE_MY_HEADER: + if self._section <= self._MY_H_SECTION: + self._section = self._MY_H_SECTION + else: + # This will always be the fallback because we're not sure + # enough that the header is associated with this file. + self._section = self._OTHER_H_SECTION + else: + assert header_type == _OTHER_HEADER + self._section = self._OTHER_H_SECTION + + if last_section != self._section: + self._last_header = '' + + return '' + + +class _CppLintState(object): + """Maintains module-wide state..""" + + def __init__(self): + self.verbose_level = 1 # global setting. + self.error_count = 0 # global count of reported errors + # filters to apply when emitting error messages + self.filters = _DEFAULT_FILTERS[:] + # backup of filter list. Used to restore the state after each file. + self._filters_backup = self.filters[:] + self.counting = 'total' # In what way are we counting errors? 
+ self.errors_by_category = {} # string to int dict storing error counts + + # output format: + # "emacs" - format that emacs can parse (default) + # "vs7" - format that Microsoft Visual Studio 7 can parse + self.output_format = 'emacs' + + def SetOutputFormat(self, output_format): + """Sets the output format for errors.""" + self.output_format = output_format + + def SetVerboseLevel(self, level): + """Sets the module's verbosity, and returns the previous setting.""" + last_verbose_level = self.verbose_level + self.verbose_level = level + return last_verbose_level + + def SetCountingStyle(self, counting_style): + """Sets the module's counting options.""" + self.counting = counting_style + + def SetFilters(self, filters): + """Sets the error-message filters. + + These filters are applied when deciding whether to emit a given + error message. + + Args: + filters: A string of comma-separated filters (eg "+whitespace/indent"). + Each filter should start with + or -; else we die. + + Raises: + ValueError: The comma-separated filters did not all start with '+' or '-'. + E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter" + """ + # Default filters always have less priority than the flag ones. + self.filters = _DEFAULT_FILTERS[:] + self.AddFilters(filters) + + def AddFilters(self, filters): + """ Adds more filters to the existing list of error-message filters. """ + for filt in filters.split(','): + clean_filt = filt.strip() + if clean_filt: + self.filters.append(clean_filt) + for filt in self.filters: + if not (filt.startswith('+') or filt.startswith('-')): + raise ValueError('Every filter in --filters must start with + or -' + ' (%s does not)' % filt) + + def BackupFilters(self): + """ Saves the current filter list to backup storage.""" + self._filters_backup = self.filters[:] + + def RestoreFilters(self): + """ Restores filters previously backed up.""" + self.filters = self._filters_backup[:] + + def ResetErrorCounts(self): + """Sets the module's error statistic back to zero.""" + self.error_count = 0 + self.errors_by_category = {} + + def IncrementErrorCount(self, category): + """Bumps the module's error statistic.""" + self.error_count += 1 + if self.counting in ('toplevel', 'detailed'): + if self.counting != 'detailed': + category = category.split('/')[0] + if category not in self.errors_by_category: + self.errors_by_category[category] = 0 + self.errors_by_category[category] += 1 + + def PrintErrorCounts(self): + """Print a summary of errors by category, and the total.""" + for category, count in self.errors_by_category.iteritems(): + sys.stderr.write('Category \'%s\' errors found: %d\n' % + (category, count)) + sys.stderr.write('Total errors found: %d\n' % self.error_count) + +_cpplint_state = _CppLintState() + + +def _OutputFormat(): + """Gets the module's output format.""" + return _cpplint_state.output_format + + +def _SetOutputFormat(output_format): + """Sets the module's output format.""" + _cpplint_state.SetOutputFormat(output_format) + + +def _VerboseLevel(): + """Returns the module's verbosity setting.""" + return _cpplint_state.verbose_level + + +def _SetVerboseLevel(level): + """Sets the module's verbosity, and returns the previous setting.""" + return _cpplint_state.SetVerboseLevel(level) + + +def _SetCountingStyle(level): + """Sets the module's counting options.""" + _cpplint_state.SetCountingStyle(level) + + +def _Filters(): + """Returns the module's list of output filters, as a list.""" + return _cpplint_state.filters + + +def _SetFilters(filters): + """Sets the 
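The counting styles are easiest to see by driving the state object above directly; a small sketch (assuming cpplint.py is importable):

import cpplint

state = cpplint._CppLintState()
state.SetCountingStyle('toplevel')
state.IncrementErrorCount('whitespace/indent')
state.IncrementErrorCount('whitespace/tab')
# 'toplevel' buckets errors by the part before the first '/':
print(state.errors_by_category)  # {'whitespace': 2}
print(state.error_count)         # 2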
module's error-message filters. + + These filters are applied when deciding whether to emit a given + error message. + + Args: + filters: A string of comma-separated filters (eg "whitespace/indent"). + Each filter should start with + or -; else we die. + """ + _cpplint_state.SetFilters(filters) + +def _AddFilters(filters): + """Adds more filter overrides. + + Unlike _SetFilters, this function does not reset the current list of filters + available. + + Args: + filters: A string of comma-separated filters (eg "whitespace/indent"). + Each filter should start with + or -; else we die. + """ + _cpplint_state.AddFilters(filters) + +def _BackupFilters(): + """ Saves the current filter list to backup storage.""" + _cpplint_state.BackupFilters() + +def _RestoreFilters(): + """ Restores filters previously backed up.""" + _cpplint_state.RestoreFilters() + +class _FunctionState(object): + """Tracks current function name and the number of lines in its body.""" + + _NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc. + _TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER. + + def __init__(self): + self.in_a_function = False + self.lines_in_function = 0 + self.current_function = '' + + def Begin(self, function_name): + """Start analyzing function body. + + Args: + function_name: The name of the function being tracked. + """ + self.in_a_function = True + self.lines_in_function = 0 + self.current_function = function_name + + def Count(self): + """Count line in current function body.""" + if self.in_a_function: + self.lines_in_function += 1 + + def Check(self, error, filename, linenum): + """Report if too many lines in function body. + + Args: + error: The function to call with any errors found. + filename: The name of the current file. + linenum: The number of the line to check. + """ + if not self.in_a_function: + return + + if Match(r'T(EST|est)', self.current_function): + base_trigger = self._TEST_TRIGGER + else: + base_trigger = self._NORMAL_TRIGGER + trigger = base_trigger * 2**_VerboseLevel() + + if self.lines_in_function > trigger: + error_level = int(math.log(self.lines_in_function / base_trigger, 2)) + # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ... + if error_level > 5: + error_level = 5 + error(filename, linenum, 'readability/fn_size', error_level, + 'Small and focused functions are preferred:' + ' %s has %d non-comment lines' + ' (error triggered by exceeding %d lines).' % ( + self.current_function, self.lines_in_function, trigger)) + + def End(self): + """Stop analyzing function body.""" + self.in_a_function = False + + +class _IncludeError(Exception): + """Indicates a problem with the include order in a file.""" + pass + + +class FileInfo(object): + """Provides utility functions for filenames. + + FileInfo provides easy access to the components of a file's path + relative to the project root. + """ + + def __init__(self, filename): + self._filename = filename + + def FullName(self): + """Make Windows paths like Unix.""" + return os.path.abspath(self._filename).replace('\\', '/') + + def RepositoryName(self): + """FullName after removing the local path to the repository. + + If we have a real absolute path name here we can try to do something smart: + detecting the root of the checkout and truncating /path/to/checkout from + the name so that we get header guards that don't include things like + "C:\Documents and Settings\..." or "/home/username/..." 
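The function-size check in _FunctionState.Check above doubles its threshold per verbosity level and reports a logarithmic confidence; a worked example of the arithmetic:

import math

base_trigger = 250               # _NORMAL_TRIGGER for non-test functions
trigger = base_trigger * 2 ** 0  # 250 at the default verbosity (--v=0)
lines_in_function = 1200
# 1200 / 250 = 4.8 and log2(4.8) ~= 2.26, so the message is emitted at
# confidence level int(2.26) == 2 (levels are capped at 5).
print(lines_in_function > trigger)                                # True
print(int(math.log(lines_in_function / float(base_trigger), 2)))  # 2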
in them and thus + people on different computers who have checked the source out to different + locations won't see bogus errors. + """ + fullname = self.FullName() + + if os.path.exists(fullname): + project_dir = os.path.dirname(fullname) + + if os.path.exists(os.path.join(project_dir, ".svn")): + # If there's a .svn file in the current directory, we recursively look + # up the directory tree for the top of the SVN checkout + root_dir = project_dir + one_up_dir = os.path.dirname(root_dir) + while os.path.exists(os.path.join(one_up_dir, ".svn")): + root_dir = os.path.dirname(root_dir) + one_up_dir = os.path.dirname(one_up_dir) + + prefix = os.path.commonprefix([root_dir, project_dir]) + return fullname[len(prefix) + 1:] + + # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by + # searching up from the current path. + root_dir = current_dir = os.path.dirname(fullname) + while current_dir != os.path.dirname(current_dir): + if (os.path.exists(os.path.join(current_dir, ".git")) or + os.path.exists(os.path.join(current_dir, ".hg")) or + os.path.exists(os.path.join(current_dir, ".svn"))): + root_dir = current_dir + current_dir = os.path.dirname(current_dir) + + if (os.path.exists(os.path.join(root_dir, ".git")) or + os.path.exists(os.path.join(root_dir, ".hg")) or + os.path.exists(os.path.join(root_dir, ".svn"))): + prefix = os.path.commonprefix([root_dir, project_dir]) + return fullname[len(prefix) + 1:] + + # Don't know what to do; header guard warnings may be wrong... + return fullname + + def Split(self): + """Splits the file into the directory, basename, and extension. + + For 'chrome/browser/browser.cc', Split() would + return ('chrome/browser', 'browser', '.cc') + + Returns: + A tuple of (directory, basename, extension). + """ + + googlename = self.RepositoryName() + project, rest = os.path.split(googlename) + return (project,) + os.path.splitext(rest) + + def BaseName(self): + """File base name - text after the final slash, before the final period.""" + return self.Split()[1] + + def Extension(self): + """File extension - text following the final period.""" + return self.Split()[2] + + def NoExtension(self): + """File has no source file extension.""" + return '/'.join(self.Split()[0:2]) + + def IsSource(self): + """File has a source file extension.""" + return _IsSourceExtension(self.Extension()[1:]) + + +def _ShouldPrintError(category, confidence, linenum): + """If confidence >= verbose, category passes filter and is not suppressed.""" + + # There are three ways we might decide not to print an error message: + # a "NOLINT(category)" comment appears in the source, + # the verbosity level isn't high enough, or the filters filter it out. + if IsErrorSuppressedByNolint(category, linenum): + return False + + if confidence < _cpplint_state.verbose_level: + return False + + is_filtered = False + for one_filter in _Filters(): + if one_filter.startswith('-'): + if category.startswith(one_filter[1:]): + is_filtered = True + elif one_filter.startswith('+'): + if category.startswith(one_filter[1:]): + is_filtered = False + else: + assert False # should have been checked for in SetFilter. + if is_filtered: + return False + + return True + + +def Error(filename, linenum, category, confidence, message): + """Logs the fact we've found a lint error. + + We log where the error was found, and also our confidence in the error, + that is, how certain we are this is a legitimate style regression, and + not a misidentification or a use that's sometimes justified. 
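A usage sketch for FileInfo above (the directory component returned by Split() depends on where RepositoryName() detects the checkout root, so treat it as indicative):

import cpplint

info = cpplint.FileInfo('chrome/browser/browser.cc')
print(info.Split())      # e.g. ('chrome/browser', 'browser', '.cc')
print(info.BaseName())   # 'browser'
print(info.Extension())  # '.cc'
print(info.IsSource())   # True: '.cc' is a source extension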
+ + False positives can be suppressed by the use of + "cpplint(category)" comments on the offending line. These are + parsed into _error_suppressions. + + Args: + filename: The name of the file containing the error. + linenum: The number of the line containing the error. + category: A string used to describe the "category" this bug + falls under: "whitespace", say, or "runtime". Categories + may have a hierarchy separated by slashes: "whitespace/indent". + confidence: A number from 1-5 representing a confidence score for + the error, with 5 meaning that we are certain of the problem, + and 1 meaning that it could be a legitimate construct. + message: The error message. + """ + if _ShouldPrintError(category, confidence, linenum): + _cpplint_state.IncrementErrorCount(category) + if _cpplint_state.output_format == 'vs7': + sys.stderr.write('%s(%s): %s [%s] [%d]\n' % ( + filename, linenum, message, category, confidence)) + elif _cpplint_state.output_format == 'eclipse': + sys.stderr.write('%s:%s: warning: %s [%s] [%d]\n' % ( + filename, linenum, message, category, confidence)) + else: + sys.stderr.write('%s:%s: %s [%s] [%d]\n' % ( + filename, linenum, message, category, confidence)) + + +# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard. +_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile( + r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)') +# Match a single C style comment on the same line. +_RE_PATTERN_C_COMMENTS = r'/\*(?:[^*]|\*(?!/))*\*/' +# Matches multi-line C style comments. +# This RE is a little bit more complicated than one might expect, because we +# have to take care of space removals tools so we can handle comments inside +# statements better. +# The current rule is: We only clear spaces from both sides when we're at the +# end of the line. Otherwise, we try to remove spaces from the right side, +# if this doesn't work we try on left side but only if there's a non-character +# on the right. +_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile( + r'(\s*' + _RE_PATTERN_C_COMMENTS + r'\s*$|' + + _RE_PATTERN_C_COMMENTS + r'\s+|' + + r'\s+' + _RE_PATTERN_C_COMMENTS + r'(?=\W)|' + + _RE_PATTERN_C_COMMENTS + r')') + + +def IsCppString(line): + """Does line terminate so, that the next symbol is in string constant. + + This function does not consider single-line nor multi-line comments. + + Args: + line: is a partial line of code starting from the 0..n. + + Returns: + True, if next character appended to 'line' is inside a + string constant. + """ + + line = line.replace(r'\\', 'XX') # after this, \\" does not match to \" + return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1 + + +def CleanseRawStrings(raw_lines): + """Removes C++11 raw strings from lines. + + Before: + static const char kData[] = R"( + multi-line string + )"; + + After: + static const char kData[] = "" + (replaced by blank line) + ""; + + Args: + raw_lines: list of raw lines. + + Returns: + list of lines with C++11 raw strings replaced by empty strings. + """ + + delimiter = None + lines_without_raw_strings = [] + for line in raw_lines: + if delimiter: + # Inside a raw string, look for the end + end = line.find(delimiter) + if end >= 0: + # Found the end of the string, match leading space for this + # line and resume copying the original lines, and also insert + # a "" on the last line. + leading_space = Match(r'^(\s*)\S', line) + line = leading_space.group(1) + '""' + line[end + len(delimiter):] + delimiter = None + else: + # Haven't found the end yet, append a blank line. 
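IsCppString above decides whether a line prefix ends inside a string literal by counting unescaped double quotes; for instance:

import cpplint

# An odd number of unescaped double quotes means the next character
# appended to the line would still be inside a string constant:
print(cpplint.IsCppString('x = "abc'))    # True
print(cpplint.IsCppString('x = "abc"'))   # False
print(cpplint.IsCppString('x = "a\\""'))  # False: the escaped quote is not counted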
+ line = '""' + + # Look for beginning of a raw string, and replace them with + # empty strings. This is done in a loop to handle multiple raw + # strings on the same line. + while delimiter is None: + # Look for beginning of a raw string. + # See 2.14.15 [lex.string] for syntax. + # + # Once we have matched a raw string, we check the prefix of the + # line to make sure that the line is not part of a single line + # comment. It's done this way because we remove raw strings + # before removing comments as opposed to removing comments + # before removing raw strings. This is because there are some + # cpplint checks that requires the comments to be preserved, but + # we don't want to check comments that are inside raw strings. + matched = Match(r'^(.*?)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line) + if (matched and + not Match(r'^([^\'"]|\'(\\.|[^\'])*\'|"(\\.|[^"])*")*//', + matched.group(1))): + delimiter = ')' + matched.group(2) + '"' + + end = matched.group(3).find(delimiter) + if end >= 0: + # Raw string ended on same line + line = (matched.group(1) + '""' + + matched.group(3)[end + len(delimiter):]) + delimiter = None + else: + # Start of a multi-line raw string + line = matched.group(1) + '""' + else: + break + + lines_without_raw_strings.append(line) + + # TODO(unknown): if delimiter is not None here, we might want to + # emit a warning for unterminated string. + return lines_without_raw_strings + + +def FindNextMultiLineCommentStart(lines, lineix): + """Find the beginning marker for a multiline comment.""" + while lineix < len(lines): + if lines[lineix].strip().startswith('/*'): + # Only return this marker if the comment goes beyond this line + if lines[lineix].strip().find('*/', 2) < 0: + return lineix + lineix += 1 + return len(lines) + + +def FindNextMultiLineCommentEnd(lines, lineix): + """We are inside a comment, find the end marker.""" + while lineix < len(lines): + if lines[lineix].strip().endswith('*/'): + return lineix + lineix += 1 + return len(lines) + + +def RemoveMultiLineCommentsFromRange(lines, begin, end): + """Clears a range of lines for multi-line comments.""" + # Having // dummy comments makes the lines non-empty, so we will not get + # unnecessary blank line warnings later in the code. + for i in range(begin, end): + lines[i] = '/**/' + + +def RemoveMultiLineComments(filename, lines, error): + """Removes multiline (c-style) comments from lines.""" + lineix = 0 + while lineix < len(lines): + lineix_begin = FindNextMultiLineCommentStart(lines, lineix) + if lineix_begin >= len(lines): + return + lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin) + if lineix_end >= len(lines): + error(filename, lineix_begin + 1, 'readability/multiline_comment', 5, + 'Could not find end of multi-line comment') + return + RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1) + lineix = lineix_end + 1 + + +def CleanseComments(line): + """Removes //-comments and single-line C-style /* */ comments. + + Args: + line: A line of C++ source. + + Returns: + The line with single-line comments removed. + """ + commentpos = line.find('//') + if commentpos != -1 and not IsCppString(line[:commentpos]): + line = line[:commentpos].rstrip() + # get rid of /* ... */ + return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line) + + +class CleansedLines(object): + """Holds 4 copies of all lines with different preprocessing applied to them. + + 1) elided member contains lines without strings and comments. + 2) lines member contains lines without comments. 
+  3) raw_lines member contains all the lines without processing.
+  4) lines_without_raw_strings member is same as raw_lines, but with C++11 raw
+     strings removed.
+  All these members are of <type 'list'>, and of the same length.
+  """
+
+  def __init__(self, lines):
+    self.elided = []
+    self.lines = []
+    self.raw_lines = lines
+    self.num_lines = len(lines)
+    self.lines_without_raw_strings = CleanseRawStrings(lines)
+    for linenum in range(len(self.lines_without_raw_strings)):
+      self.lines.append(CleanseComments(
+          self.lines_without_raw_strings[linenum]))
+      elided = self._CollapseStrings(self.lines_without_raw_strings[linenum])
+      self.elided.append(CleanseComments(elided))
+
+  def NumLines(self):
+    """Returns the number of lines represented."""
+    return self.num_lines
+
+  @staticmethod
+  def _CollapseStrings(elided):
+    """Collapses strings and chars on a line to simple "" or '' blocks.
+
+    We nix strings first so we're not fooled by text like '"http://"'.
+
+    Args:
+      elided: The line being processed.
+
+    Returns:
+      The line with collapsed strings.
+    """
+    if _RE_PATTERN_INCLUDE.match(elided):
+      return elided
+
+    # Remove escaped characters first to make quote/single quote collapsing
+    # basic.  Things that look like escaped characters shouldn't occur
+    # outside of strings and chars.
+    elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
+
+    # Replace quoted strings and digit separators.  Both single quotes
+    # and double quotes are processed in the same loop, otherwise
+    # nested quotes wouldn't work.
+    collapsed = ''
+    while True:
+      # Find the first quote character
+      match = Match(r'^([^\'"]*)([\'"])(.*)$', elided)
+      if not match:
+        collapsed += elided
+        break
+      head, quote, tail = match.groups()
+
+      if quote == '"':
+        # Collapse double quoted strings
+        second_quote = tail.find('"')
+        if second_quote >= 0:
+          collapsed += head + '""'
+          elided = tail[second_quote + 1:]
+        else:
+          # Unmatched double quote, don't bother processing the rest
+          # of the line since this is probably a multiline string.
+          collapsed += elided
+          break
+      else:
+        # Found single quote, check nearby text to eliminate digit separators.
+        #
+        # There is no special handling for floating point here, because
+        # the integer/fractional/exponent parts would all be parsed
+        # correctly as long as there are digits on both sides of the
+        # separator.  So we are fine as long as we don't see something
+        # like "0.'3" (gcc 4.9.0 will not allow this literal).
+        if Search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head):
+          match_literal = Match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail)
+          collapsed += head + match_literal.group(1).replace("'", '')
+          elided = match_literal.group(2)
+        else:
+          second_quote = tail.find('\'')
+          if second_quote >= 0:
+            collapsed += head + "''"
+            elided = tail[second_quote + 1:]
+          else:
+            # Unmatched single quote
+            collapsed += elided
+            break
+
+    return collapsed
+
+
+def FindEndOfExpressionInLine(line, startpos, stack):
+  """Find the position just after the end of current parenthesized expression.
+
+  Args:
+    line: a CleansedLines line.
+    startpos: start searching at this position.
+    stack: nesting stack at startpos.
+ + Returns: + On finding matching end: (index just after matching end, None) + On finding an unclosed expression: (-1, None) + Otherwise: (-1, new stack at end of this line) + """ + for i in xrange(startpos, len(line)): + char = line[i] + if char in '([{': + # Found start of parenthesized expression, push to expression stack + stack.append(char) + elif char == '<': + # Found potential start of template argument list + if i > 0 and line[i - 1] == '<': + # Left shift operator + if stack and stack[-1] == '<': + stack.pop() + if not stack: + return (-1, None) + elif i > 0 and Search(r'\boperator\s*$', line[0:i]): + # operator<, don't add to stack + continue + else: + # Tentative start of template argument list + stack.append('<') + elif char in ')]}': + # Found end of parenthesized expression. + # + # If we are currently expecting a matching '>', the pending '<' + # must have been an operator. Remove them from expression stack. + while stack and stack[-1] == '<': + stack.pop() + if not stack: + return (-1, None) + if ((stack[-1] == '(' and char == ')') or + (stack[-1] == '[' and char == ']') or + (stack[-1] == '{' and char == '}')): + stack.pop() + if not stack: + return (i + 1, None) + else: + # Mismatched parentheses + return (-1, None) + elif char == '>': + # Found potential end of template argument list. + + # Ignore "->" and operator functions + if (i > 0 and + (line[i - 1] == '-' or Search(r'\boperator\s*$', line[0:i - 1]))): + continue + + # Pop the stack if there is a matching '<'. Otherwise, ignore + # this '>' since it must be an operator. + if stack: + if stack[-1] == '<': + stack.pop() + if not stack: + return (i + 1, None) + elif char == ';': + # Found something that look like end of statements. If we are currently + # expecting a '>', the matching '<' must have been an operator, since + # template argument list should not contain statements. + while stack and stack[-1] == '<': + stack.pop() + if not stack: + return (-1, None) + + # Did not find end of expression or unbalanced parentheses on this line + return (-1, stack) + + +def CloseExpression(clean_lines, linenum, pos): + """If input points to ( or { or [ or <, finds the position that closes it. + + If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the + linenum/pos that correspond to the closing of the expression. + + TODO(unknown): cpplint spends a fair bit of time matching parentheses. + Ideally we would want to index all opening and closing parentheses once + and have CloseExpression be just a simple lookup, but due to preprocessor + tricks, this is not so easy. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + pos: A position on the line. + + Returns: + A tuple (line, linenum, pos) pointer *past* the closing brace, or + (line, len(lines), -1) if we never find a close. Note we ignore + strings and comments when matching; and the line we return is the + 'cleansed' line at linenum. 
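+
+  Example (illustrative): if pos points at the first '(' in
+  'Foo(bar(1), 2);', the returned pos is just past the matching outer
+  ')', i.e. it points at the ';'.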
+ """ + + line = clean_lines.elided[linenum] + if (line[pos] not in '({[<') or Match(r'<[<=]', line[pos:]): + return (line, clean_lines.NumLines(), -1) + + # Check first line + (end_pos, stack) = FindEndOfExpressionInLine(line, pos, []) + if end_pos > -1: + return (line, linenum, end_pos) + + # Continue scanning forward + while stack and linenum < clean_lines.NumLines() - 1: + linenum += 1 + line = clean_lines.elided[linenum] + (end_pos, stack) = FindEndOfExpressionInLine(line, 0, stack) + if end_pos > -1: + return (line, linenum, end_pos) + + # Did not find end of expression before end of file, give up + return (line, clean_lines.NumLines(), -1) + + +def FindStartOfExpressionInLine(line, endpos, stack): + """Find position at the matching start of current expression. + + This is almost the reverse of FindEndOfExpressionInLine, but note + that the input position and returned position differs by 1. + + Args: + line: a CleansedLines line. + endpos: start searching at this position. + stack: nesting stack at endpos. + + Returns: + On finding matching start: (index at matching start, None) + On finding an unclosed expression: (-1, None) + Otherwise: (-1, new stack at beginning of this line) + """ + i = endpos + while i >= 0: + char = line[i] + if char in ')]}': + # Found end of expression, push to expression stack + stack.append(char) + elif char == '>': + # Found potential end of template argument list. + # + # Ignore it if it's a "->" or ">=" or "operator>" + if (i > 0 and + (line[i - 1] == '-' or + Match(r'\s>=\s', line[i - 1:]) or + Search(r'\boperator\s*$', line[0:i]))): + i -= 1 + else: + stack.append('>') + elif char == '<': + # Found potential start of template argument list + if i > 0 and line[i - 1] == '<': + # Left shift operator + i -= 1 + else: + # If there is a matching '>', we can pop the expression stack. + # Otherwise, ignore this '<' since it must be an operator. + if stack and stack[-1] == '>': + stack.pop() + if not stack: + return (i, None) + elif char in '([{': + # Found start of expression. + # + # If there are any unmatched '>' on the stack, they must be + # operators. Remove those. + while stack and stack[-1] == '>': + stack.pop() + if not stack: + return (-1, None) + if ((char == '(' and stack[-1] == ')') or + (char == '[' and stack[-1] == ']') or + (char == '{' and stack[-1] == '}')): + stack.pop() + if not stack: + return (i, None) + else: + # Mismatched parentheses + return (-1, None) + elif char == ';': + # Found something that look like end of statements. If we are currently + # expecting a '<', the matching '>' must have been an operator, since + # template argument list should not contain statements. + while stack and stack[-1] == '>': + stack.pop() + if not stack: + return (-1, None) + + i -= 1 + + return (-1, stack) + + +def ReverseCloseExpression(clean_lines, linenum, pos): + """If input points to ) or } or ] or >, finds the position that opens it. + + If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the + linenum/pos that correspond to the opening of the expression. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + pos: A position on the line. + + Returns: + A tuple (line, linenum, pos) pointer *at* the opening brace, or + (line, 0, -1) if we never find the matching opening brace. Note + we ignore strings and comments when matching; and the line we + return is the 'cleansed' line at linenum. 
+ """ + line = clean_lines.elided[linenum] + if line[pos] not in ')}]>': + return (line, 0, -1) + + # Check last line + (start_pos, stack) = FindStartOfExpressionInLine(line, pos, []) + if start_pos > -1: + return (line, linenum, start_pos) + + # Continue scanning backward + while stack and linenum > 0: + linenum -= 1 + line = clean_lines.elided[linenum] + (start_pos, stack) = FindStartOfExpressionInLine(line, len(line) - 1, stack) + if start_pos > -1: + return (line, linenum, start_pos) + + # Did not find start of expression before beginning of file, give up + return (line, 0, -1) + + +def CheckForCopyright(filename, lines, error): + """Logs an error if no Copyright message appears at the top of the file.""" + + # We'll say it should occur by line 10. Don't forget there's a + # dummy line at the front. + for line in xrange(1, min(len(lines), 11)): + if re.search(r'Copyright', lines[line], re.I): break + else: # means no copyright line was found + error(filename, 0, 'legal/copyright', 5, + 'No copyright message found. ' + 'You should have a line: "Copyright [year] "') + + +def GetIndentLevel(line): + """Return the number of leading spaces in line. + + Args: + line: A string to check. + + Returns: + An integer count of leading spaces, possibly zero. + """ + indent = Match(r'^( *)\S', line) + if indent: + return len(indent.group(1)) + else: + return 0 + + +def GetHeaderGuardCPPVariable(filename): + """Returns the CPP variable that should be used as a header guard. + + Args: + filename: The name of a C++ header file. + + Returns: + The CPP variable that should be used as a header guard in the + named file. + + """ + + # Restores original filename in case that cpplint is invoked from Emacs's + # flymake. + filename = re.sub(r'_flymake\.h$', '.h', filename) + filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename) + # Replace 'c++' with 'cpp'. + filename = filename.replace('C++', 'cpp').replace('c++', 'cpp') + + fileinfo = FileInfo(filename) + file_path_from_root = fileinfo.RepositoryName() + if _root: + suffix = os.sep + # On Windows using directory separator will leave us with + # "bogus escape error" unless we properly escape regex. + if suffix == '\\': + suffix += '\\' + file_path_from_root = re.sub('^' + _root + suffix, '', file_path_from_root) + return re.sub(r'[^a-zA-Z0-9]', '_', file_path_from_root).upper() + '_' + + +def CheckForHeaderGuard(filename, clean_lines, error): + """Checks that the file contains a header guard. + + Logs an error if no #ifndef header guard is present. For other + headers, checks that the full pathname is used. + + Args: + filename: The name of the C++ header file. + clean_lines: A CleansedLines instance containing the file. + error: The function to call with any errors found. + """ + + # Don't check for header guards if there are error suppression + # comments somewhere in this file. + # + # Because this is silencing a warning for a nonexistent line, we + # only support the very specific NOLINT(build/header_guard) syntax, + # and not the general NOLINT or NOLINT(*) syntax. 
+ raw_lines = clean_lines.lines_without_raw_strings + for i in raw_lines: + if Search(r'//\s*NOLINT\(build/header_guard\)', i): + return + + cppvar = GetHeaderGuardCPPVariable(filename) + + ifndef = '' + ifndef_linenum = 0 + define = '' + endif = '' + endif_linenum = 0 + for linenum, line in enumerate(raw_lines): + linesplit = line.split() + if len(linesplit) >= 2: + # find the first occurrence of #ifndef and #define, save arg + if not ifndef and linesplit[0] == '#ifndef': + # set ifndef to the header guard presented on the #ifndef line. + ifndef = linesplit[1] + ifndef_linenum = linenum + if not define and linesplit[0] == '#define': + define = linesplit[1] + # find the last occurrence of #endif, save entire line + if line.startswith('#endif'): + endif = line + endif_linenum = linenum + + if not ifndef or not define or ifndef != define: + # error(filename, 0, 'build/header_guard', 5, + # 'No #ifndef header guard found, suggested CPP variable is: %s' % + # cppvar) + return + + # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__ + # for backward compatibility. + if ifndef != cppvar: + error_level = 0 + if ifndef != cppvar + '_': + error_level = 5 + + ParseNolintSuppressions(filename, raw_lines[ifndef_linenum], ifndef_linenum, + error) + error(filename, ifndef_linenum, 'build/header_guard', error_level, + '#ifndef header guard has wrong style, please use: %s' % cppvar) + + # Check for "//" comments on endif line. + ParseNolintSuppressions(filename, raw_lines[endif_linenum], endif_linenum, + error) + match = Match(r'#endif\s*//\s*' + cppvar + r'(_)?\b', endif) + if match: + if match.group(1) == '_': + # Issue low severity warning for deprecated double trailing underscore + error(filename, endif_linenum, 'build/header_guard', 0, + '#endif line should be "#endif // %s"' % cppvar) + return + + # Didn't find the corresponding "//" comment. If this file does not + # contain any "//" comments at all, it could be that the compiler + # only wants "/**/" comments, look for those instead. 
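+  # (Illustration: '#endif /* FOO_BAR_H_ */' would be accepted by the
+  # fallback match below, where FOO_BAR_H_ stands for the computed guard.)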
+ no_single_line_comments = True + for i in xrange(1, len(raw_lines) - 1): + line = raw_lines[i] + if Match(r'^(?:(?:\'(?:\.|[^\'])*\')|(?:"(?:\.|[^"])*")|[^\'"])*//', line): + no_single_line_comments = False + break + + if no_single_line_comments: + match = Match(r'#endif\s*/\*\s*' + cppvar + r'(_)?\s*\*/', endif) + if match: + if match.group(1) == '_': + # Low severity warning for double trailing underscore + error(filename, endif_linenum, 'build/header_guard', 0, + '#endif line should be "#endif /* %s */"' % cppvar) + return + + # Didn't find anything + error(filename, endif_linenum, 'build/header_guard', 5, + '#endif line should be "#endif // %s"' % cppvar) + + +def CheckHeaderFileIncluded(filename, include_state, error): + """Logs an error if a .cc file does not include its header.""" + + # Do not check test files + fileinfo = FileInfo(filename) + if Search(_TEST_FILE_SUFFIX, fileinfo.BaseName()): + return + + headerfile = filename[0:len(filename) - len(fileinfo.Extension())] + '.h' + if not os.path.exists(headerfile): + return + headername = FileInfo(headerfile).RepositoryName() + first_include = 0 + for section_list in include_state.include_list: + for f in section_list: + if headername in f[0] or f[0] in headername: + return + if not first_include: + first_include = f[1] + + if "MnistUbyte.h" in headername or "InferenceEngineConfigurator.h" in headername: + return + error(filename, first_include, 'build/include', 5, + '%s should include its header file %s' % (fileinfo.RepositoryName(), + headername)) + + +def CheckForBadCharacters(filename, lines, error): + """Logs an error for each line containing bad characters. + + Two kinds of bad characters: + + 1. Unicode replacement characters: These indicate that either the file + contained invalid UTF-8 (likely) or Unicode replacement characters (which + it shouldn't). Note that it's possible for this to throw off line + numbering if the invalid UTF-8 occurred adjacent to a newline. + + 2. NUL bytes. These are problematic for some tools. + + Args: + filename: The name of the current file. + lines: An array of strings, each representing a line of the file. + error: The function to call with any errors found. + """ + for linenum, line in enumerate(lines): + if u'\ufffd' in line: + error(filename, linenum, 'readability/utf8', 5, + 'Line contains invalid UTF-8 (or Unicode replacement character).') + if '\0' in line: + error(filename, linenum, 'readability/nul', 5, 'Line contains NUL byte.') + + +def CheckForNewlineAtEOF(filename, lines, error): + """Logs an error if there is no newline char at the end of the file. + + Args: + filename: The name of the current file. + lines: An array of strings, each representing a line of the file. + error: The function to call with any errors found. + """ + + # The array lines() was created by adding two newlines to the + # original file (go figure), then splitting on \n. + # To verify that the file ends in \n, we just have to make sure the + # last-but-two element of lines() exists and is empty. + if len(lines) < 3 or lines[-2]: + pass + #error(filename, len(lines) - 2, 'whitespace/ending_newline', 5, + # 'Could not find a newline character at the end of the file.') + + +def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error): + """Logs an error if we see /* ... */ or "..." that extend past one line. + + /* ... */ comments are legit inside macros, for one line. + Otherwise, we prefer // comments, so it's ok to warn about the + other. 
Likewise, it's ok for strings to extend across multiple + lines, as long as a line continuation character (backslash) + terminates each line. Although not currently prohibited by the C++ + style guide, it's ugly and unnecessary. We don't do well with either + in this lint program, so we warn about both. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Remove all \\ (escaped backslashes) from the line. They are OK, and the + # second (escaped) slash may trigger later \" detection erroneously. + line = line.replace('\\\\', '') + + if line.count('/*') > line.count('*/'): + error(filename, linenum, 'readability/multiline_comment', 5, + 'Complex multi-line /*...*/-style comment found. ' + 'Lint may give bogus warnings. ' + 'Consider replacing these with //-style comments, ' + 'with #if 0...#endif, ' + 'or with more clearly structured multi-line comments.') + + if (line.count('"') - line.count('\\"')) % 2: + error(filename, linenum, 'readability/multiline_string', 5, + 'Multi-line string ("...") found. This lint script doesn\'t ' + 'do well with such strings, and may give bogus warnings. ' + 'Use C++11 raw strings or concatenation instead.') + + +# (non-threadsafe name, thread-safe alternative, validation pattern) +# +# The validation pattern is used to eliminate false positives such as: +# _rand(); // false positive due to substring match. +# ->rand(); // some member function rand(). +# ACMRandom rand(seed); // some variable named rand. +# ISAACRandom rand(); // another variable named rand. +# +# Basically we require the return value of these functions to be used +# in some expression context on the same line by matching on some +# operator before the function name. This eliminates constructors and +# member function calls. +_UNSAFE_FUNC_PREFIX = r'(?:[-+*/=%^&|(<]\s*|>\s+)' +_THREADING_LIST = ( + ('asctime(', 'asctime_r(', _UNSAFE_FUNC_PREFIX + r'asctime\([^)]+\)'), + ('ctime(', 'ctime_r(', _UNSAFE_FUNC_PREFIX + r'ctime\([^)]+\)'), + ('getgrgid(', 'getgrgid_r(', _UNSAFE_FUNC_PREFIX + r'getgrgid\([^)]+\)'), + ('getgrnam(', 'getgrnam_r(', _UNSAFE_FUNC_PREFIX + r'getgrnam\([^)]+\)'), + ('getlogin(', 'getlogin_r(', _UNSAFE_FUNC_PREFIX + r'getlogin\(\)'), + ('getpwnam(', 'getpwnam_r(', _UNSAFE_FUNC_PREFIX + r'getpwnam\([^)]+\)'), + ('getpwuid(', 'getpwuid_r(', _UNSAFE_FUNC_PREFIX + r'getpwuid\([^)]+\)'), + ('gmtime(', 'gmtime_r(', _UNSAFE_FUNC_PREFIX + r'gmtime\([^)]+\)'), + ('localtime(', 'localtime_r(', _UNSAFE_FUNC_PREFIX + r'localtime\([^)]+\)'), + ('rand(', 'rand_r(', _UNSAFE_FUNC_PREFIX + r'rand\(\)'), + ('strtok(', 'strtok_r(', + _UNSAFE_FUNC_PREFIX + r'strtok\([^)]+\)'), + ('ttyname(', 'ttyname_r(', _UNSAFE_FUNC_PREFIX + r'ttyname\([^)]+\)'), + ) + + +def CheckPosixThreading(filename, clean_lines, linenum, error): + """Checks for calls to thread-unsafe functions. + + Much code has been originally written without consideration of + multi-threading. Also, engineers are relying on their old experience; + they have learned posix before threading extensions were added. These + tests guide the engineers to use thread-safe functions (when using + posix directly). + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. 
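+
+  Example (illustrative): 'int x = rand();' is flagged with a suggestion
+  to use rand_r(...), but 'ACMRandom rand(seed);' is not, since no
+  operator precedes the call.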
+ """ + line = clean_lines.elided[linenum] + for single_thread_func, multithread_safe_func, pattern in _THREADING_LIST: + # Additional pattern matching check to confirm that this is the + # function we are looking for + if Search(pattern, line): + error(filename, linenum, 'runtime/threadsafe_fn', 2, + 'Consider using ' + multithread_safe_func + + '...) instead of ' + single_thread_func + + '...) for improved thread safety.') + + +def CheckVlogArguments(filename, clean_lines, linenum, error): + """Checks that VLOG() is only used for defining a logging level. + + For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and + VLOG(FATAL) are not. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + if Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', line): + error(filename, linenum, 'runtime/vlog', 5, + 'VLOG() should be used with numeric verbosity level. ' + 'Use LOG() if you want symbolic severity levels.') + +# Matches invalid increment: *count++, which moves pointer instead of +# incrementing a value. +_RE_PATTERN_INVALID_INCREMENT = re.compile( + r'^\s*\*\w+(\+\+|--);') + + +def CheckInvalidIncrement(filename, clean_lines, linenum, error): + """Checks for invalid increment *count++. + + For example following function: + void increment_counter(int* count) { + *count++; + } + is invalid, because it effectively does count++, moving pointer, and should + be replaced with ++*count, (*count)++ or *count += 1. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + if _RE_PATTERN_INVALID_INCREMENT.match(line): + error(filename, linenum, 'runtime/invalid_increment', 5, + 'Changing pointer instead of value (or unused value of operator*).') + + +def IsMacroDefinition(clean_lines, linenum): + if Search(r'^#define', clean_lines[linenum]): + return True + + if linenum > 0 and Search(r'\\$', clean_lines[linenum - 1]): + return True + + return False + + +def IsForwardClassDeclaration(clean_lines, linenum): + return Match(r'^\s*(\btemplate\b)*.*class\s+\w+;\s*$', clean_lines[linenum]) + + +class _BlockInfo(object): + """Stores information about a generic block of code.""" + + def __init__(self, linenum, seen_open_brace): + self.starting_linenum = linenum + self.seen_open_brace = seen_open_brace + self.open_parentheses = 0 + self.inline_asm = _NO_ASM + self.check_namespace_indentation = False + + def CheckBegin(self, filename, clean_lines, linenum, error): + """Run checks that applies to text up to the opening brace. + + This is mostly for checking the text after the class identifier + and the "{", usually where the base class is specified. For other + blocks, there isn't much to check, so we always pass. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + pass + + def CheckEnd(self, filename, clean_lines, linenum, error): + """Run checks that applies to text after the closing brace. + + This is mostly used for checking end of namespace comments. + + Args: + filename: The name of the current file. 
+ clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + pass + + def IsBlockInfo(self): + """Returns true if this block is a _BlockInfo. + + This is convenient for verifying that an object is an instance of + a _BlockInfo, but not an instance of any of the derived classes. + + Returns: + True for this class, False for derived classes. + """ + return self.__class__ == _BlockInfo + + +class _ExternCInfo(_BlockInfo): + """Stores information about an 'extern "C"' block.""" + + def __init__(self, linenum): + _BlockInfo.__init__(self, linenum, True) + + +class _ClassInfo(_BlockInfo): + """Stores information about a class.""" + + def __init__(self, name, class_or_struct, clean_lines, linenum): + _BlockInfo.__init__(self, linenum, False) + self.name = name + self.is_derived = False + self.check_namespace_indentation = True + if class_or_struct == 'struct': + self.access = 'public' + self.is_struct = True + else: + self.access = 'private' + self.is_struct = False + + # Remember initial indentation level for this class. Using raw_lines here + # instead of elided to account for leading comments. + self.class_indent = GetIndentLevel(clean_lines.raw_lines[linenum]) + + # Try to find the end of the class. This will be confused by things like: + # class A { + # } *x = { ... + # + # But it's still good enough for CheckSectionSpacing. + self.last_line = 0 + depth = 0 + for i in range(linenum, clean_lines.NumLines()): + line = clean_lines.elided[i] + depth += line.count('{') - line.count('}') + if not depth: + self.last_line = i + break + + def CheckBegin(self, filename, clean_lines, linenum, error): + # Look for a bare ':' + if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]): + self.is_derived = True + + def CheckEnd(self, filename, clean_lines, linenum, error): + # If there is a DISALLOW macro, it should appear near the end of + # the class. + seen_last_thing_in_class = False + for i in xrange(linenum - 1, self.starting_linenum, -1): + match = Search( + r'\b(DISALLOW_COPY_AND_ASSIGN|DISALLOW_IMPLICIT_CONSTRUCTORS)\(' + + self.name + r'\)', + clean_lines.elided[i]) + if match: + if seen_last_thing_in_class: + error(filename, i, 'readability/constructors', 3, + match.group(1) + ' should be the last thing in the class') + break + + if not Match(r'^\s*$', clean_lines.elided[i]): + seen_last_thing_in_class = True + + # Check that closing brace is aligned with beginning of the class. + # Only do this if the closing brace is indented by only whitespaces. + # This means we will not check single-line class definitions. + indent = Match(r'^( *)\}', clean_lines.elided[linenum]) + if indent and len(indent.group(1)) != self.class_indent: + if self.is_struct: + parent = 'struct ' + self.name + else: + parent = 'class ' + self.name + error(filename, linenum, 'whitespace/indent', 3, + 'Closing brace should be aligned with beginning of %s' % parent) + + +class _NamespaceInfo(_BlockInfo): + """Stores information about a namespace.""" + + def __init__(self, name, linenum): + _BlockInfo.__init__(self, linenum, False) + self.name = name or '' + self.check_namespace_indentation = True + + def CheckEnd(self, filename, clean_lines, linenum, error): + """Check end of namespace comments.""" + line = clean_lines.raw_lines[linenum] + + # Check how many lines is enclosed in this namespace. Don't issue + # warning for missing namespace comments if there aren't enough + # lines. 
However, do apply checks if there is already an end of + # namespace comment and it's incorrect. + # + # TODO(unknown): We always want to check end of namespace comments + # if a namespace is large, but sometimes we also want to apply the + # check if a short namespace contained nontrivial things (something + # other than forward declarations). There is currently no logic on + # deciding what these nontrivial things are, so this check is + # triggered by namespace size only, which works most of the time. + if (linenum - self.starting_linenum < 10 + and not Match(r'^\s*};*\s*(//|/\*).*\bnamespace\b', line)): + return + + # Look for matching comment at end of namespace. + # + # Note that we accept C style "/* */" comments for terminating + # namespaces, so that code that terminate namespaces inside + # preprocessor macros can be cpplint clean. + # + # We also accept stuff like "// end of namespace ." with the + # period at the end. + # + # Besides these, we don't accept anything else, otherwise we might + # get false negatives when existing comment is a substring of the + # expected namespace. + if self.name: + # Named namespace + if not Match((r'^\s*};*\s*(//|/\*).*\bnamespace\s+' + + re.escape(self.name) + r'[\*/\.\\\s]*$'), + line): + error(filename, linenum, 'readability/namespace', 5, + 'Namespace should be terminated with "// namespace %s"' % + self.name) + else: + # Anonymous namespace + if not Match(r'^\s*};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line): + # If "// namespace anonymous" or "// anonymous namespace (more text)", + # mention "// anonymous namespace" as an acceptable form + if Match(r'^\s*}.*\b(namespace anonymous|anonymous namespace)\b', line): + error(filename, linenum, 'readability/namespace', 5, + 'Anonymous namespace should be terminated with "// namespace"' + ' or "// anonymous namespace"') + else: + error(filename, linenum, 'readability/namespace', 5, + 'Anonymous namespace should be terminated with "// namespace"') + + +class _PreprocessorInfo(object): + """Stores checkpoints of nesting stacks when #if/#else is seen.""" + + def __init__(self, stack_before_if): + # The entire nesting stack before #if + self.stack_before_if = stack_before_if + + # The entire nesting stack up to #else + self.stack_before_else = [] + + # Whether we have already seen #else or #elif + self.seen_else = False + + +class NestingState(object): + """Holds states related to parsing braces.""" + + def __init__(self): + # Stack for tracking all braces. An object is pushed whenever we + # see a "{", and popped when we see a "}". Only 3 types of + # objects are possible: + # - _ClassInfo: a class or struct. + # - _NamespaceInfo: a namespace. + # - _BlockInfo: some other type of block. + self.stack = [] + + # Top of the previous stack before each Update(). + # + # Because the nesting_stack is updated at the end of each line, we + # had to do some convoluted checks to find out what is the current + # scope at the beginning of the line. This check is simplified by + # saving the previous top of nesting stack. + # + # We could save the full stack, but we only need the top. Copying + # the full nesting stack would slow down cpplint by ~10%. + self.previous_stack_top = [] + + # Stack of _PreprocessorInfo objects. + self.pp_stack = [] + + def SeenOpenBrace(self): + """Check if we have seen the opening brace for the innermost block. + + Returns: + True if we have seen the opening brace, False if the innermost + block is still expecting an opening brace. 
+ """ + return (not self.stack) or self.stack[-1].seen_open_brace + + def InNamespaceBody(self): + """Check if we are currently one level inside a namespace body. + + Returns: + True if top of the stack is a namespace block, False otherwise. + """ + return self.stack and isinstance(self.stack[-1], _NamespaceInfo) + + def InExternC(self): + """Check if we are currently one level inside an 'extern "C"' block. + + Returns: + True if top of the stack is an extern block, False otherwise. + """ + return self.stack and isinstance(self.stack[-1], _ExternCInfo) + + def InClassDeclaration(self): + """Check if we are currently one level inside a class or struct declaration. + + Returns: + True if top of the stack is a class/struct, False otherwise. + """ + return self.stack and isinstance(self.stack[-1], _ClassInfo) + + def InAsmBlock(self): + """Check if we are currently one level inside an inline ASM block. + + Returns: + True if the top of the stack is a block containing inline ASM. + """ + return self.stack and self.stack[-1].inline_asm != _NO_ASM + + def InTemplateArgumentList(self, clean_lines, linenum, pos): + """Check if current position is inside template argument list. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + pos: position just after the suspected template argument. + Returns: + True if (linenum, pos) is inside template arguments. + """ + while linenum < clean_lines.NumLines(): + # Find the earliest character that might indicate a template argument + line = clean_lines.elided[linenum] + match = Match(r'^[^{};=\[\]\.<>]*(.)', line[pos:]) + if not match: + linenum += 1 + pos = 0 + continue + token = match.group(1) + pos += len(match.group(0)) + + # These things do not look like template argument list: + # class Suspect { + # class Suspect x; } + if token in ('{', '}', ';'): return False + + # These things look like template argument list: + # template + # template + # template + # template + if token in ('>', '=', '[', ']', '.'): return True + + # Check if token is an unmatched '<'. + # If not, move on to the next character. + if token != '<': + pos += 1 + if pos >= len(line): + linenum += 1 + pos = 0 + continue + + # We can't be sure if we just find a single '<', and need to + # find the matching '>'. + (_, end_line, end_pos) = CloseExpression(clean_lines, linenum, pos - 1) + if end_pos < 0: + # Not sure if template argument list or syntax error in file + return False + linenum = end_line + pos = end_pos + return False + + def UpdatePreprocessor(self, line): + """Update preprocessor stack. + + We need to handle preprocessors due to classes like this: + #ifdef SWIG + struct ResultDetailsPageElementExtensionPoint { + #else + struct ResultDetailsPageElementExtensionPoint : public Extension { + #endif + + We make the following assumptions (good enough for most files): + - Preprocessor condition evaluates to true from #if up to first + #else/#elif/#endif. + + - Preprocessor condition evaluates to false from #else/#elif up + to #endif. We still perform lint checks on these lines, but + these do not affect nesting stack. + + Args: + line: current line to check. + """ + if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line): + # Beginning of #if block, save the nesting stack here. The saved + # stack will allow us to restore the parsing state in the #else case. 
+ self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack))) + elif Match(r'^\s*#\s*(else|elif)\b', line): + # Beginning of #else block + if self.pp_stack: + if not self.pp_stack[-1].seen_else: + # This is the first #else or #elif block. Remember the + # whole nesting stack up to this point. This is what we + # keep after the #endif. + self.pp_stack[-1].seen_else = True + self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack) + + # Restore the stack to how it was before the #if + self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if) + else: + # TODO(unknown): unexpected #else, issue warning? + pass + elif Match(r'^\s*#\s*endif\b', line): + # End of #if or #else blocks. + if self.pp_stack: + # If we saw an #else, we will need to restore the nesting + # stack to its former state before the #else, otherwise we + # will just continue from where we left off. + if self.pp_stack[-1].seen_else: + # Here we can just use a shallow copy since we are the last + # reference to it. + self.stack = self.pp_stack[-1].stack_before_else + # Drop the corresponding #if + self.pp_stack.pop() + else: + # TODO(unknown): unexpected #endif, issue warning? + pass + + # TODO(unknown): Update() is too long, but we will refactor later. + def Update(self, filename, clean_lines, linenum, error): + """Update nesting state with current line. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Remember top of the previous nesting stack. + # + # The stack is always pushed/popped and not modified in place, so + # we can just do a shallow copy instead of copy.deepcopy. Using + # deepcopy would slow down cpplint by ~28%. + if self.stack: + self.previous_stack_top = self.stack[-1] + else: + self.previous_stack_top = None + + # Update pp_stack + self.UpdatePreprocessor(line) + + # Count parentheses. This is to avoid adding struct arguments to + # the nesting stack. + if self.stack: + inner_block = self.stack[-1] + depth_change = line.count('(') - line.count(')') + inner_block.open_parentheses += depth_change + + # Also check if we are starting or ending an inline assembly block. + if inner_block.inline_asm in (_NO_ASM, _END_ASM): + if (depth_change != 0 and + inner_block.open_parentheses == 1 and + _MATCH_ASM.match(line)): + # Enter assembly block + inner_block.inline_asm = _INSIDE_ASM + else: + # Not entering assembly block. If previous line was _END_ASM, + # we will now shift to _NO_ASM state. + inner_block.inline_asm = _NO_ASM + elif (inner_block.inline_asm == _INSIDE_ASM and + inner_block.open_parentheses == 0): + # Exit assembly block + inner_block.inline_asm = _END_ASM + + # Consume namespace declaration at the beginning of the line. Do + # this in a loop so that we catch same line declarations like this: + # namespace proto2 { namespace bridge { class MessageSet; } } + while True: + # Match start of namespace. The "\b\s*" below catches namespace + # declarations even if it weren't followed by a whitespace, this + # is so that we don't confuse our namespace checker. The + # missing spaces will be flagged by CheckSpacing. 
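+      # (Illustration: 'namespace proto2 { namespace bridge {' pushes two
+      # _NamespaceInfo entries here, one per iteration of this loop.)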
+      namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
+      if not namespace_decl_match:
+        break
+
+      new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
+      self.stack.append(new_namespace)
+
+      line = namespace_decl_match.group(2)
+      if line.find('{') != -1:
+        new_namespace.seen_open_brace = True
+        line = line[line.find('{') + 1:]
+
+    # Look for a class declaration in whatever is left of the line
+    # after parsing namespaces.  The regexp accounts for decorated classes
+    # such as in:
+    #   class LOCKABLE API Object {
+    #   };
+    class_decl_match = Match(
+        r'^(\s*(?:template\s*<[\w\s<>,:]*>\s*)?'
+        r'(class|struct)\s+(?:[A-Z_]+\s+)*(\w+(?:::\w+)*))'
+        r'(.*)$', line)
+    if (class_decl_match and
+        (not self.stack or self.stack[-1].open_parentheses == 0)):
+      # We do not want to accept classes that are actually template arguments:
+      #   template <class Ignore1,
+      #             class Ignore2 = Default<Args>,
+      #             template <typename> class Ignore3>
+      #   void Function() {};
+      #
+      # To avoid template argument cases, we scan forward and look for
+      # an unmatched '>'.  If we see one, assume we are inside a
+      # template argument list.
+      end_declaration = len(class_decl_match.group(1))
+      if not self.InTemplateArgumentList(clean_lines, linenum, end_declaration):
+        self.stack.append(_ClassInfo(
+            class_decl_match.group(3), class_decl_match.group(2),
+            clean_lines, linenum))
+        line = class_decl_match.group(4)
+
+    # If we have not yet seen the opening brace for the innermost block,
+    # run checks here.
+    if not self.SeenOpenBrace():
+      self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)
+
+    # Update access control if we are inside a class/struct
+    if self.stack and isinstance(self.stack[-1], _ClassInfo):
+      classinfo = self.stack[-1]
+      access_match = Match(
+          r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?'
+          r':(?:[^:]|$)',
+          line)
+      if access_match:
+        classinfo.access = access_match.group(2)
+
+        # Check that access keywords are indented +1 space.  Skip this
+        # check if the keywords are not preceded by whitespaces.
+        #indent = access_match.group(1)
+        #if (len(indent) != classinfo.class_indent + 1 and
+        #    Match(r'^\s*$', indent)):
+        #  if classinfo.is_struct:
+        #    parent = 'struct ' + classinfo.name
+        #  else:
+        #    parent = 'class ' + classinfo.name
+        #  slots = ''
+        #  if access_match.group(3):
+        #    slots = access_match.group(3)
+        #  error(filename, linenum, 'whitespace/indent', 3,
+        #        '%s%s: should be indented +1 space inside %s %d %d' % (
+        #            access_match.group(2), slots, parent, len(indent), classinfo.class_indent + 1))
+
+    # Consume braces or semicolons from what's left of the line
+    while True:
+      # Match first brace, semicolon, or closed parenthesis.
+      matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line)
+      if not matched:
+        break
+
+      token = matched.group(1)
+      if token == '{':
+        # If namespace or class hasn't seen an opening brace yet, mark
+        # namespace/class head as complete.  Push a new block onto the
+        # stack otherwise.
+        if not self.SeenOpenBrace():
+          self.stack[-1].seen_open_brace = True
+        elif Match(r'^extern\s*"[^"]*"\s*\{', line):
+          self.stack.append(_ExternCInfo(linenum))
+        else:
+          self.stack.append(_BlockInfo(linenum, True))
+          if _MATCH_ASM.match(line):
+            self.stack[-1].inline_asm = _BLOCK_ASM
+
+      elif token == ';' or token == ')':
+        # If we haven't seen an opening brace yet, but we already saw
+        # a semicolon, this is probably a forward declaration.  Pop
+        # the stack for these.
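+        # (e.g. 'class Foo;' pushes a _ClassInfo above and pops it here.)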
+        #
+        # Similarly, if we haven't seen an opening brace yet, but we
+        # already saw a closing parenthesis, then these are probably
+        # function arguments with extra "class" or "struct" keywords.
+        # Also pop the stack for these.
+        if not self.SeenOpenBrace():
+          self.stack.pop()
+      else:  # token == '}'
+        # Perform end of block checks and pop the stack.
+        if self.stack:
+          self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
+          self.stack.pop()
+      line = matched.group(2)
+
+  def InnermostClass(self):
+    """Get class info on the top of the stack.
+
+    Returns:
+      A _ClassInfo object if we are inside a class, or None otherwise.
+    """
+    for i in range(len(self.stack), 0, -1):
+      classinfo = self.stack[i - 1]
+      if isinstance(classinfo, _ClassInfo):
+        return classinfo
+    return None
+
+  def CheckCompletedBlocks(self, filename, error):
+    """Checks that all classes and namespaces have been completely parsed.
+
+    Call this when all lines in a file have been processed.
+    Args:
+      filename: The name of the current file.
+      error: The function to call with any errors found.
+    """
+    # Note: This test can result in false positives if #ifdef constructs
+    # get in the way of brace matching.  See the testBuildClass test in
+    # cpplint_unittest.py for an example of this.
+    for obj in self.stack:
+      if isinstance(obj, _ClassInfo):
+        error(filename, obj.starting_linenum, 'build/class', 5,
+              'Failed to find complete declaration of class %s' %
+              obj.name)
+      elif isinstance(obj, _NamespaceInfo):
+        error(filename, obj.starting_linenum, 'build/namespaces', 5,
+              'Failed to find complete declaration of namespace %s' %
+              obj.name)
+
+
+def CheckForNonStandardConstructs(filename, clean_lines, linenum,
+                                  nesting_state, error):
+  r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
+
+  Complain about several constructs which gcc-2 accepts, but which are
+  not standard C++.  Warning about these in lint is one way to ease the
+  transition to new compilers.
+  - put storage class first (e.g. "static const" instead of "const static").
+  - "%lld" instead of %qd" in printf-type functions.
+  - "%1$d" is non-standard in printf-type functions.
+  - "\%" is an undefined character escape sequence.
+  - text after #endif is not allowed.
+  - invalid inner-style forward declaration.
+  - >? and <? operators, and their >?= and <?= cousins.
+
+  Additionally, check for constructor/destructor style violations and reference
+  members, as it is very convenient to do so while checking for
+  gcc-2 compliance.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    nesting_state: A NestingState instance which maintains information about
+                   the current stack of nested blocks being parsed.
+    error: A callable to which errors are reported, which takes 4 arguments:
+           filename, line number, error level, and message
+  """
+
+  # Remove comments from the line, but leave in strings for now.
+  line = clean_lines.lines[linenum]
+
+  if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
+    error(filename, linenum, 'runtime/printf_format', 3,
+          '%q in format strings is deprecated.  Use %ll instead.')
+
+  if Search(r'printf\s*\(.*".*%\d+\$', line):
+    error(filename, linenum, 'runtime/printf_format', 2,
+          '%N$ formats are unconventional.  Try rewriting to avoid them.')
+
+  # Remove escaped backslashes before looking for undefined escape sequences.
+  line = line.replace('\\\\', '')
+
+  if Search(r'("|\').*\\(%|\[|\(|{)', line):
+    error(filename, linenum, 'build/printf_format', 3,
+          '%, [, (, and { are undefined character escapes.  Unescape them.')
+
+  # For the rest, work with both comments and strings removed.
+  line = clean_lines.elided[linenum]
+
+  if Search(r'\b(const|volatile|void|char|short|int|long'
+            r'|float|double|signed|unsigned'
+            r'|schar|u?int8|u?int16|u?int32|u?int64)'
+            r'\s+(register|static|extern|typedef)\b',
+            line):
+    error(filename, linenum, 'build/storage_class', 5,
+          'Storage-class specifier (static, extern, typedef, etc) should be '
+          'at the beginning of the declaration.')
+
+  if Match(r'\s*#\s*endif\s*[^/\s]+', line):
+    error(filename, linenum, 'build/endif_comment', 5,
+          'Uncommented text after #endif is non-standard.  Use a comment.')
+
+  if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
+    error(filename, linenum, 'build/forward_decl', 5,
+          'Inner-style forward declarations are invalid.  Remove this line.')
+
+  if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?',
+            line):
+    error(filename, linenum, 'build/deprecated', 3,
+          '>? and <? (max and min) operators are non-standard and deprecated.')
+
+  if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', line):
+    #   r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;'
+    error(filename, linenum, 'runtime/member_string_references', 2,
+          'const string& members are dangerous. It is much better to use '
+          'alternatives, such as pointers or simple constants.')
+
+  # Everything else in this function operates on class declarations.
+  # Return early if the top of the nesting stack is not a class, or if
+  # the class head is not completed yet.
+  classinfo = nesting_state.InnermostClass()
+  if not classinfo or not classinfo.seen_open_brace:
+    return
+
+  # The class may have been declared with namespace or classname qualifiers.
+  # The constructor and destructor will not have those qualifiers.
+  base_classname = classinfo.name.split('::')[-1]
+
+  # Look for single-argument constructors that aren't marked explicit.
+  # Technically a valid construct, but against style.
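+  # (Illustration: 'Foo(int x);' should be 'explicit Foo(int x);', while
+  # 'Foo(const Foo&);' is exempt below as a copy constructor.)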
+ explicit_constructor_match = Match( + r'\s+(?:inline\s+)?(explicit\s+)?(?:inline\s+)?%s\s*' + r'\(((?:[^()]|\([^()]*\))*)\)' + % re.escape(base_classname), + line) + + if explicit_constructor_match: + is_marked_explicit = explicit_constructor_match.group(1) + + if not explicit_constructor_match.group(2): + constructor_args = [] + else: + constructor_args = explicit_constructor_match.group(2).split(',') + + # collapse arguments so that commas in template parameter lists and function + # argument parameter lists don't split arguments in two + i = 0 + while i < len(constructor_args): + constructor_arg = constructor_args[i] + while (constructor_arg.count('<') > constructor_arg.count('>') or + constructor_arg.count('(') > constructor_arg.count(')')): + constructor_arg += ',' + constructor_args[i + 1] + del constructor_args[i + 1] + constructor_args[i] = constructor_arg + i += 1 + + defaulted_args = [arg for arg in constructor_args if '=' in arg] + noarg_constructor = (not constructor_args or # empty arg list + # 'void' arg specifier + (len(constructor_args) == 1 and + constructor_args[0].strip() == 'void')) + onearg_constructor = ((len(constructor_args) == 1 and # exactly one arg + not noarg_constructor) or + # all but at most one arg defaulted + (len(constructor_args) >= 1 and + not noarg_constructor and + len(defaulted_args) >= len(constructor_args) - 1)) + initializer_list_constructor = bool( + onearg_constructor and + Search(r'\bstd\s*::\s*initializer_list\b', constructor_args[0])) + copy_constructor = bool( + onearg_constructor and + Match(r'(const\s+)?%s(\s*<[^>]*>)?(\s+const)?\s*(?:<\w+>\s*)?&' + % re.escape(base_classname), constructor_args[0].strip())) + + if (not is_marked_explicit and + onearg_constructor and + not initializer_list_constructor and + not copy_constructor): + if defaulted_args: + error(filename, linenum, 'runtime/explicit', 5, + 'Constructors callable with one argument ' + 'should be marked explicit.') + else: + error(filename, linenum, 'runtime/explicit', 5, + 'Single-parameter constructors should be marked explicit.') + elif is_marked_explicit and not onearg_constructor: + if noarg_constructor: + error(filename, linenum, 'runtime/explicit', 5, + 'Zero-parameter constructors should not be marked explicit.') + + +def CheckSpacingForFunctionCall(filename, clean_lines, linenum, error): + """Checks for the correctness of various spacing around function calls. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Since function calls often occur inside if/for/while/switch + # expressions - which have their own, more liberal conventions - we + # first see if we should be looking inside such an expression for a + # function call, to which we can apply more strict standards. + fncall = line # if there's no control flow construct, look at whole line + for pattern in (r'\bif\s*\((.*)\)\s*{', + r'\bfor\s*\((.*)\)\s*{', + r'\bwhile\s*\((.*)\)\s*[{;]', + r'\bswitch\s*\((.*)\)\s*{'): + match = Search(pattern, line) + if match: + fncall = match.group(1) # look inside the parens for function calls + break + + # Except in if/for/while/switch, there should never be space + # immediately inside parens (eg "f( 3, 4 )"). We make an exception + # for nested parens ( (a+b) + c ). Likewise, there should never be + # a space before a ( when it's a function argument. 
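+  # (e.g. both 'f( 3, 4 )' and 'f (3, 4)' are flagged by the checks below.)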
I assume it's a + # function argument when the char before the whitespace is legal in + # a function name (alnum + _) and we're not starting a macro. Also ignore + # pointers and references to arrays and functions coz they're too tricky: + # we use a very simple way to recognize these: + # " (something)(maybe-something)" or + # " (something)(maybe-something," or + # " (something)[something]" + # Note that we assume the contents of [] to be short enough that + # they'll never need to wrap. + if ( # Ignore control structures. + not Search(r'\b(if|for|while|switch|return|new|delete|catch|sizeof)\b', + fncall) and + # Ignore pointers/references to functions. + not Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and + # Ignore pointers/references to arrays. + not Search(r' \([^)]+\)\[[^\]]+\]', fncall)): + if Search(r'\w\s*\(\s(?!\s*\\$)', fncall): # a ( used for a fn call + error(filename, linenum, 'whitespace/parens', 4, + 'Extra space after ( in function call') + elif Search(r'\(\s+(?!(\s*\\)|\()', fncall): + error(filename, linenum, 'whitespace/parens', 2, + 'Extra space after (') + if (Search(r'\w\s+\(', fncall) and + not Search(r'_{0,2}asm_{0,2}\s+_{0,2}volatile_{0,2}\s+\(', fncall) and + not Search(r'#\s*define|typedef|using\s+\w+\s*=', fncall) and + not Search(r'\w\s+\((\w+::)*\*\w+\)\(', fncall) and + not Search(r'\bcase\s+\(', fncall)): + # TODO(unknown): Space after an operator function seem to be a common + # error, silence those for now by restricting them to highest verbosity. + if Search(r'\boperator_*\b', line): + error(filename, linenum, 'whitespace/parens', 0, + 'Extra space before ( in function call') + else: + error(filename, linenum, 'whitespace/parens', 4, + 'Extra space before ( in function call') + # If the ) is followed only by a newline or a { + newline, assume it's + # part of a control statement (if/while/etc), and don't complain + if Search(r'[^)]\s+\)\s*[^{\s]', fncall): + # If the closing parenthesis is preceded by only whitespaces, + # try to give a more descriptive error message. + if Search(r'^\s+\)', fncall): + error(filename, linenum, 'whitespace/parens', 2, + 'Closing ) should be moved to the previous line') + else: + error(filename, linenum, 'whitespace/parens', 2, + 'Extra space before )') + + +def IsBlankLine(line): + """Returns true if the given line is blank. + + We consider a line to be blank if the line is empty or consists of + only white spaces. + + Args: + line: A line of a string. + + Returns: + True, if the given line is blank. + """ + return not line or line.isspace() + + +def CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line, + error): + is_namespace_indent_item = ( + len(nesting_state.stack) > 1 and + nesting_state.stack[-1].check_namespace_indentation and + isinstance(nesting_state.previous_stack_top, _NamespaceInfo) and + nesting_state.previous_stack_top == nesting_state.stack[-2]) + + if ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item, + clean_lines.elided, line): + CheckItemIndentationInNamespace(filename, clean_lines.elided, + line, error) + + +def CheckForFunctionLengths(filename, clean_lines, linenum, + function_state, error): + """Reports for long function bodies. + + For an overview why this is done, see: + https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions + + Uses a simplistic algorithm assuming other style guidelines + (especially spacing) are followed. + Only checks unindented functions, so class members are unchecked. 
+ Trivial bodies are unchecked, so constructors with huge initializer lists + may be missed. + Blank/comment lines are not counted so as to avoid encouraging the removal + of vertical space and comments just to get through a lint check. + NOLINT *on the last line of a function* disables this check. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + function_state: Current function name and lines in body so far. + error: The function to call with any errors found. + """ + lines = clean_lines.lines + line = lines[linenum] + joined_line = '' + + starting_func = False + regexp = r'(\w(\w|::|\*|\&|\s)*)\(' # decls * & space::name( ... + match_result = Match(regexp, line) + if match_result: + # If the name is all caps and underscores, figure it's a macro and + # ignore it, unless it's TEST or TEST_F. + function_name = match_result.group(1).split()[-1] + if function_name == 'TEST' or function_name == 'TEST_F' or ( + not Match(r'[A-Z_]+$', function_name)): + starting_func = True + + if starting_func: + body_found = False + for start_linenum in xrange(linenum, clean_lines.NumLines()): + start_line = lines[start_linenum] + joined_line += ' ' + start_line.lstrip() + if Search(r'(;|})', start_line): # Declarations and trivial functions + body_found = True + break # ... ignore + elif Search(r'{', start_line): + body_found = True + function = Search(r'((\w|:)*)\(', line).group(1) + if Match(r'TEST', function): # Handle TEST... macros + parameter_regexp = Search(r'(\(.*\))', joined_line) + if parameter_regexp: # Ignore bad syntax + function += parameter_regexp.group(1) + else: + function += '()' + function_state.Begin(function) + break + if not body_found: + # No body for the function (or evidence of a non-function) was found. + error(filename, linenum, 'readability/fn_size', 5, + 'Lint failed to find start of function body.') + elif Match(r'^\}\s*$', line): # function end + function_state.Check(error, filename, linenum) + function_state.End() + elif not Match(r'^\s*$', line): + function_state.Count() # Count non-blank/non-comment lines. + + +_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?') + + +def CheckComment(line, filename, linenum, next_line_start, error): + """Checks for common mistakes in comments. + + Args: + line: The line in question. + filename: The name of the current file. + linenum: The number of the line to check. + next_line_start: The first non-whitespace column of the next line. + error: The function to call with any errors found. + """ + commentpos = line.find('//') + if commentpos != -1: + # Check if the // may be in quotes. If so, ignore it + if re.sub(r'\\.', '', line[0:commentpos]).count('"') % 2 == 0: + # Allow one space for new scopes, two spaces otherwise: + if (not (Match(r'^.*{ *//', line) and next_line_start == commentpos) and + ((commentpos >= 1 and + line[commentpos-1] not in string.whitespace) or + (commentpos >= 2 and + line[commentpos-2] not in string.whitespace)) and linenum != 1): + error(filename, linenum, 'whitespace/comments', 2, + 'At least two spaces is best between code and comments') + + # Checks for common mistakes in TODO comments. + comment = line[commentpos:] + match = _RE_PATTERN_TODO.match(comment) + if match: + # One whitespace is correct; zero whitespace is handled elsewhere. 
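+        # (e.g. '// TODO(user): fix' is accepted; '//   TODO(user): fix',
+        # with extra spaces before TODO, triggers the warning below.)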
+ leading_whitespace = match.group(1) + if len(leading_whitespace) > 1: + error(filename, linenum, 'whitespace/todo', 2, + 'Too many spaces before TODO') + + username = match.group(2) + #if not username: + # error(filename, linenum, 'readability/todo', 2, + # 'Missing username in TODO; it should look like ' + # '"// TODO(my_username): Stuff."') + + middle_whitespace = match.group(3) + # Comparisons made explicit for correctness -- pylint: disable=g-explicit-bool-comparison + if middle_whitespace != ' ' and middle_whitespace != '': + error(filename, linenum, 'whitespace/todo', 2, + 'TODO(my_username) should be followed by a space') + + # If the comment contains an alphanumeric character, there + # should be a space somewhere between it and the // unless + # it's a /// or //! Doxygen comment. + if (Match(r'//[^ ]*\w', comment) and + not Match(r'(///|//\!)(\s+|$)', comment)): + error(filename, linenum, 'whitespace/comments', 4, + 'Should have a space between // and comment') + + +def CheckAccess(filename, clean_lines, linenum, nesting_state, error): + """Checks for improper use of DISALLOW* macros. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] # get rid of comments and strings + + matched = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|' + r'DISALLOW_IMPLICIT_CONSTRUCTORS)'), line) + if not matched: + return + if nesting_state.stack and isinstance(nesting_state.stack[-1], _ClassInfo): + if nesting_state.stack[-1].access != 'private': + error(filename, linenum, 'readability/constructors', 3, + '%s must be in the private: section' % matched.group(1)) + + else: + # Found DISALLOW* macro outside a class declaration, or perhaps it + # was used inside a function when it should have been part of the + # class declaration. We could issue a warning here, but it + # probably resulted in a compiler error already. + pass + + +def CheckSpacing(filename, clean_lines, linenum, nesting_state, error): + """Checks for the correctness of various spacing issues in the code. + + Things we check for: spaces around operators, spaces after + if/for/while/switch, no spaces around parens in function calls, two + spaces between code and comment, don't start a block with a blank + line, don't end a function with a blank line, don't add a blank line + after public/protected/private, don't have too many blank lines in a row. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + + # Don't use "elided" lines here, otherwise we can't check commented lines. + # Don't want to use "raw" either, because we don't want to check inside C++11 + # raw strings, + raw = clean_lines.lines_without_raw_strings + line = raw[linenum] + + # Before nixing comments, check if the line is blank for no good + # reason. This includes the first line after a block is opened, and + # blank lines at the end of a function (ie, right before a line like '}' + # + # Skip all the blank line checks if we are immediately inside a + # namespace body. 
In other words, don't issue blank line warnings + # for this block: + # namespace { + # + # } + # + # A warning about missing end of namespace comments will be issued instead. + # + # Also skip blank line checks for 'extern "C"' blocks, which are formatted + # like namespaces. + if (IsBlankLine(line) and + not nesting_state.InNamespaceBody() and + not nesting_state.InExternC()): + elided = clean_lines.elided + prev_line = elided[linenum - 1] + prevbrace = prev_line.rfind('{') + # TODO(unknown): Don't complain if line before blank line, and line after, + # both start with alnums and are indented the same amount. + # This ignores whitespace at the start of a namespace block + # because those are not usually indented. + if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1: + # OK, we have a blank line at the start of a code block. Before we + # complain, we check if it is an exception to the rule: The previous + # non-empty line has the parameters of a function header that are indented + # 4 spaces (because they did not fit in a 80 column line when placed on + # the same line as the function name). We also check for the case where + # the previous line is indented 6 spaces, which may happen when the + # initializers of a constructor do not fit into a 80 column line. + exception = False + if Match(r' {6}\w', prev_line): # Initializer list? + # We are looking for the opening column of initializer list, which + # should be indented 4 spaces to cause 6 space indentation afterwards. + search_position = linenum-2 + while (search_position >= 0 + and Match(r' {6}\w', elided[search_position])): + search_position -= 1 + exception = (search_position >= 0 + and elided[search_position][:5] == ' :') + else: + # Search for the function arguments or an initializer list. We use a + # simple heuristic here: If the line is indented 4 spaces; and we have a + # closing paren, without the opening paren, followed by an opening brace + # or colon (for initializer lists) we assume that it is the last line of + # a function header. If we have a colon indented 4 spaces, it is an + # initializer list. 
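+        # For example (illustrative), no warning for the blank line in:
+        #   void DoSomething(
+        #       int arg) {
+        #
+        #     ...
+        # because the previous non-blank line is a function-header tail
+        # indented 4 spaces and ending in ') {'.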
+ exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)', + prev_line) + or Match(r' {4}:', prev_line)) + + if not exception: + error(filename, linenum, 'whitespace/blank_line', 2, + 'Redundant blank line at the start of a code block ' + 'should be deleted.') + # Ignore blank lines at the end of a block in a long if-else + # chain, like this: + # if (condition1) { + # // Something followed by a blank line + # + # } else if (condition2) { + # // Something else + # } + if linenum + 1 < clean_lines.NumLines(): + next_line = raw[linenum + 1] + if (next_line + and Match(r'\s*}', next_line) + and next_line.find('} else ') == -1): + error(filename, linenum, 'whitespace/blank_line', 3, + 'Redundant blank line at the end of a code block ' + 'should be deleted.') + + matched = Match(r'\s*(public|protected|private):', prev_line) + if matched: + error(filename, linenum, 'whitespace/blank_line', 3, + 'Do not leave a blank line after "%s:"' % matched.group(1)) + + # Next, check comments + next_line_start = 0 + if linenum + 1 < clean_lines.NumLines(): + next_line = raw[linenum + 1] + next_line_start = len(next_line) - len(next_line.lstrip()) + CheckComment(line, filename, linenum, next_line_start, error) + + # get rid of comments and strings + line = clean_lines.elided[linenum] + + # You shouldn't have spaces before your brackets, except maybe after + # 'delete []' or 'return []() {};' + if Search(r'\w\s+\[', line) and not Search(r'(?:delete|return)\s+\[', line): + error(filename, linenum, 'whitespace/braces', 5, + 'Extra space before [') + + # In range-based for, we wanted spaces before and after the colon, but + # not around "::" tokens that might appear. + if (Search(r'for *\(.*[^:]:[^: ]', line) or + Search(r'for *\(.*[^: ]:[^:]', line)): + error(filename, linenum, 'whitespace/forcolon', 2, + 'Missing space around colon in range-based for loop') + + +def CheckOperatorSpacing(filename, clean_lines, linenum, error): + """Checks for horizontal spacing around operators. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Don't try to do spacing checks for operator methods. Do this by + # replacing the troublesome characters with something else, + # preserving column position for all other characters. + # + # The replacement is done repeatedly to avoid false positives from + # operators that call operators. + while True: + match = Match(r'^(.*\boperator\b)(\S+)(\s*\(.*)$', line) + if match: + line = match.group(1) + ('_' * len(match.group(2))) + match.group(3) + else: + break + + # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )". + # Otherwise not. Note we only check for non-spaces on *both* sides; + # sometimes people put non-spaces on one side when aligning ='s among + # many lines (not that this is behavior that I approve of...) + if ((Search(r'[\w.]=', line) or + Search(r'=[\w.]', line)) + and not Search(r'\b(if|while|for) ', line) + # Operators taken from [lex.operators] in C++11 standard. + and not Search(r'(>=|<=|==|!=|&=|\^=|\|=|\+=|\*=|\/=|\%=)', line) + and not Search(r'operator=', line)): + error(filename, linenum, 'whitespace/operators', 4, + 'Missing spaces around =') + + # It's ok not to have spaces around binary operators like + - * /, but if + # there's too little whitespace, we get concerned. It's hard to tell, + # though, so we punt on this one for now. TODO. 
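+  # For example (illustrative), the '=' check above flags:
+  #   int x=5;    -> 'Missing spaces around ='
+  # but leaves 'int x = 5;' and 'if ((a=Foo()) == 0)' alone.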
+
+  # You should always have whitespace around binary operators.
+  #
+  # Check <= and >= first to avoid false positives with < and >, then
+  # check non-include lines for spacing around < and >.
+  #
+  # If the operator is followed by a comma, assume it's being used in a
+  # macro context and don't do any checks. This avoids false
+  # positives.
+  #
+  # Note that && is not included here. This is because there are too
+  # many false positives due to RValue references.
+  match = Search(r'[^<>=!\s](==|!=|<=|>=|\|\|)[^<>=!\s,;\)]', line)
+  if match:
+    error(filename, linenum, 'whitespace/operators', 3,
+          'Missing spaces around %s' % match.group(1))
+  elif not Match(r'#.*include', line):
+    # Look for < that is not surrounded by spaces. This is only
+    # triggered if both sides are missing spaces, even though
+    # technically we should flag if at least one side is missing a
+    # space. This is done to avoid some false positives with shifts.
+    match = Match(r'^(.*[^\s<])<[^\s=<,]', line)
+    if match:
+      (_, _, end_pos) = CloseExpression(
+          clean_lines, linenum, len(match.group(1)))
+      if end_pos <= -1:
+        error(filename, linenum, 'whitespace/operators', 3,
+              'Missing spaces around <')
+
+    # Look for > that is not surrounded by spaces. Similar to the
+    # above, we only trigger if both sides are missing spaces to avoid
+    # false positives with shifts.
+    match = Match(r'^(.*[^-\s>])>[^\s=>,]', line)
+    if match:
+      (_, _, start_pos) = ReverseCloseExpression(
+          clean_lines, linenum, len(match.group(1)))
+      if start_pos <= -1:
+        error(filename, linenum, 'whitespace/operators', 3,
+              'Missing spaces around >')
+
+  # We allow no-spaces around << when used like this: 10<<20, but
+  # not otherwise (particularly, not when used as streams)
+  #
+  # We also allow operators following an opening parenthesis, since
+  # those tend to be macros that deal with operators.
+  match = Search(r'(operator|[^\s(<])(?:L|UL|LL|ULL|l|ul|ll|ull)?<<([^\s,=<])', line)
+  if (match and not (match.group(1).isdigit() and match.group(2).isdigit()) and
+      not (match.group(1) == 'operator' and match.group(2) == ';')):
+    error(filename, linenum, 'whitespace/operators', 3,
+          'Missing spaces around <<')
+
+  # We allow no-spaces around >> for almost anything. This is because
+  # C++11 allows ">>" to close nested templates, which accounts for
+  # most cases when ">>" is not followed by a space.
+  #
+  # We still warn on ">>" followed by alpha character, because that is
+  # likely due to ">>" being used for right shifts, e.g.:
+  #   value >> alpha
+  #
+  # When ">>" is used to close templates, the alphanumeric letter that
+  # follows would be part of an identifier, and there should still be
+  # a space separating the template type and the identifier.
+  #   type<type<type>> alpha
+  match = Search(r'>>[a-zA-Z_]', line)
+  if match:
+    error(filename, linenum, 'whitespace/operators', 3,
+          'Missing spaces around >>')
+
+  # There shouldn't be space around unary operators
+  match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
+  if match:
+    error(filename, linenum, 'whitespace/operators', 4,
+          'Extra space for operator %s' % match.group(1))
+
+
+def CheckParenthesisSpacing(filename, clean_lines, linenum, error):
+  """Checks for horizontal spacing around parentheses.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+ """ + line = clean_lines.elided[linenum] + + # No spaces after an if, while, switch, or for + match = Search(r' (if\(|for\(|while\(|switch\()', line) + if match: + error(filename, linenum, 'whitespace/parens', 5, + 'Missing space before ( in %s' % match.group(1)) + + # For if/for/while/switch, the left and right parens should be + # consistent about how many spaces are inside the parens, and + # there should either be zero or one spaces inside the parens. + # We don't want: "if ( foo)" or "if ( foo )". + # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed. + match = Search(r'\b(if|for|while|switch)\s*' + r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$', + line) + if match: + if len(match.group(2)) != len(match.group(4)): + if not (match.group(3) == ';' and + len(match.group(2)) == 1 + len(match.group(4)) or + not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)): + error(filename, linenum, 'whitespace/parens', 5, + 'Mismatching spaces inside () in %s' % match.group(1)) + if len(match.group(2)) not in [0, 1]: + error(filename, linenum, 'whitespace/parens', 5, + 'Should have zero or one spaces inside ( and ) in %s' % + match.group(1)) + + +def CheckCommaSpacing(filename, clean_lines, linenum, error): + """Checks for horizontal spacing near commas and semicolons. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + raw = clean_lines.lines_without_raw_strings + line = clean_lines.elided[linenum] + + # You should always have a space after a comma (either as fn arg or operator) + # + # This does not apply when the non-space character following the + # comma is another comma, since the only time when that happens is + # for empty macro arguments. + # + # We run this check in two passes: first pass on elided lines to + # verify that lines contain missing whitespaces, second pass on raw + # lines to confirm that those missing whitespaces are not due to + # elided comments. + if (Search(r',[^,\s]', ReplaceAll(r'\boperator\s*,\s*\(', 'F(', line)) and + Search(r',[^,\s]', raw[linenum])): + error(filename, linenum, 'whitespace/comma', 3, + 'Missing space after ,') + + # You should always have a space after a semicolon + # except for few corner cases + # TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more + # space after ; + if Search(r';[^\s};\\)/]', line): + error(filename, linenum, 'whitespace/semicolon', 3, + 'Missing space after ;') + + +def _IsType(clean_lines, nesting_state, expr): + """Check if expression looks like a type name, returns true if so. + + Args: + clean_lines: A CleansedLines instance containing the file. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + expr: The expression to check. + Returns: + True, if token looks like a type. + """ + # Keep only the last token in the expression + last_word = Match(r'^.*(\b\S+)$', expr) + if last_word: + token = last_word.group(1) + else: + token = expr + + # Match native types and stdint types + if _TYPES.match(token): + return True + + # Try a bit harder to match templated types. Walk up the nesting + # stack until we find something that resembles a typename + # declaration for what we are looking for. 
+  typename_pattern = (r'\b(?:typename|class|struct)\s+' + re.escape(token) +
+                      r'\b')
+  block_index = len(nesting_state.stack) - 1
+  while block_index >= 0:
+    if isinstance(nesting_state.stack[block_index], _NamespaceInfo):
+      return False
+
+    # Found where the opening brace is. We want to scan from this
+    # line up to the beginning of the function, minus a few lines.
+    #   template <typename T>
+    #   class C
+    #     : public ... {  // start scanning here
+    last_line = nesting_state.stack[block_index].starting_linenum
+
+    next_block_start = 0
+    if block_index > 0:
+      next_block_start = nesting_state.stack[block_index - 1].starting_linenum
+    first_line = last_line
+    while first_line >= next_block_start:
+      if clean_lines.elided[first_line].find('template') >= 0:
+        break
+      first_line -= 1
+    if first_line < next_block_start:
+      # Didn't find any "template" keyword before reaching the next block,
+      # there are probably no template things to check for this block
+      block_index -= 1
+      continue
+
+    # Look for typename in the specified range
+    for i in xrange(first_line, last_line + 1, 1):
+      if Search(typename_pattern, clean_lines.elided[i]):
+        return True
+    block_index -= 1
+
+  return False
+
+
+def CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error):
+  """Checks for horizontal spacing near braces.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    nesting_state: A NestingState instance which maintains information about
+        the current stack of nested blocks being parsed.
+    error: The function to call with any errors found.
+  """
+  line = clean_lines.elided[linenum]
+
+  # Except after an opening paren, or after another opening brace (in case of
+  # an initializer list, for instance), you should have spaces before your
+  # braces when they are delimiting blocks, classes, namespaces etc.
+  # And since you should never have braces at the beginning of a line,
+  # this is an easy test. Except that braces used for initialization don't
+  # follow the same rule; we often don't want spaces before those.
+  match = Match(r'^(.*[^ ({>]){', line)
+
+  if match:
+    # Try a bit harder to check for brace initialization. This
+    # happens in one of the following forms:
+    #   Constructor() : initializer_list_{} { ... }
+    #   Constructor{}.MemberFunction()
+    #   Type variable{};
+    #   FunctionCall(type{}, ...);
+    #   LastArgument(..., type{});
+    #   LOG(INFO) << type{} << " ...";
+    #   map_of_type[{...}] = ...;
+    #   ternary = expr ? new type{} : nullptr;
+    #   OuterTemplate<InnerTemplateConstructor<Type>{}>
+    #
+    # We check for the character following the closing brace, and
+    # silence the warning if it's one of those listed above, i.e.
+    # "{.;,)<>]:".
+    #
+    # To account for nested initializer list, we allow any number of
+    # closing braces up to "{;,)<". We can't simply silence the
+    # warning on first sight of closing brace, because that would
+    # cause false negatives for things that are not initializer lists.
+    #   Silence this:         But not this:
+    #     Outer{                if (...) {
+    #       Inner{...}            if (...){  // Missing space before {
+    #     };                    }
+    #
+    # There is a false negative with this approach if people inserted
+    # spurious semicolons, e.g. "if (cond){};", but we will catch the
+    # spurious semicolon with a separate check.
+ leading_text = match.group(1) + (endline, endlinenum, endpos) = CloseExpression( + clean_lines, linenum, len(match.group(1))) + trailing_text = '' + if endpos > -1: + trailing_text = endline[endpos:] + for offset in xrange(endlinenum + 1, + min(endlinenum + 3, clean_lines.NumLines() - 1)): + trailing_text += clean_lines.elided[offset] + # We also suppress warnings for `uint64_t{expression}` etc., as the style + # guide recommends brace initialization for integral types to avoid + # overflow/truncation. + if (not Match(r'^[\s}]*[{.;,)<>\]:]', trailing_text) + and not _IsType(clean_lines, nesting_state, leading_text)): + error(filename, linenum, 'whitespace/braces', 5, + 'Missing space before {') + + # Make sure '} else {' has spaces. + if Search(r'}else', line): + error(filename, linenum, 'whitespace/braces', 5, + 'Missing space before else') + + # You shouldn't have a space before a semicolon at the end of the line. + # There's a special case for "for" since the style guide allows space before + # the semicolon there. + if Search(r':\s*;\s*$', line): + error(filename, linenum, 'whitespace/semicolon', 5, + 'Semicolon defining empty statement. Use {} instead.') + elif Search(r'^\s*;\s*$', line): + error(filename, linenum, 'whitespace/semicolon', 5, + 'Line contains only semicolon. If this should be an empty statement, ' + 'use {} instead.') + elif (Search(r'\s+;\s*$', line) and + not Search(r'\bfor\b', line)): + error(filename, linenum, 'whitespace/semicolon', 5, + 'Extra space before last semicolon. If this should be an empty ' + 'statement, use {} instead.') + + +def IsDecltype(clean_lines, linenum, column): + """Check if the token ending on (linenum, column) is decltype(). + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: the number of the line to check. + column: end column of the token to check. + Returns: + True if this token is decltype() expression, False otherwise. + """ + (text, _, start_col) = ReverseCloseExpression(clean_lines, linenum, column) + if start_col < 0: + return False + if Search(r'\bdecltype\s*$', text[0:start_col]): + return True + return False + + +def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error): + """Checks for additional blank line issues related to sections. + + Currently the only thing checked here is blank line before protected/private. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + class_info: A _ClassInfo objects. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + # Skip checks if the class is small, where small means 25 lines or less. + # 25 lines seems like a good cutoff since that's the usual height of + # terminals, and any class that can't fit in one screen can't really + # be considered "small". + # + # Also skip checks if we are on the first line. This accounts for + # classes that look like + # class Foo { public: ... }; + # + # If we didn't find the end of the class, last_line would be zero, + # and the check will be skipped by the first condition. + if (class_info.last_line - class_info.starting_linenum <= 24 or + linenum <= class_info.starting_linenum): + return + + matched = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum]) + if matched: + # Issue warning if the line before public/protected/private was + # not a blank line, but don't do this if the previous line contains + # "class" or "struct". 
This can happen two ways: + # - We are at the beginning of the class. + # - We are forward-declaring an inner class that is semantically + # private, but needed to be public for implementation reasons. + # Also ignores cases where the previous line ends with a backslash as can be + # common when defining classes in C macros. + prev_line = clean_lines.lines[linenum - 1] + if (not IsBlankLine(prev_line) and + not Search(r'\b(class|struct)\b', prev_line) and + not Search(r'\\$', prev_line)): + # Try a bit harder to find the beginning of the class. This is to + # account for multi-line base-specifier lists, e.g.: + # class Derived + # : public Base { + end_class_head = class_info.starting_linenum + for i in range(class_info.starting_linenum, linenum): + if Search(r'\{\s*$', clean_lines.lines[i]): + end_class_head = i + break + if end_class_head < linenum - 1: + error(filename, linenum, 'whitespace/blank_line', 3, + '"%s:" should be preceded by a blank line' % matched.group(1)) + + +def GetPreviousNonBlankLine(clean_lines, linenum): + """Return the most recent non-blank line and its line number. + + Args: + clean_lines: A CleansedLines instance containing the file contents. + linenum: The number of the line to check. + + Returns: + A tuple with two elements. The first element is the contents of the last + non-blank line before the current line, or the empty string if this is the + first non-blank line. The second is the line number of that line, or -1 + if this is the first non-blank line. + """ + + prevlinenum = linenum - 1 + while prevlinenum >= 0: + prevline = clean_lines.elided[prevlinenum] + if not IsBlankLine(prevline): # if not a blank line... + return (prevline, prevlinenum) + prevlinenum -= 1 + return ('', -1) + + +def CheckBraces(filename, clean_lines, linenum, error): + """Looks for misplaced braces (e.g. at the end of line). + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + line = clean_lines.elided[linenum] # get rid of comments and strings + + if Match(r'\s*{\s*$', line): + # We allow an open brace to start a line in the case where someone is using + # braces in a block to explicitly create a new scope, which is commonly used + # to control the lifetime of stack-allocated variables. Braces are also + # used for brace initializers inside function calls. We don't detect this + # perfectly: we just don't complain if the last non-whitespace character on + # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the + # previous line starts a preprocessor block. We also allow a brace on the + # following line if it is part of an array initialization and would not fit + # within the 80 character limit of the preceding line. + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if (not Search(r'[,;:}{(]\s*$', prevline) and + not Match(r'\s*#', prevline) and + not (GetLineWidth(prevline) > _line_length - 2 and '[]' in prevline)): + error(filename, linenum, 'whitespace/braces', 4, + '{ should almost always be at the end of the previous line') + + # An else clause should be on the same line as the preceding closing brace. 
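+  # For example (illustrative):
+  #   }
+  #   else {   -> 'An else should appear on the same line as the preceding }'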
+ if Match(r'\s*else\b\s*(?:if\b|\{|$)', line): + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if Match(r'\s*}\s*$', prevline): + error(filename, linenum, 'whitespace/newline', 4, + 'An else should appear on the same line as the preceding }') + + # If braces come on one side of an else, they should be on both. + # However, we have to worry about "else if" that spans multiple lines! + if Search(r'else if\s*\(', line): # could be multi-line if + brace_on_left = bool(Search(r'}\s*else if\s*\(', line)) + # find the ( after the if + pos = line.find('else if') + pos = line.find('(', pos) + if pos > 0: + (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos) + brace_on_right = endline[endpos:].find('{') != -1 + if brace_on_left != brace_on_right: # must be brace after if + error(filename, linenum, 'readability/braces', 5, + 'If an else has a brace on one side, it should have it on both') + elif Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line): + error(filename, linenum, 'readability/braces', 5, + 'If an else has a brace on one side, it should have it on both') + + # Likewise, an else should never have the else clause on the same line + if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line): + error(filename, linenum, 'whitespace/newline', 4, + 'Else clause should never be on same line as else (use 2 lines)') + + # In the same way, a do/while should never be on one line + if Match(r'\s*do [^\s{]', line): + error(filename, linenum, 'whitespace/newline', 4, + 'do/while clauses should not be on a single line') + + # Check single-line if/else bodies. The style guide says 'curly braces are not + # required for single-line statements'. We additionally allow multi-line, + # single statements, but we reject anything with more than one semicolon in + # it. This means that the first semicolon after the if should be at the end of + # its line, and the line after that should have an indent level equal to or + # lower than the if. We also check for ambiguous if/else nesting without + # braces. + if_else_match = Search(r'\b(if\s*\(|else\b)', line) + if if_else_match and not Match(r'\s*#', line): + if_indent = GetIndentLevel(line) + endline, endlinenum, endpos = line, linenum, if_else_match.end() + if_match = Search(r'\bif\s*\(', line) + if if_match: + # This could be a multiline if condition, so find the end first. + pos = if_match.end() - 1 + (endline, endlinenum, endpos) = CloseExpression(clean_lines, linenum, pos) + # Check for an opening brace, either directly after the if or on the next + # line. If found, this isn't a single-statement conditional. + if (not Match(r'\s*{', endline[endpos:]) + and not (Match(r'\s*$', endline[endpos:]) + and endlinenum < (len(clean_lines.elided) - 1) + and Match(r'\s*{', clean_lines.elided[endlinenum + 1]))): + while (endlinenum < len(clean_lines.elided) + and ';' not in clean_lines.elided[endlinenum][endpos:]): + endlinenum += 1 + endpos = 0 + if endlinenum < len(clean_lines.elided): + endline = clean_lines.elided[endlinenum] + # We allow a mix of whitespace and closing braces (e.g. for one-liner + # methods) and a single \ after the semicolon (for macros) + endpos = endline.find(';') + if not Match(r';[\s}]*(\\?)$', endline[endpos:]): + # Semicolon isn't the last character, there's something trailing. + # Output a warning if the semicolon is not contained inside + # a lambda expression. 
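+          # For example (illustrative), no warning for a one-line lambda:
+          #   if (cond) Apply([](int x) { return x; });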
+ if not Match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}]*\}\s*\)*[;,]\s*$', + endline): + error(filename, linenum, 'readability/braces', 4, + 'If/else bodies with multiple statements require braces') + elif endlinenum < len(clean_lines.elided) - 1: + # Make sure the next line is dedented + next_line = clean_lines.elided[endlinenum + 1] + next_indent = GetIndentLevel(next_line) + # With ambiguous nested if statements, this will error out on the + # if that *doesn't* match the else, regardless of whether it's the + # inner one or outer one. + if (if_match and Match(r'\s*else\b', next_line) + and next_indent != if_indent): + error(filename, linenum, 'readability/braces', 4, + 'Else clause should be indented at the same level as if. ' + 'Ambiguous nested if/else chains require braces.') + elif next_indent > if_indent: + error(filename, linenum, 'readability/braces', 4, + 'If/else bodies with multiple statements require braces') + + +def CheckTrailingSemicolon(filename, clean_lines, linenum, error): + """Looks for redundant trailing semicolon. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + line = clean_lines.elided[linenum] + + # Block bodies should not be followed by a semicolon. Due to C++11 + # brace initialization, there are more places where semicolons are + # required than not, so we use a whitelist approach to check these + # rather than a blacklist. These are the places where "};" should + # be replaced by just "}": + # 1. Some flavor of block following closing parenthesis: + # for (;;) {}; + # while (...) {}; + # switch (...) {}; + # Function(...) {}; + # if (...) {}; + # if (...) else if (...) {}; + # + # 2. else block: + # if (...) else {}; + # + # 3. const member function: + # Function(...) const {}; + # + # 4. Block following some statement: + # x = 42; + # {}; + # + # 5. Block at the beginning of a function: + # Function(...) { + # {}; + # } + # + # Note that naively checking for the preceding "{" will also match + # braces inside multi-dimensional arrays, but this is fine since + # that expression will not contain semicolons. + # + # 6. Block following another block: + # while (true) {} + # {}; + # + # 7. End of namespaces: + # namespace {}; + # + # These semicolons seems far more common than other kinds of + # redundant semicolons, possibly due to people converting classes + # to namespaces. For now we do not warn for this case. + # + # Try matching case 1 first. + match = Match(r'^(.*\)\s*)\{', line) + if match: + # Matched closing parenthesis (case 1). Check the token before the + # matching opening parenthesis, and don't warn if it looks like a + # macro. This avoids these false positives: + # - macro that defines a base class + # - multi-line macro that defines a base class + # - macro that defines the whole class-head + # + # But we still issue warnings for macros that we know are safe to + # warn, specifically: + # - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P + # - TYPED_TEST + # - INTERFACE_DEF + # - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED: + # + # We implement a whitelist of safe macros instead of a blacklist of + # unsafe macros, even though the latter appears less frequently in + # google code and would have been easier to implement. 
This is because + # the downside for getting the whitelist wrong means some extra + # semicolons, while the downside for getting the blacklist wrong + # would result in compile errors. + # + # In addition to macros, we also don't want to warn on + # - Compound literals + # - Lambdas + # - alignas specifier with anonymous structs + # - decltype + closing_brace_pos = match.group(1).rfind(')') + opening_parenthesis = ReverseCloseExpression( + clean_lines, linenum, closing_brace_pos) + if opening_parenthesis[2] > -1: + line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]] + macro = Search(r'\b([A-Z_][A-Z0-9_]*)\s*$', line_prefix) + func = Match(r'^(.*\])\s*$', line_prefix) + if ((macro and + macro.group(1) not in ( + 'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST', + 'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED', + 'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or + (func and not Search(r'\boperator\s*\[\s*\]', func.group(1))) or + Search(r'\b(?:struct|union)\s+alignas\s*$', line_prefix) or + Search(r'\bdecltype$', line_prefix) or + Search(r'\s+=\s*$', line_prefix)): + match = None + if (match and + opening_parenthesis[1] > 1 and + Search(r'\]\s*$', clean_lines.elided[opening_parenthesis[1] - 1])): + # Multi-line lambda-expression + match = None + + else: + # Try matching cases 2-3. + match = Match(r'^(.*(?:else|\)\s*const)\s*)\{', line) + if not match: + # Try matching cases 4-6. These are always matched on separate lines. + # + # Note that we can't simply concatenate the previous line to the + # current line and do a single match, otherwise we may output + # duplicate warnings for the blank line case: + # if (cond) { + # // blank line + # } + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if prevline and Search(r'[;{}]\s*$', prevline): + match = Match(r'^(\s*)\{', line) + + # Check matching closing brace + if match: + (endline, endlinenum, endpos) = CloseExpression( + clean_lines, linenum, len(match.group(1))) + if endpos > -1 and Match(r'^\s*;', endline[endpos:]): + # Current {} pair is eligible for semicolon check, and we have found + # the redundant semicolon, output warning here. + # + # Note: because we are scanning forward for opening braces, and + # outputting warnings for the matching closing brace, if there are + # nested blocks with trailing semicolons, we will get the error + # messages in reversed order. + + # We need to check the line forward for NOLINT + raw_lines = clean_lines.raw_lines + ParseNolintSuppressions(filename, raw_lines[endlinenum-1], endlinenum-1, + error) + ParseNolintSuppressions(filename, raw_lines[endlinenum], endlinenum, + error) + + error(filename, endlinenum, 'readability/braces', 4, + "You don't need a ; after a }") + + +def CheckEmptyBlockBody(filename, clean_lines, linenum, error): + """Look for empty loop/conditional body with only a single semicolon. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + # Search for loop keywords at the beginning of the line. Because only + # whitespaces are allowed before the keywords, this will also ignore most + # do-while-loops, since those lines should start with closing brace. + # + # We also check "if" blocks here, since an empty conditional block + # is likely an error. + line = clean_lines.elided[linenum] + matched = Match(r'\s*(for|while|if)\s*\(', line) + if matched: + # Find the end of the conditional expression. 
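+    # For example (illustrative):
+    #   while (Poll());   -> 'Empty loop bodies should use {} or continue'
+    #   if (done);        -> 'Empty conditional bodies should use {}'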
+ (end_line, end_linenum, end_pos) = CloseExpression( + clean_lines, linenum, line.find('(')) + + # Output warning if what follows the condition expression is a semicolon. + # No warning for all other cases, including whitespace or newline, since we + # have a separate check for semicolons preceded by whitespace. + if end_pos >= 0 and Match(r';', end_line[end_pos:]): + if matched.group(1) == 'if': + error(filename, end_linenum, 'whitespace/empty_conditional_body', 5, + 'Empty conditional bodies should use {}') + else: + error(filename, end_linenum, 'whitespace/empty_loop_body', 5, + 'Empty loop bodies should use {} or continue') + + # Check for if statements that have completely empty bodies (no comments) + # and no else clauses. + if end_pos >= 0 and matched.group(1) == 'if': + # Find the position of the opening { for the if statement. + # Return without logging an error if it has no brackets. + opening_linenum = end_linenum + opening_line_fragment = end_line[end_pos:] + # Loop until EOF or find anything that's not whitespace or opening {. + while not Search(r'^\s*\{', opening_line_fragment): + if Search(r'^(?!\s*$)', opening_line_fragment): + # Conditional has no brackets. + return + opening_linenum += 1 + if opening_linenum == len(clean_lines.elided): + # Couldn't find conditional's opening { or any code before EOF. + return + opening_line_fragment = clean_lines.elided[opening_linenum] + # Set opening_line (opening_line_fragment may not be entire opening line). + opening_line = clean_lines.elided[opening_linenum] + + # Find the position of the closing }. + opening_pos = opening_line_fragment.find('{') + if opening_linenum == end_linenum: + # We need to make opening_pos relative to the start of the entire line. + opening_pos += end_pos + (closing_line, closing_linenum, closing_pos) = CloseExpression( + clean_lines, opening_linenum, opening_pos) + if closing_pos < 0: + return + + # Now construct the body of the conditional. This consists of the portion + # of the opening line after the {, all lines until the closing line, + # and the portion of the closing line before the }. + if (clean_lines.raw_lines[opening_linenum] != + CleanseComments(clean_lines.raw_lines[opening_linenum])): + # Opening line ends with a comment, so conditional isn't empty. + return + if closing_linenum > opening_linenum: + # Opening line after the {. Ignore comments here since we checked above. + body = list(opening_line[opening_pos+1:]) + # All lines until closing line, excluding closing line, with comments. + body.extend(clean_lines.raw_lines[opening_linenum+1:closing_linenum]) + # Closing line before the }. Won't (and can't) have comments. + body.append(clean_lines.elided[closing_linenum][:closing_pos-1]) + body = '\n'.join(body) + else: + # If statement has brackets and fits on a single line. + body = opening_line[opening_pos+1:closing_pos-1] + + # Check if the body is empty + if not _EMPTY_CONDITIONAL_BODY_PATTERN.search(body): + return + # The body is empty. Now make sure there's not an else clause. + current_linenum = closing_linenum + current_line_fragment = closing_line[closing_pos:] + # Loop until EOF or find anything that's not whitespace or else clause. + while Search(r'^\s*$|^(?=\s*else)', current_line_fragment): + if Search(r'^(?=\s*else)', current_line_fragment): + # Found an else clause, so don't log an error. 
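+          # For example (illustrative), this empty body is not flagged
+          # because an else clause follows:
+          #   if (cond) {
+          #   } else {
+          #     Fallback();
+          #   }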
+ return + current_linenum += 1 + if current_linenum == len(clean_lines.elided): + break + current_line_fragment = clean_lines.elided[current_linenum] + + # The body is empty and there's no else clause until EOF or other code. + error(filename, end_linenum, 'whitespace/empty_if_body', 4, + ('If statement had no body and no else clause')) + + +def FindCheckMacro(line): + """Find a replaceable CHECK-like macro. + + Args: + line: line to search on. + Returns: + (macro name, start position), or (None, -1) if no replaceable + macro is found. + """ + for macro in _CHECK_MACROS: + i = line.find(macro) + if i >= 0: + # Find opening parenthesis. Do a regular expression match here + # to make sure that we are matching the expected CHECK macro, as + # opposed to some other macro that happens to contain the CHECK + # substring. + matched = Match(r'^(.*\b' + macro + r'\s*)\(', line) + if not matched: + continue + return (macro, len(matched.group(1))) + return (None, -1) + + +def CheckCheck(filename, clean_lines, linenum, error): + """Checks the use of CHECK and EXPECT macros. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + # Decide the set of replacement macros that should be suggested + lines = clean_lines.elided + (check_macro, start_pos) = FindCheckMacro(lines[linenum]) + if not check_macro: + return + + # Find end of the boolean expression by matching parentheses + (last_line, end_line, end_pos) = CloseExpression( + clean_lines, linenum, start_pos) + if end_pos < 0: + return + + # If the check macro is followed by something other than a + # semicolon, assume users will log their own custom error messages + # and don't suggest any replacements. + if not Match(r'\s*;', last_line[end_pos:]): + return + + if linenum == end_line: + expression = lines[linenum][start_pos + 1:end_pos - 1] + else: + expression = lines[linenum][start_pos + 1:] + for i in xrange(linenum + 1, end_line): + expression += lines[i] + expression += last_line[0:end_pos - 1] + + # Parse expression so that we can take parentheses into account. + # This avoids false positives for inputs like "CHECK((a < 4) == b)", + # which is not replaceable by CHECK_LE. + lhs = '' + rhs = '' + operator = None + while expression: + matched = Match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||' + r'==|!=|>=|>|<=|<|\()(.*)$', expression) + if matched: + token = matched.group(1) + if token == '(': + # Parenthesized operand + expression = matched.group(2) + (end, _) = FindEndOfExpressionInLine(expression, 0, ['(']) + if end < 0: + return # Unmatched parenthesis + lhs += '(' + expression[0:end] + expression = expression[end:] + elif token in ('&&', '||'): + # Logical and/or operators. This means the expression + # contains more than one term, for example: + # CHECK(42 < a && a < b); + # + # These are not replaceable with CHECK_LE, so bail out early. + return + elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'): + # Non-relational operator + lhs += token + expression = matched.group(2) + else: + # Relational operator + operator = token + rhs = matched.group(2) + break + else: + # Unparenthesized operand. Instead of appending to lhs one character + # at a time, we do another regular expression match to consume several + # characters at once if possible. Trivial benchmark shows that this + # is more efficient when the operands are longer than a single + # character, which is generally the case. 
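+      # For example (illustrative), for the expression 'some_name == 42'
+      # the first pattern consumes 'some_name ' in a single match instead
+      # of one character per iteration.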
+ matched = Match(r'^([^-=!<>()&|]+)(.*)$', expression) + if not matched: + matched = Match(r'^(\s*\S)(.*)$', expression) + if not matched: + break + lhs += matched.group(1) + expression = matched.group(2) + + # Only apply checks if we got all parts of the boolean expression + if not (lhs and operator and rhs): + return + + # Check that rhs do not contain logical operators. We already know + # that lhs is fine since the loop above parses out && and ||. + if rhs.find('&&') > -1 or rhs.find('||') > -1: + return + + # At least one of the operands must be a constant literal. This is + # to avoid suggesting replacements for unprintable things like + # CHECK(variable != iterator) + # + # The following pattern matches decimal, hex integers, strings, and + # characters (in that order). + lhs = lhs.strip() + rhs = rhs.strip() + match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$' + if Match(match_constant, lhs) or Match(match_constant, rhs): + # Note: since we know both lhs and rhs, we can provide a more + # descriptive error message like: + # Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42) + # Instead of: + # Consider using CHECK_EQ instead of CHECK(a == b) + # + # We are still keeping the less descriptive message because if lhs + # or rhs gets long, the error message might become unreadable. + error(filename, linenum, 'readability/check', 2, + 'Consider using %s instead of %s(a %s b)' % ( + _CHECK_REPLACEMENT[check_macro][operator], + check_macro, operator)) + + +def CheckAltTokens(filename, clean_lines, linenum, error): + """Check alternative keywords being used in boolean expressions. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Avoid preprocessor lines + if Match(r'^\s*#', line): + return + + # Last ditch effort to avoid multi-line comments. This will not help + # if the comment started before the current line or ended after the + # current line, but it catches most of the false positives. At least, + # it provides a way to workaround this warning for people who use + # multi-line comments in preprocessor macros. + # + # TODO(unknown): remove this once cpplint has better support for + # multi-line comments. + if line.find('/*') >= 0 or line.find('*/') >= 0: + return + + for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line): + error(filename, linenum, 'readability/alt_tokens', 2, + 'Use operator %s instead of %s' % ( + _ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1))) + + +def GetLineWidth(line): + """Determines the width of the line in column positions. + + Args: + line: A string, which may be a Unicode string. + + Returns: + The width of the line in column positions, accounting for Unicode + combining characters and wide characters. + """ + if isinstance(line, unicode): + width = 0 + for uc in unicodedata.normalize('NFC', line): + if unicodedata.east_asian_width(uc) in ('W', 'F'): + width += 2 + elif not unicodedata.combining(uc): + width += 1 + return width + else: + return len(line) + + +def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state, + error): + """Checks rules from the 'C++ style rules' section of cppguide.html. + + Most of these rules are hard to test (naming, comment style), but we + do what we can. In particular we check for 2-space indents, line lengths, + tab usage, spaces inside code, etc. 
+ + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + file_extension: The extension (without the dot) of the filename. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + + # Don't use "elided" lines here, otherwise we can't check commented lines. + # Don't want to use "raw" either, because we don't want to check inside C++11 + # raw strings, + raw_lines = clean_lines.lines_without_raw_strings + line = raw_lines[linenum] + prev = raw_lines[linenum - 1] if linenum > 0 else '' + + if line.find('\t') != -1: + error(filename, linenum, 'whitespace/tab', 1, + 'Tab found; better to use spaces') + + # One or three blank spaces at the beginning of the line is weird; it's + # hard to reconcile that with 2-space indents. + # NOTE: here are the conditions rob pike used for his tests. Mine aren't + # as sophisticated, but it may be worth becoming so: RLENGTH==initial_spaces + # if(RLENGTH > 20) complain = 0; + # if(match($0, " +(error|private|public|protected):")) complain = 0; + # if(match(prev, "&& *$")) complain = 0; + # if(match(prev, "\\|\\| *$")) complain = 0; + # if(match(prev, "[\",=><] *$")) complain = 0; + # if(match($0, " <<")) complain = 0; + # if(match(prev, " +for \\(")) complain = 0; + # if(prevodd && match(prevprev, " +for \\(")) complain = 0; + scope_or_label_pattern = r'\s*\w+\s*:\s*\\?$' + classinfo = nesting_state.InnermostClass() + initial_spaces = 0 + cleansed_line = clean_lines.elided[linenum] + while initial_spaces < len(line) and line[initial_spaces] == ' ': + initial_spaces += 1 + # There are certain situations we allow one space, notably for + # section labels, and also lines containing multi-line raw strings. + # We also don't check for lines that look like continuation lines + # (of lines ending in double quotes, commas, equals, or angle brackets) + # because the rules for how to indent those are non-trivial. + if (not Search(r'[",=><] *$', prev) and + (initial_spaces == 1 or initial_spaces == 3) and + not Match(scope_or_label_pattern, cleansed_line) and + not (clean_lines.raw_lines[linenum] != line and + Match(r'^\s*""', line))): + error(filename, linenum, 'whitespace/indent', 3, + 'Weird number of spaces at line-start. ' + 'Are you using a 2-space indent?') + + if line and line[-1].isspace(): + error(filename, linenum, 'whitespace/end_of_line', 4, + 'Line ends in whitespace. Consider deleting these extra spaces.') + + # Check if the line is a header guard. + is_header_guard = False + if file_extension == 'h': + cppvar = GetHeaderGuardCPPVariable(filename) + if (line.startswith('#ifndef %s' % cppvar) or + line.startswith('#define %s' % cppvar) or + line.startswith('#endif // %s' % cppvar)): + is_header_guard = True + # #include lines and header guards can be long, since there's no clean way to + # split them. + # + # URLs can be long too. It's possible to split these, but it makes them + # harder to cut&paste. + # + # The "$Id:...$" comment may also get very long without it being the + # developers fault. 
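+  # For example (illustrative), these over-long lines are not flagged:
+  #   #include "very/long/path/to/some/generated/header/file_name.h"
+  #   // https://example.com/a/long/reference/link/that/cannot/be/split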
+ if (not line.startswith('#include') and not is_header_guard and + not Match(r'^\s*//.*http(s?)://\S*$', line) and + not Match(r'^\s*//\s*[^\s]*$', line) and + not Match(r'^// \$Id:.*#[0-9]+ \$$', line)): + line_width = GetLineWidth(line) + if line_width > _line_length: + error(filename, linenum, 'whitespace/line_length', 2, + 'Lines should be <= %i characters long' % _line_length) + + if (cleansed_line.count(';') > 1 and + # for loops are allowed two ;'s (and may run over two lines). + cleansed_line.find('for') == -1 and + (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or + GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and + # It's ok to have many commands in a switch case that fits in 1 line + not ((cleansed_line.find('case ') != -1 or + cleansed_line.find('default:') != -1) and + cleansed_line.find('break;') != -1)): + error(filename, linenum, 'whitespace/newline', 0, + 'More than one command on the same line') + + # Some more style checks + CheckBraces(filename, clean_lines, linenum, error) + CheckTrailingSemicolon(filename, clean_lines, linenum, error) + CheckEmptyBlockBody(filename, clean_lines, linenum, error) + CheckAccess(filename, clean_lines, linenum, nesting_state, error) + CheckSpacing(filename, clean_lines, linenum, nesting_state, error) + CheckOperatorSpacing(filename, clean_lines, linenum, error) + CheckParenthesisSpacing(filename, clean_lines, linenum, error) + CheckCommaSpacing(filename, clean_lines, linenum, error) + CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error) + CheckSpacingForFunctionCall(filename, clean_lines, linenum, error) + CheckCheck(filename, clean_lines, linenum, error) + CheckAltTokens(filename, clean_lines, linenum, error) + classinfo = nesting_state.InnermostClass() + if classinfo: + CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error) + + +_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$') +# Matches the first component of a filename delimited by -s and _s. That is: +# _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo' +# _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo' +# _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo' +# _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo' +_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+') + + +def _DropCommonSuffixes(filename): + """Drops common suffixes like _test.cc or -inl.h from filename. + + For example: + >>> _DropCommonSuffixes('foo/foo-inl.h') + 'foo/foo' + >>> _DropCommonSuffixes('foo/bar/foo.cc') + 'foo/bar/foo' + >>> _DropCommonSuffixes('foo/foo_internal.h') + 'foo/foo' + >>> _DropCommonSuffixes('foo/foo_unusualinternal.h') + 'foo/foo_unusualinternal' + + Args: + filename: The input filename. + + Returns: + The filename with the common suffix removed. + """ + for suffix in ('test.cc', 'regtest.cc', 'unittest.cc', + 'inl.h', 'impl.h', 'internal.h'): + if (filename.endswith(suffix) and len(filename) > len(suffix) and + filename[-len(suffix) - 1] in ('-', '_')): + return filename[:-len(suffix) - 1] + return os.path.splitext(filename)[0] + + +def _ClassifyInclude(fileinfo, include, is_system): + """Figures out what kind of header 'include' is. + + Args: + fileinfo: The current file cpplint is running over. A FileInfo instance. + include: The path to a #included file. + is_system: True if the #include used <> rather than "". + + Returns: + One of the _XXX_HEADER constants. 
+ + For example: + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True) + _C_SYS_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True) + _CPP_SYS_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False) + _LIKELY_MY_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'), + ... 'bar/foo_other_ext.h', False) + _POSSIBLE_MY_HEADER + >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False) + _OTHER_HEADER + """ + # This is a list of all standard c++ header files, except + # those already checked for above. + is_cpp_h = include in _CPP_HEADERS + + if is_system: + if is_cpp_h: + return _CPP_SYS_HEADER + else: + return _C_SYS_HEADER + + # If the target file and the include we're checking share a + # basename when we drop common extensions, and the include + # lives in . , then it's likely to be owned by the target file. + target_dir, target_base = ( + os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName()))) + include_dir, include_base = os.path.split(_DropCommonSuffixes(include)) + if target_base == include_base and ( + include_dir == target_dir or + include_dir == os.path.normpath(target_dir + '/../public')): + return _LIKELY_MY_HEADER + + # If the target and include share some initial basename + # component, it's possible the target is implementing the + # include, so it's allowed to be first, but we'll never + # complain if it's not there. + target_first_component = _RE_FIRST_COMPONENT.match(target_base) + include_first_component = _RE_FIRST_COMPONENT.match(include_base) + if (target_first_component and include_first_component and + target_first_component.group(0) == + include_first_component.group(0)): + return _POSSIBLE_MY_HEADER + + return _OTHER_HEADER + + + +def CheckIncludeLine(filename, clean_lines, linenum, include_state, error): + """Check rules that are applicable to #include lines. + + Strings on #include lines are NOT removed from elided line, to make + certain tasks easier. However, to prevent false positives, checks + applicable to #include lines in CheckLanguage must be put here. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + include_state: An _IncludeState instance in which the headers are inserted. + error: The function to call with any errors found. + """ + fileinfo = FileInfo(filename) + line = clean_lines.lines[linenum] + + # "include" should use the new style "foo/bar.h" instead of just "bar.h" + # Only do this check if the included header follows google naming + # conventions. If not, assume that it's a 3rd party API that + # requires special include conventions. + # + # We also make an exception for Lua headers, which follow google + # naming convention but not the include convention. + match = Match(r'#include\s*"([^/]+\.h)"', line) + + # TODO: check it + #if match and not _THIRD_PARTY_HEADERS_PATTERN.match(match.group(1)): + # error(filename, linenum, 'build/include', 4, + # 'Include the directory when naming .h files') + + # we shouldn't include a file more than once. actually, there are a + # handful of instances where doing so is okay, but in general it's + # not. 
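+  # For example (illustrative):
+  #   #include "foo/bar.h"
+  #   #include "foo/bar.h"   -> '"foo/bar.h" already included at ...'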
+  match = _RE_PATTERN_INCLUDE.search(line)
+  if match:
+    include = match.group(2)
+    is_system = (match.group(1) == '<')
+    duplicate_line = include_state.FindHeader(include)
+    if duplicate_line >= 0:
+      error(filename, linenum, 'build/include', 4,
+            '"%s" already included at %s:%s' %
+            (include, filename, duplicate_line))
+    elif (include.endswith('.cc') and
+          os.path.dirname(fileinfo.RepositoryName()) != os.path.dirname(include)):
+      error(filename, linenum, 'build/include', 4,
+            'Do not include .cc files from other packages')
+    elif not _THIRD_PARTY_HEADERS_PATTERN.match(include):
+      include_state.include_list[-1].append((include, linenum))
+
+      # We want to ensure that headers appear in the right order:
+      # 1) for foo.cc, foo.h  (preferred location)
+      # 2) c system files
+      # 3) cpp system files
+      # 4) for foo.cc, foo.h  (deprecated location)
+      # 5) other google headers
+      #
+      # We classify each include statement as one of those 5 types
+      # using a number of techniques. The include_state object keeps
+      # track of the highest type seen, and complains if we see a
+      # lower type after that.
+      error_message = include_state.CheckNextIncludeOrder(
+          _ClassifyInclude(fileinfo, include, is_system))
+      if error_message:
+        error(filename, linenum, 'build/include_order', 4,
+              '%s. Should be: %s.h, c system, c++ system, other.' %
+              (error_message, fileinfo.BaseName()))
+      canonical_include = include_state.CanonicalizeAlphabeticalOrder(include)
+      if not include_state.IsInAlphabeticalOrder(
+          clean_lines, linenum, canonical_include):
+        error(filename, linenum, 'build/include_alpha', 4,
+              'Include "%s" not in alphabetical order' % include)
+      include_state.SetLastHeader(canonical_include)
+
+
+def _GetTextInside(text, start_pattern):
+  r"""Retrieves all the text between matching open and close parentheses.
+
+  Given a string of lines and a regular expression string, retrieve all the text
+  following the expression and between opening punctuation symbols like
+  (, [, or {, and the matching close-punctuation symbol. This handles properly
+  nested occurrences of the punctuations, so for the text like
+  printf(a(), b(c()));
+  a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
+  start_pattern must match a string having an open punctuation symbol at the end.
+
+  Args:
+    text: The lines to extract text. Its comments and strings must be elided.
+          It can be a single line or span multiple lines.
+    start_pattern: The regexp string indicating where to start extracting
+                   the text.
+  Returns:
+    The extracted text.
+    None if either the opening string or ending punctuation could not be found.
+  """
+  # TODO(unknown): Audit cpplint.py to see what places could be profitably
+  # rewritten to use _GetTextInside (and use inferior regexp matching today).
+
+  # Map each opening punctuation to its matching close-punctuation.
+  matching_punctuation = {'(': ')', '{': '}', '[': ']'}
+  closing_punctuation = set(matching_punctuation.itervalues())
+
+  # Find the position to start extracting text.
+  match = re.search(start_pattern, text, re.M)
+  if not match:  # start_pattern not found in text.
+    return None
+  start_position = match.end(0)
+
+  assert start_position > 0, (
+      'start_pattern must end with an opening punctuation.')
+  assert text[start_position - 1] in matching_punctuation, (
+      'start_pattern must end with an opening punctuation.')
+  # Stack of closing punctuations we expect to have in text after position.
+ punctuation_stack = [matching_punctuation[text[start_position - 1]]] + position = start_position + while punctuation_stack and position < len(text): + if text[position] == punctuation_stack[-1]: + punctuation_stack.pop() + elif text[position] in closing_punctuation: + # A closing punctuation without matching opening punctuations. + return None + elif text[position] in matching_punctuation: + punctuation_stack.append(matching_punctuation[text[position]]) + position += 1 + if punctuation_stack: + # Opening punctuations left without matching close-punctuations. + return None + # punctuations match. + return text[start_position:position - 1] + + +# Patterns for matching call-by-reference parameters. +# +# Supports nested templates up to 2 levels deep using this messy pattern: +# < (?: < (?: < [^<>]* +# > +# | [^<>] )* +# > +# | [^<>] )* +# > +_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*' # =~ [[:alpha:]][[:alnum:]]* +_RE_PATTERN_TYPE = ( + r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?' + r'(?:\w|' + r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|' + r'::)+') +# A call-by-reference parameter ends with '& identifier'. +_RE_PATTERN_REF_PARAM = re.compile( + r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*' + r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]') +# A call-by-const-reference parameter either ends with 'const& identifier' +# or looks like 'const type& identifier' when 'type' is atomic. +_RE_PATTERN_CONST_REF_PARAM = ( + r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT + + r'|const\s+' + _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')') +# Stream types. +_RE_PATTERN_REF_STREAM_PARAM = ( + r'(?:.*stream\s*&\s*' + _RE_PATTERN_IDENT + r')') + + +def CheckLanguage(filename, clean_lines, linenum, file_extension, + include_state, nesting_state, error): + """Checks rules from the 'C++ language rules' section of cppguide.html. + + Some of these rules are hard to test (function overloading, using + uint32 inappropriately), but we do the best we can. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + file_extension: The extension (without the dot) of the filename. + include_state: An _IncludeState instance in which the headers are inserted. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + # If the line is empty or consists of entirely a comment, no need to + # check it. + line = clean_lines.elided[linenum] + if not line: + return + + match = _RE_PATTERN_INCLUDE.search(line) + if match: + CheckIncludeLine(filename, clean_lines, linenum, include_state, error) + return + + # Reset include state across preprocessor directives. This is meant + # to silence warnings for conditional includes. + match = Match(r'^\s*#\s*(if|ifdef|ifndef|elif|else|endif)\b', line) + if match: + include_state.ResetSection(match.group(1)) + + # Make Windows paths like Unix. + fullname = os.path.abspath(filename).replace('\\', '/') + + # Perform other checks now that we are sure that this is not an include line + CheckCasts(filename, clean_lines, linenum, error) + CheckGlobalStatic(filename, clean_lines, linenum, error) + CheckPrintf(filename, clean_lines, linenum, error) + + if file_extension == 'h': + # TODO(unknown): check that 1-arg constructors are explicit. + # How to tell it's a constructor? 
+ # (handled in CheckForNonStandardConstructs for now) + # TODO(unknown): check that classes declare or disable copy/assign + # (level 1 error) + pass + + # Check if people are using the verboten C basic types. The only exception + # we regularly allow is "unsigned short port" for port. + if Search(r'\bshort port\b', line): + if not Search(r'\bunsigned short port\b', line): + error(filename, linenum, 'runtime/int', 4, + 'Use "unsigned short" for ports, not "short"') + else: + match = Search(r'\b(short|long(?! +double)|long long)\b', line) + #if match: + # error(filename, linenum, 'runtime/int', 4, + # 'Use int16/int64/etc, rather than the C type %s' % match.group(1)) + + # Check if some verboten operator overloading is going on + # TODO(unknown): catch out-of-line unary operator&: + # class X {}; + # int operator&(const X& x) { return 42; } // unary operator& + # The trick is it's hard to tell apart from binary operator&: + # class Y { int operator&(const Y& x) { return 23; } }; // binary operator& + if Search(r'\boperator\s*&\s*\(\s*\)', line): + error(filename, linenum, 'runtime/operator', 4, + 'Unary operator& is dangerous. Do not use it.') + + # Check for suspicious usage of "if" like + # } if (a == b) { + if Search(r'\}\s*if\s*\(', line): + error(filename, linenum, 'readability/braces', 4, + 'Did you mean "else if"? If not, start a new line for "if".') + + # Check for potential format string bugs like printf(foo). + # We constrain the pattern not to pick things like DocidForPrintf(foo). + # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str()) + # TODO(unknown): Catch the following case. Need to change the calling + # convention of the whole function to process multiple line to handle it. + # printf( + # boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line); + printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(') + if printf_args: + match = Match(r'([\w.\->()]+)$', printf_args) + if match and match.group(1) != '__VA_ARGS__': + function_name = re.search(r'\b((?:string)?printf)\s*\(', + line, re.I).group(1) + error(filename, linenum, 'runtime/printf', 4, + 'Potential format string bug. Do %s("%%s", %s) instead.' + % (function_name, match.group(1))) + + # Check for potential memset bugs like memset(buf, sizeof(buf), 0). + match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line) + if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)): + error(filename, linenum, 'runtime/memset', 4, + 'Did you mean "memset(%s, 0, %s)"?' + % (match.group(1), match.group(2))) + + #if Search(r'\busing namespace\b', line): + # error(filename, linenum, 'build/namespaces', 5, + # 'Do not use namespace using-directives. ' + # 'Use using-declarations instead.') + + # Detect variable-length arrays. + match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line) + if (match and match.group(2) != 'return' and match.group(2) != 'delete' and + match.group(3).find(']') == -1): + # Split the size using space and arithmetic operators as delimiters. + # If any of the resulting tokens are not compile time constants then + # report the error. 
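+    # For example, the size text in 'int arr[kMaxUsers + 1];' splits into
+    # 'kMaxUsers' and '1' (empty tokens are skipped), both of which look
+    # like compile-time constants, whereas the lone 'n' in 'int arr[n];'
+    # matches none of the patterns below and triggers the warning.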
+ tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3)) + is_const = True + skip_next = False + for tok in tokens: + if skip_next: + skip_next = False + continue + + if Search(r'sizeof\(.+\)', tok): continue + if Search(r'arraysize\(\w+\)', tok): continue + + tok = tok.lstrip('(') + tok = tok.rstrip(')') + if not tok: continue + if Match(r'\d+', tok): continue + if Match(r'0[xX][0-9a-fA-F]+', tok): continue + if Match(r'k[A-Z0-9]\w*', tok): continue + if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue + if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue + # A catch all for tricky sizeof cases, including 'sizeof expression', + # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)' + # requires skipping the next token because we split on ' ' and '*'. + if tok.startswith('sizeof'): + skip_next = True + continue + is_const = False + break + if not is_const: + error(filename, linenum, 'runtime/arrays', 1, + 'Do not use variable-length arrays. Use an appropriately named ' + "('k' followed by CamelCase) compile-time constant for the size.") + + # Check for use of unnamed namespaces in header files. Registration + # macros are typically OK, so we allow use of "namespace {" on lines + # that end with backslashes. + if (file_extension == 'h' + and Search(r'\bnamespace\s*{', line) + and line[-1] != '\\'): + error(filename, linenum, 'build/namespaces', 4, + 'Do not use unnamed namespaces in header files. See ' + 'https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces' + ' for more information.') + + +def CheckGlobalStatic(filename, clean_lines, linenum, error): + """Check for unsafe global or static objects. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Match two lines at a time to support multiline declarations + if linenum + 1 < clean_lines.NumLines() and not Search(r'[;({]', line): + line += clean_lines.elided[linenum + 1].strip() + + # Check for people declaring static/global STL strings at the top level. + # This is dangerous because the C++ language does not guarantee that + # globals with constructors are initialized before the first access, and + # also because globals can be destroyed when some threads are still running. + # TODO(unknown): Generalize this to also find static unique_ptr instances. + # TODO(unknown): File bugs for clang-tidy to find these. + match = Match( + r'((?:|static +)(?:|const +))(?::*std::)?string( +const)? +' + r'([a-zA-Z0-9_:]+)\b(.*)', + line) + + # Remove false positives: + # - String pointers (as opposed to values). + # string *pointer + # const string *pointer + # string const *pointer + # string *const pointer + # + # - Functions and template specializations. + # string Function(... + # string Class::Method(... + # + # - Operators. These are matched separately because operator names + # cross non-word boundaries, and trying to match both operators + # and functions at the same time would decrease accuracy of + # matching identifiers. 
+ # string Class::operator*() + if (match and + not Search(r'\bstring\b(\s+const)?\s*[\*\&]\s*(const\s+)?\w', line) and + not Search(r'\boperator\W', line) and + not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)*\s*\(([^"]|$)', match.group(4))): + if Search(r'\bconst\b', line): + error(filename, linenum, 'runtime/string', 4, + 'For a static/global string constant, use a C style string ' + 'instead: "%schar%s %s[]".' % + (match.group(1), match.group(2) or '', match.group(3))) + else: + error(filename, linenum, 'runtime/string', 4, + 'Static/global string variables are not permitted.') + + if (Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line) or + Search(r'\b([A-Za-z0-9_]*_)\(CHECK_NOTNULL\(\1\)\)', line)): + error(filename, linenum, 'runtime/init', 4, + 'You seem to be initializing a member variable with itself.') + + +def CheckPrintf(filename, clean_lines, linenum, error): + """Check for printf related issues. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # When snprintf is used, the second argument shouldn't be a literal. + match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line) + if match and match.group(2) != '0': + # If 2nd arg is zero, snprintf is used to calculate size. + error(filename, linenum, 'runtime/printf', 3, + 'If you can, use sizeof(%s) instead of %s as the 2nd arg ' + 'to snprintf.' % (match.group(1), match.group(2))) + + # Check if some verboten C functions are being used. + if Search(r'\bsprintf\s*\(', line): + error(filename, linenum, 'runtime/printf', 5, + 'Never use sprintf. Use snprintf instead.') + match = Search(r'\b(strcpy|strcat)\s*\(', line) + if match: + error(filename, linenum, 'runtime/printf', 4, + 'Almost always, snprintf is better than %s' % match.group(1)) + + +def IsDerivedFunction(clean_lines, linenum): + """Check if current line contains an inherited function. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + Returns: + True if current line contains a function with "override" + virt-specifier. + """ + # Scan back a few lines for start of current function + for i in xrange(linenum, max(-1, linenum - 10), -1): + match = Match(r'^([^()]*\w+)\(', clean_lines.elided[i]) + if match: + # Look for "override" after the matching closing parenthesis + line, _, closing_paren = CloseExpression( + clean_lines, i, len(match.group(1))) + return (closing_paren >= 0 and + Search(r'\boverride\b', line[closing_paren:])) + return False + + +def IsOutOfLineMethodDefinition(clean_lines, linenum): + """Check if current line contains an out-of-line method definition. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + Returns: + True if current line contains an out-of-line method definition. + """ + # Scan back a few lines for start of current function + for i in xrange(linenum, max(-1, linenum - 10), -1): + if Match(r'^([^()]*\w+)\(', clean_lines.elided[i]): + return Match(r'^[^()]*\w+::\w+\(', clean_lines.elided[i]) is not None + return False + + +def IsInitializerList(clean_lines, linenum): + """Check if current line is inside constructor initializer list. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. 
+  Returns:
+    True if current line appears to be inside constructor initializer
+    list, False otherwise.
+  """
+  for i in xrange(linenum, 1, -1):
+    line = clean_lines.elided[i]
+    if i == linenum:
+      remove_function_body = Match(r'^(.*)\{\s*$', line)
+      if remove_function_body:
+        line = remove_function_body.group(1)
+
+    if Search(r'\s:\s*\w+[({]', line):
+      # A lone colon tends to indicate the start of a constructor
+      # initializer list. It could also be a ternary operator, which
+      # also tends to appear in constructor initializer lists as
+      # opposed to parameter lists.
+      return True
+    if Search(r'\}\s*,\s*$', line):
+      # A closing brace followed by a comma is probably the end of a
+      # brace-initialized member in a constructor initializer list.
+      return True
+    if Search(r'[{};]\s*$', line):
+      # Found one of the following:
+      # - A closing brace or semicolon, probably the end of the previous
+      #   function.
+      # - An opening brace, probably the start of current class or namespace.
+      #
+      # Current line is probably not inside an initializer list since
+      # we saw one of those things without seeing the starting colon.
+      return False
+
+  # Got to the beginning of the file without seeing the start of
+  # constructor initializer list.
+  return False
+
+
+def CheckForNonConstReference(filename, clean_lines, linenum,
+                              nesting_state, error):
+  """Check for non-const references.
+
+  Separate from CheckLanguage since it scans backwards from current
+  line, instead of scanning forward.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    nesting_state: A NestingState instance which maintains information about
+                   the current stack of nested blocks being parsed.
+    error: The function to call with any errors found.
+  """
+  # Do nothing if there is no '&' on current line.
+  line = clean_lines.elided[linenum]
+  if '&' not in line:
+    return
+
+  # If a function is inherited, the current function doesn't have much of
+  # a choice, so any non-const references should not be blamed on the
+  # derived function.
+  if IsDerivedFunction(clean_lines, linenum):
+    return
+
+  # Don't warn on out-of-line method definitions, as we would warn on the
+  # in-line declaration, if it isn't marked with 'override'.
+  if IsOutOfLineMethodDefinition(clean_lines, linenum):
+    return
+
+  # Long type names may be broken across multiple lines, usually in one
+  # of these forms:
+  #   LongType
+  #       ::LongTypeContinued &identifier
+  #   LongType::
+  #       LongTypeContinued &identifier
+  #   LongType<
+  #       ...>::LongTypeContinued &identifier
+  #
+  # If we detected a type split across two lines, join the previous
+  # line to current line so that we can match const references
+  # accordingly.
+  #
+  # Note that this only scans back one line, since scanning back
+  # arbitrary number of lines would be expensive. If you have a type
+  # that spans more than 2 lines, please use a typedef.
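+  # For example, if the previous line ends with 'ExtremelyLongTypeName'
+  # and the current line is '    ::Inner &param', the join below rebuilds
+  # 'ExtremelyLongTypeName::Inner &param' so the reference patterns can
+  # see the whole declaration.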
+  if linenum > 1:
+    previous = None
+    if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line):
+      # previous_line\n + ::current_line
+      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$',
+                        clean_lines.elided[linenum - 1])
+    elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line):
+      # previous_line::\n + current_line
+      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$',
+                        clean_lines.elided[linenum - 1])
+    if previous:
+      line = previous.group(1) + line.lstrip()
+    else:
+      # Check for templated parameter that is split across multiple lines
+      endpos = line.rfind('>')
+      if endpos > -1:
+        (_, startline, startpos) = ReverseCloseExpression(
+            clean_lines, linenum, endpos)
+        if startpos > -1 and startline < linenum:
+          # Found the matching < on an earlier line, collect all
+          # pieces up to current line.
+          line = ''
+          for i in xrange(startline, linenum + 1):
+            line += clean_lines.elided[i].strip()
+
+  # Check for non-const references in function parameters. A single '&' may
+  # be found in the following places:
+  #   inside expression: binary & for bitwise AND
+  #   inside expression: unary & for taking the address of something
+  #   inside declarators: reference parameter
+  # We will exclude the first two cases by checking that we are not inside a
+  # function body, including one that was just introduced by a trailing '{'.
+  # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare].
+  if (nesting_state.previous_stack_top and
+      not (isinstance(nesting_state.previous_stack_top, _ClassInfo) or
+           isinstance(nesting_state.previous_stack_top, _NamespaceInfo))):
+    # Not at toplevel, not within a class, and not within a namespace
+    return
+
+  # Avoid initializer lists. We only need to scan back from the
+  # current line for something that starts with ':'.
+  #
+  # We don't need to check the current line, since the '&' would
+  # appear inside the second set of parentheses on the current line as
+  # opposed to the first set.
+  if linenum > 0:
+    for i in xrange(linenum - 1, max(0, linenum - 10), -1):
+      previous_line = clean_lines.elided[i]
+      if not Search(r'[),]\s*$', previous_line):
+        break
+      if Match(r'^\s*:\s+\S', previous_line):
+        return
+
+  # Avoid preprocessor continuation lines.
+  if Search(r'\\\s*$', line):
+    return
+
+  # Avoid constructor initializer lists
+  if IsInitializerList(clean_lines, linenum):
+    return
+
+  # We allow non-const references in a few standard places, like functions
+  # called "swap()" or iostream operators like "<<" or ">>". Do not check
+  # those function parameters.
+  #
+  # We also accept & in static_assert, which looks like a function but
+  # it's actually a declaration expression.
+  whitelisted_functions = (r'(?:[sS]wap(?:<\w:+>)?|'
+                           r'operator\s*[<>][<>]|'
+                           r'static_assert|COMPILE_ASSERT'
+                           r')\s*\(')
+  if Search(whitelisted_functions, line):
+    return
+  elif not Search(r'\S+\([^)]*$', line):
+    # Don't see a whitelisted function on this line. Actually we
+    # didn't see any function name on this line, so this is likely a
+    # multi-line parameter list. Try a bit harder to catch this case.
+    for i in xrange(2):
+      if (linenum > i and
+          Search(whitelisted_functions, clean_lines.elided[linenum - i - 1])):
+        return
+
+  decls = ReplaceAll(r'{[^}]*}', ' ', line)  # exclude function body
+  for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls):
+    if (not Match(_RE_PATTERN_CONST_REF_PARAM, parameter) and
+        not Match(_RE_PATTERN_REF_STREAM_PARAM, parameter)):
+      pass
+      #error(filename, linenum, 'runtime/references', 2,
+      #      'Is this a non-const reference? '
+      #      'If so, make const or use a pointer: ' +
+      #      ReplaceAll(' *<', '<', parameter))
+
+
+def CheckCasts(filename, clean_lines, linenum, error):
+  """Various cast related checks.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+  """
+  line = clean_lines.elided[linenum]
+
+  # Check to see if they're using a conversion function cast.
+  # I just try to capture the most common basic types, though there are more.
+  # Parameterless conversion functions, such as bool(), are allowed as they are
+  # probably a member operator declaration or default constructor.
+  match = Search(
+      r'(\bnew\s+(?:const\s+)?|\S<\s*(?:const\s+)?)?\b'
+      r'(int|float|double|bool|char|int32|uint32|int64|uint64)'
+      r'(\([^)].*)', line)
+  expecting_function = ExpectingFunctionArgs(clean_lines, linenum)
+  if match and not expecting_function:
+    matched_type = match.group(2)
+
+    # matched_new_or_template is used to silence two false positives:
+    # - New operators
+    # - Template arguments with function types
+    #
+    # For template arguments, we match on types immediately following
+    # an opening bracket without any spaces. This is a fast way to
+    # silence the common case where the function type is the first
+    # template argument. False negative with less-than comparison is
+    # avoided because those operators are usually followed by a space.
+    #
+    #   function<double(double)>   // bracket + no space = false positive
+    #   value < double(42)         // bracket + space = true positive
+    matched_new_or_template = match.group(1)
+
+    # Avoid arrays by looking for brackets that come after the closing
+    # parenthesis.
+    if Match(r'\([^()]+\)\s*\[', match.group(3)):
+      return
+
+    # Other things to ignore:
+    # - Function pointers
+    # - Casts to pointer types
+    # - Placement new
+    # - Alias declarations
+    matched_funcptr = match.group(3)
+    if (matched_new_or_template is None and
+        not (matched_funcptr and
+             (Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(',
+                    matched_funcptr) or
+              matched_funcptr.startswith('(*)'))) and
+        not Match(r'\s*using\s+\S+\s*=\s*' + matched_type, line) and
+        not Search(r'new\(\S+\)\s*' + matched_type, line)):
+      error(filename, linenum, 'readability/casting', 4,
+            'Using deprecated casting style. '
+            'Use static_cast<%s>(...) instead' %
+            matched_type)
+
+  if not expecting_function:
+    CheckCStyleCast(filename, clean_lines, linenum, 'static_cast',
+                    r'\((int|float|double|bool|char|u?int(16|32|64))\)', error)
+
+  # This doesn't catch all cases. Consider (const char * const)"hello".
+  #
+  # (char *) "foo" should always be a const_cast (reinterpret_cast won't
+  # compile).
+  if CheckCStyleCast(filename, clean_lines, linenum, 'const_cast',
+                     r'\((char\s?\*+\s?)\)\s*"', error):
+    pass
+  else:
+    # Check pointer casts for other than string constants
+    CheckCStyleCast(filename, clean_lines, linenum, 'reinterpret_cast',
+                    r'\((\w+\s?\*+\s?)\)', error)
+
+  # In addition, we look for people taking the address of a cast. This
+  # is dangerous -- casts can assign to temporaries, so the pointer doesn't
+  # point where you think.
+  #
+  # Some non-identifier character is required before the '&' for the
+  # expression to be recognized as a cast. These are casts:
+  #   expression = &static_cast<int&>(temporary());
+  #   function(&(int*)(temporary()));
+  #
+  # This is not a cast:
+  #   reference_type&(int* function_param);
+  match = Search(
+      r'(?:[^\w]&\(([^)*][^)]*)\)[\w(])|'
+      r'(?:[^\w]&(static|dynamic|down|reinterpret)_cast\b)', line)
+  if match:
+    # Try a better error message when the & is bound to something
+    # dereferenced by the casted pointer, as opposed to the casted
+    # pointer itself.
+    parenthesis_error = False
+    match = Match(r'^(.*&(?:static|dynamic|down|reinterpret)_cast\b)<', line)
+    if match:
+      _, y1, x1 = CloseExpression(clean_lines, linenum, len(match.group(1)))
+      if x1 >= 0 and clean_lines.elided[y1][x1] == '(':
+        _, y2, x2 = CloseExpression(clean_lines, y1, x1)
+        if x2 >= 0:
+          extended_line = clean_lines.elided[y2][x2:]
+          if y2 < clean_lines.NumLines() - 1:
+            extended_line += clean_lines.elided[y2 + 1]
+          if Match(r'\s*(?:->|\[)', extended_line):
+            parenthesis_error = True
+
+    if parenthesis_error:
+      error(filename, linenum, 'readability/casting', 4,
+            ('Are you taking an address of something dereferenced '
+             'from a cast? Wrapping the dereferenced expression in '
+             'parentheses will make the binding more obvious'))
+    else:
+      error(filename, linenum, 'runtime/casting', 4,
+            ('Are you taking an address of a cast? '
+             'This is dangerous: could be a temp var. '
+             'Take the address before doing the cast, rather than after'))
+
+
+def CheckCStyleCast(filename, clean_lines, linenum, cast_type, pattern, error):
+  """Checks for a C-style cast by looking for the pattern.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    cast_type: The string for the C++ cast to recommend. This is either
+      reinterpret_cast, static_cast, or const_cast, depending on the pattern.
+    pattern: The regular expression used to find C-style casts.
+    error: The function to call with any errors found.
+
+  Returns:
+    True if an error was emitted.
+    False otherwise.
+  """
+  line = clean_lines.elided[linenum]
+  match = Search(pattern, line)
+  if not match:
+    return False
+
+  # Exclude lines with keywords that tend to look like casts
+  context = line[0:match.start(1) - 1]
+  if Match(r'.*\b(?:sizeof|alignof|alignas|[_A-Z][_A-Z0-9]*)\s*$', context):
+    return False
+
+  # Try expanding the current context to see if we are one level of
+  # parentheses inside a macro.
+  if linenum > 0:
+    for i in xrange(linenum - 1, max(0, linenum - 5), -1):
+      context = clean_lines.elided[i] + context
+      if Match(r'.*\b[_A-Z][_A-Z0-9]*\s*\((?:\([^()]*\)|[^()])*$', context):
+        return False
+
+  # operator++(int) and operator--(int)
+  if context.endswith(' operator++') or context.endswith(' operator--'):
+    return False
+
+  # A single unnamed argument for a function tends to look like an old-style
+  # cast. If we see those, don't issue warnings for deprecated casts.
+  remainder = line[match.end(0):]
+  if Match(r'^\s*(?:;|const\b|throw\b|final\b|override\b|[=>{),]|->)',
+           remainder):
+    return False
+
+  # At this point, all that should be left is actual casts.
+  error(filename, linenum, 'readability/casting', 4,
+        'Using C-style cast. Use %s<%s>(...) instead' %
+        (cast_type, match.group(1)))
+
+  return True
+
+
+def ExpectingFunctionArgs(clean_lines, linenum):
+  """Checks whether function type arguments are expected.
+
+  Args:
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+
+  Returns:
+    True if the line at 'linenum' is inside something that expects arguments
+    of function types.
+  """
+  line = clean_lines.elided[linenum]
+  return (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
+          (linenum >= 2 and
+           (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$',
+                  clean_lines.elided[linenum - 1]) or
+            Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$',
+                  clean_lines.elided[linenum - 2]) or
+            Search(r'\bstd::m?function\s*\<\s*$',
+                   clean_lines.elided[linenum - 1]))))
+
+
+_HEADERS_CONTAINING_TEMPLATES = (
+    ('<deque>', ('deque',)),
+    ('<functional>', ('unary_function', 'binary_function',
+                      'plus', 'minus', 'multiplies', 'divides', 'modulus',
+                      'negate',
+                      'equal_to', 'not_equal_to', 'greater', 'less',
+                      'greater_equal', 'less_equal',
+                      'logical_and', 'logical_or', 'logical_not',
+                      'unary_negate', 'not1', 'binary_negate', 'not2',
+                      'bind1st', 'bind2nd',
+                      'pointer_to_unary_function',
+                      'pointer_to_binary_function',
+                      'ptr_fun',
+                      'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
+                      'mem_fun_ref_t',
+                      'const_mem_fun_t', 'const_mem_fun1_t',
+                      'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
+                      'mem_fun_ref',
+                      )),
+    ('<limits>', ('numeric_limits',)),
+    ('<list>', ('list',)),
+    ('<map>', ('map', 'multimap',)),
+    ('<memory>', ('allocator', 'make_shared', 'make_unique', 'shared_ptr',
+                  'unique_ptr', 'weak_ptr')),
+    ('<queue>', ('queue', 'priority_queue',)),
+    ('<set>', ('set', 'multiset',)),
+    ('<stack>', ('stack',)),
+    ('<string>', ('char_traits', 'basic_string',)),
+    ('<tuple>', ('tuple',)),
+    ('<unordered_map>', ('unordered_map', 'unordered_multimap')),
+    ('<unordered_set>', ('unordered_set', 'unordered_multiset')),
+    ('<utility>', ('pair',)),
+    ('<vector>', ('vector',)),
+
+    # gcc extensions.
+    # Note: std::hash is their hash, ::hash is our hash
+    ('<hash_map>', ('hash_map', 'hash_multimap',)),
+    ('<hash_set>', ('hash_set', 'hash_multiset',)),
+    ('<slist>', ('slist',)),
+    )
+
+_HEADERS_MAYBE_TEMPLATES = (
+    ('<algorithm>', ('copy', 'max', 'min', 'min_element', 'sort',
+                     'transform',
+                     )),
+    ('<utility>', ('forward', 'make_pair', 'move', 'swap')),
+    )
+
+_RE_PATTERN_STRING = re.compile(r'\bstring\b')
+
+_re_pattern_headers_maybe_templates = []
+for _header, _templates in _HEADERS_MAYBE_TEMPLATES:
+  for _template in _templates:
+    # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
+    # type::max().
+    _re_pattern_headers_maybe_templates.append(
+        (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
+         _template,
+         _header))
+
+# Other scripts may reach in and modify this pattern.
+_re_pattern_templates = []
+for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
+  for _template in _templates:
+    _re_pattern_templates.append(
+        (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
+         _template + '<>',
+         _header))
+
+
+def FilesBelongToSameModule(filename_cc, filename_h):
+  """Check if these two filenames belong to the same module.
+
+  The concept of a 'module' here is as follows:
+  foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
+  same 'module' if they are in the same directory.
+  some/path/public/xyzzy and some/path/internal/xyzzy are also considered
+  to belong to the same module here.
+
+  If the filename_cc contains a longer path than the filename_h, for example,
+  '/absolute/path/to/base/sysinfo.cc', and this file would include
+  'base/sysinfo.h', this function also produces the prefix needed to open the
+  header. This is used by the caller of this function to more robustly open the
+  header file. We don't have access to the real include paths in this context,
+  so we need this guesswork here.
+
+  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
+  according to this implementation. Because of this, this function gives
+  some false positives. This should be sufficiently rare in practice.
+
+  Args:
+    filename_cc: is the path for the .cc file
+    filename_h: is the path for the header file
+
+  Returns:
+    Tuple with a bool and a string:
+    bool: True if filename_cc and filename_h belong to the same module.
+    string: the additional prefix needed to open the header file.
+  """
+
+  fileinfo = FileInfo(filename_cc)
+  if not fileinfo.IsSource():
+    return (False, '')
+  filename_cc = filename_cc[:-len(fileinfo.Extension())]
+  matched_test_suffix = Search(_TEST_FILE_SUFFIX, fileinfo.BaseName())
+  if matched_test_suffix:
+    filename_cc = filename_cc[:-len(matched_test_suffix.group(1))]
+  filename_cc = filename_cc.replace('/public/', '/')
+  filename_cc = filename_cc.replace('/internal/', '/')
+
+  if not filename_h.endswith('.h'):
+    return (False, '')
+  filename_h = filename_h[:-len('.h')]
+  if filename_h.endswith('-inl'):
+    filename_h = filename_h[:-len('-inl')]
+  filename_h = filename_h.replace('/public/', '/')
+  filename_h = filename_h.replace('/internal/', '/')
+
+  files_belong_to_same_module = filename_cc.endswith(filename_h)
+  common_path = ''
+  if files_belong_to_same_module:
+    common_path = filename_cc[:-len(filename_h)]
+  return files_belong_to_same_module, common_path
+
+
+def UpdateIncludeState(filename, include_dict, io=codecs):
+  """Fill up the include_dict with new includes found from the file.
+
+  Args:
+    filename: the name of the header to read.
+    include_dict: a dictionary in which the headers are inserted.
+    io: The io factory to use to read the file. Provided for testability.
+
+  Returns:
+    True if a header was successfully added. False otherwise.
+  """
+  headerfile = None
+  try:
+    headerfile = io.open(filename, 'r', 'utf8', 'replace')
+  except IOError:
+    return False
+  linenum = 0
+  for line in headerfile:
+    linenum += 1
+    clean_line = CleanseComments(line)
+    match = _RE_PATTERN_INCLUDE.search(clean_line)
+    if match:
+      include = match.group(2)
+      include_dict.setdefault(include, linenum)
+  return True
+
+
+def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
+                              io=codecs):
+  """Reports missing STL includes.
+
+  This function will output warnings to make sure you are including the headers
+  necessary for the STL containers and functions that you use. We only give one
+  reason to include a header. For example, if you use both equal_to<> and
+  less<> in a .h file, only one (the latter in the file) of these will be
+  reported as a reason to include the <functional>.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    include_state: An _IncludeState instance.
+    error: The function to call with any errors found.
+    io: The IO factory to use to read the header file. Provided for unittest
+        injection.
+  """
+  required = {}  # A map of header name to linenumber and the template entity.
+                 # Example of required: { '<functional>': (1219, 'less<>') }
+
+  for linenum in xrange(clean_lines.NumLines()):
+    line = clean_lines.elided[linenum]
+    if not line or line[0] == '#':
+      continue
+
+    # String is special -- it is a non-templatized type in STL.
+    matched = _RE_PATTERN_STRING.search(line)
+    if matched:
+      # Don't warn about strings in non-STL namespaces:
+      # (We check only the first match per line; good enough.)
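+      # For example, both 'std::string s;' and a bare 'string s;' make
+      # <string> required, while 'mylib::string s;' is assumed to be an
+      # unrelated type and is skipped by the prefix test below.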
+      prefix = line[:matched.start()]
+      if prefix.endswith('std::') or not prefix.endswith('::'):
+        required['<string>'] = (linenum, 'string')
+
+    for pattern, template, header in _re_pattern_headers_maybe_templates:
+      if pattern.search(line):
+        required[header] = (linenum, template)
+
+    # The following check is just a speed up, no semantics are changed.
+    if not '<' in line:  # Reduces the cpu time usage by skipping lines.
+      continue
+
+    for pattern, template, header in _re_pattern_templates:
+      matched = pattern.search(line)
+      if matched:
+        # Don't warn about IWYU in non-STL namespaces:
+        # (We check only the first match per line; good enough.)
+        prefix = line[:matched.start()]
+        if prefix.endswith('std::') or not prefix.endswith('::'):
+          required[header] = (linenum, template)
+
+  # The policy is that if you #include something in foo.h you don't need to
+  # include it again in foo.cc. Here, we will look at possible includes.
+  # Let's flatten the include_state include_list and copy it into a dictionary.
+  include_dict = dict([item for sublist in include_state.include_list
+                       for item in sublist])
+
+  # Did we find the header for this file (if any) and successfully load it?
+  header_found = False
+
+  # Use the absolute path so that matching works properly.
+  abs_filename = FileInfo(filename).FullName()
+
+  # For Emacs's flymake.
+  # If cpplint is invoked from Emacs's flymake, a temporary file is generated
+  # by flymake and that file name might end with '_flymake.cc'. In that case,
+  # restore the original file name here so that the corresponding header file
+  # can be found.
+  # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h'
+  # instead of 'foo_flymake.h'
+  abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename)
+
+  # include_dict is modified during iteration, so we iterate over a copy of
+  # the keys.
+  header_keys = include_dict.keys()
+  for header in header_keys:
+    (same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
+    fullpath = common_path + header
+    if same_module and UpdateIncludeState(fullpath, include_dict, io):
+      header_found = True
+
+  # If we can't find the header file for a .cc, assume it's because we don't
+  # know where to look. In that case we'll give up as we're not sure they
+  # didn't include it in the .h file.
+  # TODO(unknown): Do a better job of finding .h files so we are confident that
+  # not having the .h file means there isn't one.
+  if filename.endswith('.cc') and not header_found:
+    return
+
+  # All the lines have been processed, report the errors found.
+  for required_header_unstripped in required:
+    template = required[required_header_unstripped][1]
+    if required_header_unstripped.strip('<>"') not in include_dict:
+      error(filename, required[required_header_unstripped][0],
+            'build/include_what_you_use', 4,
+            'Add #include ' + required_header_unstripped + ' for ' + template)
+
+
+_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<')
+
+
+def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error):
+  """Check that make_pair's template arguments are deduced.
+
+  G++ 4.6 in C++11 mode fails badly if make_pair's template arguments are
+  specified explicitly, and such use isn't intended in any case.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+  """
+  line = clean_lines.elided[linenum]
+  match = _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line)
+  if match:
+    error(filename, linenum, 'build/explicit_make_pair',
+          4,  # 4 = high confidence
+          'For C++11-compatibility, omit template arguments from make_pair'
+          ' OR use pair directly OR if appropriate, construct a pair directly')
+
+
+def CheckRedundantVirtual(filename, clean_lines, linenum, error):
+  """Check if line contains a redundant "virtual" function-specifier.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+  """
+  # Look for "virtual" on current line.
+  line = clean_lines.elided[linenum]
+  virtual = Match(r'^(.*)(\bvirtual\b)(.*)$', line)
+  if not virtual: return
+
+  # Ignore "virtual" keywords that are near access-specifiers. These
+  # are only used in class base-specifier and do not apply to member
+  # functions.
+  if (Search(r'\b(public|protected|private)\s+$', virtual.group(1)) or
+      Match(r'^\s+(public|protected|private)\b', virtual.group(3))):
+    return
+
+  # Ignore the "virtual" keyword from virtual base classes. Usually
+  # there is a colon on the same line in these cases (virtual base
+  # classes are rare in google3 because multiple inheritance is rare).
+  if Match(r'^.*[^:]:[^:].*$', line): return
+
+  # Look for the next opening parenthesis. This is the start of the
+  # parameter list (possibly on the next line shortly after virtual).
+  # TODO(unknown): doesn't work if there are virtual functions with
+  # decltype() or other things that use parentheses, but csearch suggests
+  # that this is rare.
+  end_col = -1
+  end_line = -1
+  start_col = len(virtual.group(2))
+  for start_line in xrange(linenum, min(linenum + 3, clean_lines.NumLines())):
+    line = clean_lines.elided[start_line][start_col:]
+    parameter_list = Match(r'^([^(]*)\(', line)
+    if parameter_list:
+      # Match parentheses to find the end of the parameter list
+      (_, end_line, end_col) = CloseExpression(
+          clean_lines, start_line, start_col + len(parameter_list.group(1)))
+      break
+    start_col = 0
+
+  if end_col < 0:
+    return  # Couldn't find end of parameter list, give up
+
+  # Look for "override" or "final" after the parameter list
+  # (possibly on the next few lines).
+  for i in xrange(end_line, min(end_line + 3, clean_lines.NumLines())):
+    line = clean_lines.elided[i][end_col:]
+    match = Search(r'\b(override|final)\b', line)
+    if match:
+      error(filename, linenum, 'readability/inheritance', 4,
+            ('"virtual" is redundant since function is '
+             'already declared as "%s"' % match.group(1)))
+
+    # Set end_col to check whole lines after we are done with the
+    # first line.
+    end_col = 0
+    if Search(r'[^\w]\s*$', line):
+      break
+
+
+def CheckRedundantOverrideOrFinal(filename, clean_lines, linenum, error):
+  """Check if line contains a redundant "override" or "final" virt-specifier.
+
+  Args:
+    filename: The name of the current file.
+    clean_lines: A CleansedLines instance containing the file.
+    linenum: The number of the line to check.
+    error: The function to call with any errors found.
+  """
+  # Look for closing parenthesis nearby. We need one to confirm where
+  # the declarator ends and where the virt-specifier starts to avoid
+  # false positives.
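+  # For example, in 'void Draw() final override;' everything after the
+  # last ')' carries the virt-specifiers; if both "final" and "override"
+  # appear there, "override" adds nothing.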
+ line = clean_lines.elided[linenum] + declarator_end = line.rfind(')') + if declarator_end >= 0: + fragment = line[declarator_end:] + else: + if linenum > 1 and clean_lines.elided[linenum - 1].rfind(')') >= 0: + fragment = line + else: + return + + # Check that at most one of "override" or "final" is present, not both + if Search(r'\boverride\b', fragment) and Search(r'\bfinal\b', fragment): + error(filename, linenum, 'readability/inheritance', 4, + ('"override" is redundant since function is ' + 'already declared as "final"')) + + + + +# Returns true if we are at a new block, and it is directly +# inside of a namespace. +def IsBlockInNameSpace(nesting_state, is_forward_declaration): + """Checks that the new block is directly in a namespace. + + Args: + nesting_state: The _NestingState object that contains info about our state. + is_forward_declaration: If the class is a forward declared class. + Returns: + Whether or not the new block is directly in a namespace. + """ + if is_forward_declaration: + if len(nesting_state.stack) >= 1 and ( + isinstance(nesting_state.stack[-1], _NamespaceInfo)): + return True + else: + return False + + return (len(nesting_state.stack) > 1 and + nesting_state.stack[-1].check_namespace_indentation and + isinstance(nesting_state.stack[-2], _NamespaceInfo)) + + +def ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item, + raw_lines_no_comments, linenum): + """This method determines if we should apply our namespace indentation check. + + Args: + nesting_state: The current nesting state. + is_namespace_indent_item: If we just put a new class on the stack, True. + If the top of the stack is not a class, or we did not recently + add the class, False. + raw_lines_no_comments: The lines without the comments. + linenum: The current line number we are processing. + + Returns: + True if we should apply our namespace indentation check. Currently, it + only works for classes and namespaces inside of a namespace. + """ + + is_forward_declaration = IsForwardClassDeclaration(raw_lines_no_comments, + linenum) + + if not (is_namespace_indent_item or is_forward_declaration): + return False + + # If we are in a macro, we do not want to check the namespace indentation. + if IsMacroDefinition(raw_lines_no_comments, linenum): + return False + + return IsBlockInNameSpace(nesting_state, is_forward_declaration) + + +# Call this method if the line is directly inside of a namespace. +# If the line above is blank (excluding comments) or the start of +# an inner namespace, it cannot be indented. +def CheckItemIndentationInNamespace(filename, raw_lines_no_comments, linenum, + error): + line = raw_lines_no_comments[linenum] + if Match(r'^\s+', line): + error(filename, linenum, 'runtime/indentation_namespace', 4, + 'Do not indent within a namespace') + + +def ProcessLine(filename, file_extension, clean_lines, line, + include_state, function_state, nesting_state, error, + extra_check_functions=[]): + """Processes a single line in the file. + + Args: + filename: Filename of the file that is being processed. + file_extension: The extension (dot not included) of the file. + clean_lines: An array of strings, each representing a line of the file, + with comments stripped. + line: Number of line being processed. + include_state: An _IncludeState instance in which the headers are inserted. + function_state: A _FunctionState instance which counts function lines, etc. 
+ nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: A callable to which errors are reported, which takes 4 arguments: + filename, line number, error level, and message + extra_check_functions: An array of additional check functions that will be + run on each source line. Each function takes 4 + arguments: filename, clean_lines, line, error + """ + raw_lines = clean_lines.raw_lines + ParseNolintSuppressions(filename, raw_lines[line], line, error) + nesting_state.Update(filename, clean_lines, line, error) + CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line, + error) + if nesting_state.InAsmBlock(): return + CheckForFunctionLengths(filename, clean_lines, line, function_state, error) + CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error) + CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error) + CheckLanguage(filename, clean_lines, line, file_extension, include_state, + nesting_state, error) + CheckForNonConstReference(filename, clean_lines, line, nesting_state, error) + CheckForNonStandardConstructs(filename, clean_lines, line, + nesting_state, error) + CheckVlogArguments(filename, clean_lines, line, error) + CheckPosixThreading(filename, clean_lines, line, error) + CheckInvalidIncrement(filename, clean_lines, line, error) + CheckMakePairUsesDeduction(filename, clean_lines, line, error) + CheckRedundantVirtual(filename, clean_lines, line, error) + CheckRedundantOverrideOrFinal(filename, clean_lines, line, error) + for check_fn in extra_check_functions: + check_fn(filename, clean_lines, line, error) + +def FlagCxx11Features(filename, clean_lines, linenum, error): + """Flag those c++11 features that we only allow in certain places. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line) + + # Flag unapproved C++ TR1 headers. + if include and include.group(1).startswith('tr1/'): + error(filename, linenum, 'build/c++tr1', 5, + ('C++ TR1 headers such as <%s> are unapproved.') % include.group(1)) + + # Flag unapproved C++11 headers. + #if include and include.group(1) in ('cfenv', + # 'condition_variable', + # 'fenv.h', + # 'future', + # 'mutex', + # 'thread', + # 'chrono', + # 'ratio', + # 'regex', + # 'system_error', + # ): + # error(filename, linenum, 'build/c++11', 5, + # ('<%s> is an unapproved C++11 header.') % include.group(1)) + + # The only place where we need to worry about C++11 keywords and library + # features in preprocessor directives is in macro definitions. + if Match(r'\s*#', line) and not Match(r'\s*#\s*define\b', line): return + + # These are classes and free functions. The classes are always + # mentioned as std::*, but we only catch the free functions if + # they're not found by ADL. They're alphabetical by header. + for top_name in ( + # type_traits + 'alignment_of', + 'aligned_union', + ): + if Search(r'\bstd::%s\b' % top_name, line): + error(filename, linenum, 'build/c++11', 5, + ('std::%s is an unapproved C++11 class or function. Send c-style ' + 'an example of where it would make your code more readable, and ' + 'they may let you use it.') % top_name) + + +def FlagCxx14Features(filename, clean_lines, linenum, error): + """Flag those C++14 features that we restrict. 
+ + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line) + + # Flag unapproved C++14 headers. + if include and include.group(1) in ('scoped_allocator', 'shared_mutex'): + error(filename, linenum, 'build/c++14', 5, + ('<%s> is an unapproved C++14 header.') % include.group(1)) + + +def ProcessFileData(filename, file_extension, lines, error, + extra_check_functions=[]): + """Performs lint checks and reports any errors to the given error function. + + Args: + filename: Filename of the file that is being processed. + file_extension: The extension (dot not included) of the file. + lines: An array of strings, each representing a line of the file, with the + last element being empty if the file is terminated with a newline. + error: A callable to which errors are reported, which takes 4 arguments: + filename, line number, error level, and message + extra_check_functions: An array of additional check functions that will be + run on each source line. Each function takes 4 + arguments: filename, clean_lines, line, error + """ + lines = (['// marker so line numbers and indices both start at 1'] + lines + + ['// marker so line numbers end in a known way']) + + include_state = _IncludeState() + function_state = _FunctionState() + nesting_state = NestingState() + + ResetNolintSuppressions() + + CheckForCopyright(filename, lines, error) + ProcessGlobalSuppresions(lines) + RemoveMultiLineComments(filename, lines, error) + clean_lines = CleansedLines(lines) + + if file_extension == 'h': + CheckForHeaderGuard(filename, clean_lines, error) + + for line in xrange(clean_lines.NumLines()): + ProcessLine(filename, file_extension, clean_lines, line, + include_state, function_state, nesting_state, error, + extra_check_functions) + FlagCxx11Features(filename, clean_lines, line, error) + nesting_state.CheckCompletedBlocks(filename, error) + + CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error) + + # Check that the .cc file has included its header if it exists. + if _IsSourceExtension(file_extension): + CheckHeaderFileIncluded(filename, include_state, error) + + # We check here rather than inside ProcessLine so that we see raw + # lines rather than "cleaned" lines. + CheckForBadCharacters(filename, lines, error) + + CheckForNewlineAtEOF(filename, lines, error) + +def ProcessConfigOverrides(filename): + """ Loads the configuration files and processes the config overrides. + + Args: + filename: The name of the file being processed by the linter. + + Returns: + False if the current |filename| should not be processed further. + """ + + abs_filename = os.path.abspath(filename) + cfg_filters = [] + keep_looking = True + while keep_looking: + abs_path, base_name = os.path.split(abs_filename) + if not base_name: + break # Reached the root directory. + + cfg_file = os.path.join(abs_path, "CPPLINT.cfg") + abs_filename = abs_path + if not os.path.isfile(cfg_file): + continue + + try: + with open(cfg_file) as file_handle: + for line in file_handle: + line, _, _ = line.partition('#') # Remove comments. 
+ if not line.strip(): + continue + + name, _, val = line.partition('=') + name = name.strip() + val = val.strip() + if name == 'set noparent': + keep_looking = False + elif name == 'filter': + cfg_filters.append(val) + elif name == 'exclude_files': + # When matching exclude_files pattern, use the base_name of + # the current file name or the directory name we are processing. + # For example, if we are checking for lint errors in /foo/bar/baz.cc + # and we found the .cfg file at /foo/CPPLINT.cfg, then the config + # file's "exclude_files" filter is meant to be checked against "bar" + # and not "baz" nor "bar/baz.cc". + if base_name: + pattern = re.compile(val) + if pattern.match(base_name): + sys.stderr.write('Ignoring "%s": file excluded by "%s". ' + 'File path component "%s" matches ' + 'pattern "%s"\n' % + (filename, cfg_file, base_name, val)) + return False + elif name == 'linelength': + global _line_length + try: + _line_length = int(val) + except ValueError: + sys.stderr.write('Line length must be numeric.') + elif name == 'root': + global _root + _root = val + else: + sys.stderr.write( + 'Invalid configuration option (%s) in file %s\n' % + (name, cfg_file)) + + except IOError: + sys.stderr.write( + "Skipping config file '%s': Can't open for reading\n" % cfg_file) + keep_looking = False + + # Apply all the accumulated filters in reverse order (top-level directory + # config options having the least priority). + for filter in reversed(cfg_filters): + _AddFilters(filter) + + return True + + +def ProcessFile(filename, vlevel, extra_check_functions=[]): + """Does google-lint on a single file. + + Args: + filename: The name of the file to parse. + + vlevel: The level of errors to report. Every error of confidence + >= verbose_level will be reported. 0 is a good default. + + extra_check_functions: An array of additional check functions that will be + run on each source line. Each function takes 4 + arguments: filename, clean_lines, line, error + """ + + _SetVerboseLevel(vlevel) + _BackupFilters() + + if not ProcessConfigOverrides(filename): + _RestoreFilters() + return + + lf_lines = [] + crlf_lines = [] + try: + # Support the UNIX convention of using "-" for stdin. Note that + # we are not opening the file with universal newline support + # (which codecs doesn't support anyway), so the resulting lines do + # contain trailing '\r' characters if we are reading a file that + # has CRLF endings. + # If after the split a trailing '\r' is present, it is removed + # below. + if filename == '-': + lines = codecs.StreamReaderWriter(sys.stdin, + codecs.getreader('utf8'), + codecs.getwriter('utf8'), + 'replace').read().split('\n') + else: + lines = codecs.open(filename, 'r', 'utf8', 'replace').read().split('\n') + + # Remove trailing '\r'. + # The -1 accounts for the extra trailing blank line we get from split() + for linenum in range(len(lines) - 1): + if lines[linenum].endswith('\r'): + lines[linenum] = lines[linenum].rstrip('\r') + crlf_lines.append(linenum + 1) + else: + lf_lines.append(linenum + 1) + + except IOError: + sys.stderr.write( + "Skipping input '%s': Can't open for reading\n" % filename) + _RestoreFilters() + return + + # Note, if no dot is found, this will give the entire filename as the ext. + file_extension = filename[filename.rfind('.') + 1:] + + # When reading from stdin, the extension is unknown, so no cpplint tests + # should rely on the extension. 
+ if filename != '-' and file_extension not in _valid_extensions: + sys.stderr.write('Ignoring %s; not a valid file name ' + '(%s)\n' % (filename, ', '.join(_valid_extensions))) + else: + ProcessFileData(filename, file_extension, lines, Error, + extra_check_functions) + + # If end-of-line sequences are a mix of LF and CR-LF, issue + # warnings on the lines with CR. + # + # Don't issue any warnings if all lines are uniformly LF or CR-LF, + # since critique can handle these just fine, and the style guide + # doesn't dictate a particular end of line sequence. + # + # We can't depend on os.linesep to determine what the desired + # end-of-line sequence should be, since that will return the + # server-side end-of-line sequence. + if lf_lines and crlf_lines: + # Warn on every line with CR. An alternative approach might be to + # check whether the file is mostly CRLF or just LF, and warn on the + # minority, we bias toward LF here since most tools prefer LF. + for linenum in crlf_lines: + Error(filename, linenum, 'whitespace/newline', 1, + 'Unexpected \\r (^M) found; better to use only \\n') + + sys.stderr.write('Done processing %s\n' % filename) + _RestoreFilters() + + +def PrintUsage(message): + """Prints a brief usage string and exits, optionally with an error message. + + Args: + message: The optional error message. + """ + sys.stderr.write(_USAGE) + if message: + sys.exit('\nFATAL ERROR: ' + message) + else: + sys.exit(1) + + +def PrintCategories(): + """Prints a list of all the error-categories used by error messages. + + These are the categories used to filter messages via --filter. + """ + sys.stderr.write(''.join(' %s\n' % cat for cat in _ERROR_CATEGORIES)) + sys.exit(0) + + +def ParseArguments(args): + """Parses the command line arguments. + + This may set the output format and verbosity level as side-effects. + + Args: + args: The command line arguments: + + Returns: + The list of filenames to lint. 
+ """ + try: + (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=', + 'counting=', + 'filter=', + 'root=', + 'linelength=', + 'extensions=']) + except getopt.GetoptError: + PrintUsage('Invalid arguments.') + + verbosity = _VerboseLevel() + output_format = _OutputFormat() + filters = '' + counting_style = '' + + for (opt, val) in opts: + if opt == '--help': + PrintUsage(None) + elif opt == '--output': + if val not in ('emacs', 'vs7', 'eclipse'): + PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.') + output_format = val + elif opt == '--verbose': + verbosity = int(val) + elif opt == '--filter': + filters = val + if not filters: + PrintCategories() + elif opt == '--counting': + if val not in ('total', 'toplevel', 'detailed'): + PrintUsage('Valid counting options are total, toplevel, and detailed') + counting_style = val + elif opt == '--root': + global _root + _root = val + elif opt == '--linelength': + global _line_length + try: + _line_length = int(val) + except ValueError: + PrintUsage('Line length must be digits.') + elif opt == '--extensions': + global _valid_extensions + try: + _valid_extensions = set(val.split(',')) + except ValueError: + PrintUsage('Extensions must be comma seperated list.') + + if not filenames: + PrintUsage('No files were specified.') + + _SetOutputFormat(output_format) + _SetVerboseLevel(verbosity) + _SetFilters(filters) + _SetCountingStyle(counting_style) + + return filenames + + +def main(): + filenames = ParseArguments(sys.argv[1:]) + + # Change stderr to write with replacement characters so we don't die + # if we try to print something containing non-ASCII characters. + sys.stderr = codecs.StreamReaderWriter(sys.stderr, + codecs.getreader('utf8'), + codecs.getwriter('utf8'), + 'replace') + + _cpplint_state.ResetErrorCounts() + for filename in filenames: + ProcessFile(filename, _cpplint_state.verbose_level) + _cpplint_state.PrintErrorCounts() + + sys.exit(_cpplint_state.error_count > 0) + + +if __name__ == '__main__': + main() diff --git a/inference-engine/src/CMakeLists.txt b/inference-engine/src/CMakeLists.txt index 1d68d56d92ded9..bd1793f8e0a260 100644 --- a/inference-engine/src/CMakeLists.txt +++ b/inference-engine/src/CMakeLists.txt @@ -8,8 +8,6 @@ set (CMAKE_CXX_STANDARD 11) set (CMAKE_CXX_STANDARD_REQUIRED ON) #################################### -add_subdirectory(inference_engine) - if(ENABLE_MKL_DNN) add_subdirectory(mkldnn_plugin) endif() @@ -26,6 +24,8 @@ if (ENABLE_GNA) add_subdirectory(gna_plugin) endif() +add_subdirectory(inference_engine) + add_subdirectory(hetero_plugin) set(InferenceEngine_LIBRARIES inference_engine) diff --git a/inference-engine/src/cldnn_engine/CMakeLists.txt b/inference-engine/src/cldnn_engine/CMakeLists.txt index a2d81c326b530c..211f6600286c95 100644 --- a/inference-engine/src/cldnn_engine/CMakeLists.txt +++ b/inference-engine/src/cldnn_engine/CMakeLists.txt @@ -4,75 +4,27 @@ set (TARGET_NAME "clDNNPlugin") -file (GLOB MAIN_SRC - ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp - ) - -file (GLOB LIBRARY_HEADERS - ${CMAKE_CURRENT_SOURCE_DIR}/*.h - ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp - ) +file(GLOB MAIN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) +file(GLOB LIBRARY_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/*.h) addVersionDefines(cldnn_engine.cpp CI_BUILD_NUMBER CLDNN_VERSION) -add_definitions(-DIMPLEMENT_INFERENCE_ENGINE_PLUGIN) - -# Create named folders for the sources within the .vcproj -# Empty name lists them directly under the .vcproj -source_group("src" FILES ${LIBRARY_SRC}) -source_group("include" 
FILES ${LIBRARY_HEADERS}) +ie_add_plugin(NAME ${TARGET_NAME} + DEVICE_NAME "GPU" + SOURCES ${MAIN_SRC} ${LIBRARY_HEADERS} + VERSION_DEFINES_FOR cldnn_engine.cpp) - -#TODO: clDNN for non windows... -if (APPLE) -elseif (UNIX) - set(CLDNN_BUILD_PLATFORM Centos7/) - set(CLDNN_PLATFORM Linux64/) - set(CLDNN_LIB_FOLDER bin) - set(CLDNN_LIB_NAME clDNN64) -else () - #32 bits platform - if (NOT "${CMAKE_SIZEOF_VOID_P}" EQUAL "8") - set(CLDNN_PLATFORM Windows32/) - set(CLDNN_LIB_NAME clDNN32) - else() - set(CLDNN_PLATFORM Windows64/) - set(CLDNN_LIB_NAME clDNN64) - endif() - set(CLDNN_LIB_FOLDER lib) -endif () +target_link_libraries(${TARGET_NAME} PRIVATE ${INTEL_ITT_LIBS} inference_engine clDNN_shlib pugixml) set (CLDNN_TOP_FOLDER ${IE_MAIN_SOURCE_DIR}/thirdparty/clDNN) - -# Properties->C/C++->General->Additional Include Directories -include_directories ( - ${CMAKE_CURRENT_SOURCE_DIR} - ${IE_MAIN_SOURCE_DIR}/include +target_include_directories(${TARGET_NAME} PRIVATE ${CLDNN_TOP_FOLDER}/api ${CLDNN_TOP_FOLDER}/include - #${OCL_DIST}/include ${IE_MAIN_SOURCE_DIR}/src/inference_engine ${IE_MAIN_SOURCE_DIR}/thirdparty/pugixml/src) -# clDnn build configuration, change that if you'd like to debug -if (${CMAKE_BUILD_TYPE} STREQUAL "Debug" ) - set (Configuration ${CMAKE_BUILD_TYPE}) -else() - set (Configuration Release) -endif() +# copy default global xml file describing the custom kernels and the *.cl files -set(CLDNN_LIBRARY clDNN_shlib) - -# Create library file from sources. -add_library(${TARGET_NAME} SHARED - ${MAIN_SRC} - ${LIBRARY_HEADERS}) -target_link_libraries(${TARGET_NAME} ${INTEL_ITT_LIBS} inference_engine ${CLDNN_LIBRARY}) - -set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}) - -#copy default global xml file describing the custom kernels and the *.cl files add_custom_command(TARGET ${TARGET_NAME} POST_BUILD - COMMAND "${CMAKE_COMMAND}" -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/cldnn_global_custom_kernels $/cldnn_global_custom_kernels) - -add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}) + COMMAND "${CMAKE_COMMAND}" -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/cldnn_global_custom_kernels + $/cldnn_global_custom_kernels) diff --git a/inference-engine/src/cldnn_engine/cldnn_async_infer_request.cpp b/inference-engine/src/cldnn_engine/cldnn_async_infer_request.cpp new file mode 100644 index 00000000000000..7a2f155fb51c60 --- /dev/null +++ b/inference-engine/src/cldnn_engine/cldnn_async_infer_request.cpp @@ -0,0 +1,24 @@ +// Copyright (C) 2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "cldnn_async_infer_request.h" +#include + +CLDNNPlugin::CLDNNAsyncInferRequest::CLDNNAsyncInferRequest(const InferenceEngine::InferRequestInternal::Ptr &inferRequest, + const InferenceEngine::ITaskExecutor::Ptr &taskExecutor, + const InferenceEngine::TaskSynchronizer::Ptr &taskSynchronizer, + const InferenceEngine::ITaskExecutor::Ptr &callbackExecutor) + : InferenceEngine::AsyncInferRequestThreadSafeDefault(inferRequest, taskExecutor, taskSynchronizer, callbackExecutor) + { } + +CLDNNPlugin::CLDNNAsyncInferRequest::~CLDNNAsyncInferRequest() { + waitAllAsyncTasks(); +} + +void CLDNNPlugin::CLDNNAsyncInferRequest::Infer() { + _callbackManager.disableCallback(); + StartAsync(); + Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY); + _callbackManager.enableCallback(); +} diff --git a/inference-engine/src/cldnn_engine/cldnn_async_infer_request.h b/inference-engine/src/cldnn_engine/cldnn_async_infer_request.h new file mode 
100644 index 00000000000000..dfa467c0ff1bf7 --- /dev/null +++ b/inference-engine/src/cldnn_engine/cldnn_async_infer_request.h @@ -0,0 +1,26 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include "cldnn_infer_request.h" + +namespace CLDNNPlugin { + +class CLDNNAsyncInferRequest : virtual public InferenceEngine::AsyncInferRequestThreadSafeDefault { +public: + CLDNNAsyncInferRequest(const InferenceEngine::InferRequestInternal::Ptr &inferRequest, + const InferenceEngine::ITaskExecutor::Ptr &taskExecutor, + const InferenceEngine::TaskSynchronizer::Ptr &taskSynchronizer, + const InferenceEngine::ITaskExecutor::Ptr &callbackExecutor); + + ~CLDNNAsyncInferRequest() override; + + void Infer() override; +}; + +} // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/cldnn_config.cpp b/inference-engine/src/cldnn_engine/cldnn_config.cpp new file mode 100644 index 00000000000000..1a002c35169e05 --- /dev/null +++ b/inference-engine/src/cldnn_engine/cldnn_config.cpp @@ -0,0 +1,246 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include "cldnn_config.h" +#include "cpp_interfaces/exception2status.hpp" + +#if defined(_WIN32) +#define mkdir(dir, mode) _mkdir(dir) +#endif + +using namespace InferenceEngine; + +namespace CLDNNPlugin { + +void Config::UpdateFromMap(const std::map<std::string, std::string>& configMap) { + for (auto& kvp : configMap) { + std::string key = kvp.first; + std::string val = kvp.second; + + if (key.compare(PluginConfigParams::KEY_PERF_COUNT) == 0) { + if (val.compare(PluginConfigParams::YES) == 0) { + useProfiling = true; + } else if (val.compare(PluginConfigParams::NO) == 0) { + useProfiling = false; + } else { + THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val; + } + } else if (key.compare(PluginConfigParams::KEY_DYN_BATCH_ENABLED) == 0) { + if (val.compare(PluginConfigParams::YES) == 0) { + enableDynamicBatch = true; + } else if (val.compare(PluginConfigParams::NO) == 0) { + enableDynamicBatch = false; + } else { + THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val; + } + } else if (key.compare(PluginConfigParams::KEY_DUMP_KERNELS) == 0) { + if (val.compare(PluginConfigParams::YES) == 0) { + dumpCustomKernels = true; + } else if (val.compare(PluginConfigParams::NO) == 0) { + dumpCustomKernels = false; + } else { + THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val; + } + } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_PLUGIN_PRIORITY) == 0) { + std::stringstream ss(val); + uint32_t uVal(0); + ss >> uVal; + if (ss.fail()) { + THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val; + } + switch (uVal) { + case 0: + queuePriority = cldnn::priority_mode_types::disabled; + break; + case 1: + queuePriority = cldnn::priority_mode_types::low; + break; + case 2: + queuePriority = cldnn::priority_mode_types::med; + break; + case 3: + queuePriority = cldnn::priority_mode_types::high; + break; + default: + THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "Unsupported queue priority value: " << uVal; + } + + } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_PLUGIN_THROTTLE) == 0) { + std::stringstream ss(val); + uint32_t uVal(0); + ss >> uVal; + if (ss.fail()) { + THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val; + } + switch (uVal) { + case 0: 
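+                // Note: KEY_CLDNN_PLUGIN_THROTTLE reuses the numeric convention of KEY_CLDNN_PLUGIN_PRIORITY above ("0" = disabled, "1" = low, "2" = med, "3" = high); non-numeric strings were already rejected by the ss.fail() check, and any other number falls through to the PARAMETER_MISMATCH exception in the default branch.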
queueThrottle = cldnn::throttle_mode_types::disabled; + break; + case 1: + queueThrottle = cldnn::throttle_mode_types::low; + break; + case 2: + queueThrottle = cldnn::throttle_mode_types::med; + break; + case 3: + queueThrottle = cldnn::throttle_mode_types::high; + break; + default: + THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "Unsupported queue throttle value: " << uVal; + } + } else if (key.compare(PluginConfigParams::KEY_CONFIG_FILE) == 0) { + std::stringstream ss(val); + std::istream_iterator<std::string> begin(ss); + std::istream_iterator<std::string> end; + std::vector<std::string> configFiles(begin, end); + for (auto& file : configFiles) { + CLDNNCustomLayer::LoadFromFile(file, customLayers); + } + } else if (key.compare(PluginConfigParams::KEY_TUNING_MODE) == 0) { + if (val.compare(PluginConfigParams::TUNING_DISABLED) == 0) { + tuningConfig.mode = cldnn::tuning_mode::tuning_disabled; + } else if (val.compare(PluginConfigParams::TUNING_CREATE) == 0) { + tuningConfig.mode = cldnn::tuning_mode::tuning_tune_and_cache; + } else if (val.compare(PluginConfigParams::TUNING_USE_EXISTING) == 0) { + tuningConfig.mode = cldnn::tuning_mode::tuning_use_cache; + } else { + THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported tuning mode value by plugin: " << val; + } + } else if (key.compare(PluginConfigParams::KEY_TUNING_FILE) == 0) { + tuningConfig.cache_file_path = val; + } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_MEM_POOL) == 0) { + if (val.compare(PluginConfigParams::YES) == 0) { + memory_pool_on = true; + } else if (val.compare(PluginConfigParams::NO) == 0) { + memory_pool_on = false; + } else { + THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported memory pool flag value: " << val; + } + } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_GRAPH_DUMPS_DIR) == 0) { + if (!val.empty()) { + graph_dumps_dir = val; + if (mkdir(graph_dumps_dir.c_str(), 0755) != 0) { + THROW_IE_EXCEPTION << "Couldn't create clDNN graph dump directory!"; + } + } + } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_SOURCES_DUMPS_DIR) == 0) { + if (!val.empty()) { + sources_dumps_dir = val; + if (mkdir(sources_dumps_dir.c_str(), 0755) != 0) { + THROW_IE_EXCEPTION << "Couldn't create clDNN source dump directory!"; + } + } + } else if (key.compare(PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS) == 0) { + if (val.compare(PluginConfigParams::YES) == 0) { + exclusiveAsyncRequests = true; + } else if (val.compare(PluginConfigParams::NO) == 0) { + exclusiveAsyncRequests = false; + } else { + THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val; + } + } else if (key.compare(PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS) == 0) { + if (val.compare(PluginConfigParams::GPU_THROUGHPUT_AUTO) == 0) { + throughput_streams = 2; + } else { + int val_i; + try { + val_i = std::stoi(val); + } catch (const std::exception&) { + THROW_IE_EXCEPTION << "Wrong value for property key " << PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS + << ". 
Expected only positive numbers (#streams) or " + << "PluginConfigParams::GPU_THROUGHPUT_AUTO"; + } + if (val_i > 0) + throughput_streams = static_cast<uint16_t>(val_i); + } + } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_INT8_ENABLED) == 0) { + if (val.compare(PluginConfigParams::YES) == 0) { + enableInt8 = true; + } else if (val.compare(PluginConfigParams::NO) == 0) { + enableInt8 = false; + } else { + THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val; + } + } else { + THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property key by plugin: " << key; + } + + adjustKeyMapValues(); + } +} + +void Config::adjustKeyMapValues() { + if (useProfiling) + key_config_map[PluginConfigParams::KEY_PERF_COUNT] = PluginConfigParams::YES; + else + key_config_map[PluginConfigParams::KEY_PERF_COUNT] = PluginConfigParams::NO; + + if (dumpCustomKernels) + key_config_map[PluginConfigParams::KEY_DUMP_KERNELS] = PluginConfigParams::YES; + else + key_config_map[PluginConfigParams::KEY_DUMP_KERNELS] = PluginConfigParams::NO; + + if (exclusiveAsyncRequests) + key_config_map[PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS] = PluginConfigParams::YES; + else + key_config_map[PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS] = PluginConfigParams::NO; + + if (memory_pool_on) + key_config_map[CLDNNConfigParams::KEY_CLDNN_MEM_POOL] = PluginConfigParams::YES; + else + key_config_map[CLDNNConfigParams::KEY_CLDNN_MEM_POOL] = PluginConfigParams::NO; + + if (enableDynamicBatch) + key_config_map[PluginConfigParams::KEY_DYN_BATCH_ENABLED] = PluginConfigParams::YES; + else + key_config_map[PluginConfigParams::KEY_DYN_BATCH_ENABLED] = PluginConfigParams::NO; + + if (enableInt8) + key_config_map[CLDNNConfigParams::KEY_CLDNN_INT8_ENABLED] = PluginConfigParams::YES; + else + key_config_map[CLDNNConfigParams::KEY_CLDNN_INT8_ENABLED] = PluginConfigParams::NO; + + { + std::string qp = "0"; + switch (queuePriority) { + case cldnn::priority_mode_types::low: qp = "1"; break; + case cldnn::priority_mode_types::med: qp = "2"; break; + case cldnn::priority_mode_types::high: qp = "3"; break; + default: break; + } + key_config_map[CLDNNConfigParams::KEY_CLDNN_PLUGIN_PRIORITY] = qp; + } + { + std::string qt = "0"; + switch (queueThrottle) { + case cldnn::throttle_mode_types::low: qt = "1"; break; + case cldnn::throttle_mode_types::med: qt = "2"; break; + case cldnn::throttle_mode_types::high: qt = "3"; break; + default: break; + } + key_config_map[CLDNNConfigParams::KEY_CLDNN_PLUGIN_THROTTLE] = qt; + } + { + std::string tm = PluginConfigParams::TUNING_DISABLED; + switch (tuningConfig.mode) { + case cldnn::tuning_mode::tuning_tune_and_cache: tm = PluginConfigParams::TUNING_CREATE; break; + case cldnn::tuning_mode::tuning_use_cache: tm = PluginConfigParams::TUNING_USE_EXISTING; break; + default: break; + } + key_config_map[PluginConfigParams::KEY_TUNING_MODE] = tm; + if (!tuningConfig.cache_file_path.empty()) + key_config_map[PluginConfigParams::KEY_TUNING_FILE] = tuningConfig.cache_file_path; + } + + if (!graph_dumps_dir.empty()) + key_config_map[CLDNNConfigParams::KEY_CLDNN_GRAPH_DUMPS_DIR] = graph_dumps_dir; + if (!sources_dumps_dir.empty()) + key_config_map[CLDNNConfigParams::KEY_CLDNN_SOURCES_DUMPS_DIR] = sources_dumps_dir; + + key_config_map[PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS] = std::to_string(throughput_streams); +} +} // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/cldnn_config.h b/inference-engine/src/cldnn_engine/cldnn_config.h new file mode 100644 index 
00000000000000..cf863fb5447c43 --- /dev/null +++ b/inference-engine/src/cldnn_engine/cldnn_config.h @@ -0,0 +1,62 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "ie_blob.h" +#include "ie_plugin.hpp" +#include "cpp/ie_cnn_network.h" +#include "debug_options.h" +#include "inference_engine.hpp" + +#include "cldnn_custom_layer.h" + +#include + +namespace CLDNNPlugin { + +struct Config { + Config() : throughput_streams(1), + useProfiling(false), + dumpCustomKernels(false), + exclusiveAsyncRequests(false), + memory_pool_on(true), + enableDynamicBatch(false), + enableInt8(false), + queuePriority(cldnn::priority_mode_types::disabled), + queueThrottle(cldnn::throttle_mode_types::disabled), + max_dynamic_batch(1), + customLayers({}), + tuningConfig(), + graph_dumps_dir(""), + sources_dumps_dir("") { + adjustKeyMapValues(); + } + + void UpdateFromMap(const std::map<std::string, std::string>& configMap); + void adjustKeyMapValues(); + + uint16_t throughput_streams; + bool useProfiling; + bool dumpCustomKernels; + bool exclusiveAsyncRequests; + bool memory_pool_on; + bool enableDynamicBatch; + bool enableInt8; + cldnn::priority_mode_types queuePriority; + cldnn::throttle_mode_types queueThrottle; + int max_dynamic_batch; + CLDNNCustomLayerMap customLayers; + cldnn::tuning_config_options tuningConfig; + std::string graph_dumps_dir; + std::string sources_dumps_dir; + + std::map<std::string, std::string> key_config_map; +}; + +} // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp index fab02d34c84959..8aba309dff5652 100644 --- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp @@ -10,6 +10,9 @@ #include #include #include +#include + +#include "ie_metric_helpers.hpp" #include #include #include @@ -25,7 +28,7 @@ #undef max #include "cldnn_engine.h" -#include "cldnn_graph.h" +#include "cldnn_executable_network.h" #include "cldnn_custom_layer.h" #ifdef __linux__ @@ -41,12 +44,31 @@ using namespace details; namespace CLDNNPlugin { struct clDNNEngine::impl { - CLDNNGraph::Config m_config; + CLDNNPlugin::Config m_config; }; clDNNEngine::clDNNEngine() { - _impl = new impl; + _pluginName = "GPU"; + _impl = std::make_shared<impl>(); + + // try loading clDNN engine and get info from it + { + cldnn::engine info_engine(cldnn::engine_configuration( + false, + false, + false, + std::string(), + std::string(), + true, + std::string(), + std::string(), + cldnn::priority_mode_types::disabled, + cldnn::throttle_mode_types::disabled, + true, + 1)); + engine_info = info_engine.get_info(); + } // locate global custom kernel config // and auto-load kernels from it #ifdef _WIN32 @@ -73,50 +95,44 @@ clDNNEngine::clDNNEngine() { CLDNNCustomLayer::LoadFromFile(config_path, _impl->m_config.customLayers, true); } -clDNNEngine::~clDNNEngine() { - if (_impl) { - delete _impl; - _impl = nullptr; - } -} - -ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(InferenceEngine::ICNNNetwork &network, +ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEngine::ICore * /*core*/, InferenceEngine::ICNNNetwork &network, const std::map<std::string, std::string> &config) { + IE_SUPPRESS_DEPRECATED_START auto specifiedDevice = network.getTargetDevice(); auto supportedDevice = InferenceEngine::TargetDevice::eGPU; if (specifiedDevice != InferenceEngine::TargetDevice::eDefault && specifiedDevice != supportedDevice) { THROW_IE_EXCEPTION << "The 
plugin doesn't support target device: " << getDeviceName(specifiedDevice) << ".\n" << "Supported target device: " << getDeviceName(supportedDevice); } + IE_SUPPRESS_DEPRECATED_END - CLDNNGraph::Config conf = this->_impl->m_config; - conf.LoadFromMap(config); + CLDNNPlugin::Config conf = this->_impl->m_config; + conf.UpdateFromMap(config); // verification of supported input InferenceEngine::InputsDataMap _networkInputs; network.getInputsInfo(_networkInputs); for (auto ii : _networkInputs) { - auto input_precision = ii.second->getInputPrecision(); + auto input_precision = ii.second->getTensorDesc().getPrecision(); if (input_precision != InferenceEngine::Precision::FP16 && input_precision != InferenceEngine::Precision::I16 - && input_precision != InferenceEngine::Precision::FP32 && input_precision != InferenceEngine::Precision::U8) { + && input_precision != InferenceEngine::Precision::FP32 && input_precision != InferenceEngine::Precision::U8 + && input_precision != InferenceEngine::Precision::I32) { THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str << "Input image format " << input_precision << " is not supported yet..."; } } - // todo: handle input precision differently - per input and not one per network... - int max_batch = -1; if (conf.enableDynamicBatch) { - max_batch = network.getBatchSize(); + conf.max_dynamic_batch = static_cast<int>(network.getBatchSize()); } - return std::make_shared<CLDNNGraph>(network, conf, max_batch); + return std::make_shared<CLDNNExecNetwork>(network, conf); } INFERENCE_PLUGIN_API(StatusCode) CreatePluginEngine(IInferencePlugin *&plugin, ResponseDesc *resp) noexcept { try { plugin = make_ie_compatible_plugin( - {1, 6, + {2, 0, CI_BUILD_NUMBER, "clDNNPlugin"}, std::make_shared<clDNNEngine>()); return OK; @@ -127,7 +143,7 @@ } void clDNNEngine::SetConfig(const std::map<std::string, std::string> &config) { - _impl->m_config.LoadFromMap(config); + _impl->m_config.UpdateFromMap(config); } void clDNNEngine::QueryNetwork(const ICNNNetwork& network, QueryNetworkResult& res) const { @@ -153,7 +169,10 @@ void clDNNEngine::QueryNetwork(const ICNNNetwork& network, const std::map()(layer->type, "Const")) { nextLayerDependent.push_back(layer); } else if (CLDNNGraph::IsLayerSupported(layer->type)) { + res.supportedLayersMap.insert({ layer->name, GetName() }); + IE_SUPPRESS_DEPRECATED_START res.supportedLayers.insert(layer->name); + IE_SUPPRESS_DEPRECATED_END } } @@ -166,13 +185,17 @@ void clDNNEngine::QueryNetwork(const ICNNNetwork& network, const std::mapgetCreatorLayer().lock(); // verify if previous layer is not supported or if it in the list of not defined layers yet // not defined layers are treated as layers which will be assigned to GPU if next layer is assigned to GPU - if (res.supportedLayers.find(prev->name) == res.supportedLayers.end() + if (res.supportedLayersMap.find(prev->name) == res.supportedLayersMap.end() && std::find(nextLayerDependent.begin(), nextLayerDependent.end(), prev) == nextLayerDependent.end()) { supported = false; } } - if (supported) + if (supported) { + res.supportedLayersMap.insert({ concat->name, GetName() }); + IE_SUPPRESS_DEPRECATED_START res.supportedLayers.insert(concat->name); + IE_SUPPRESS_DEPRECATED_END + } } // evaluation of constant blobs - if all consumers are on GPU, @@ -182,18 +205,81 @@ void clDNNEngine::QueryNetwork(const ICNNNetwork& network, const std::mapoutData) { - for (auto ol : out->inputTo) { + for (auto ol : out->getInputTo()) { + if 
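+                // a Const blob is re-assigned to GPU only if every consumer of its outputs was already accepted into supportedLayersMap; a single unsupported consumer (checked below) keeps it on the fallback device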
(res.supportedLayersMap.find(ol.second->name) == res.supportedLayersMap.end()) { supported = false; } } } std::cout << (*cnl)->name << " is " << (supported ? "GPU" : "CPU") << std::endl; - if (supported) + if (supported) { + IE_SUPPRESS_DEPRECATED_START res.supportedLayers.insert((*cnl)->name); + IE_SUPPRESS_DEPRECATED_END + res.supportedLayersMap.insert({ (*cnl)->name, GetName() }); + } } } +Parameter clDNNEngine::GetConfig(const std::string& name, const std::map<std::string, Parameter>& /*options*/) const { + Parameter result; + auto option = _impl->m_config.key_config_map.find(name); + if (option != _impl->m_config.key_config_map.end()) { + result = option->second; + } else { + THROW_IE_EXCEPTION << "Unsupported config key : " << name; + } + return result; +} + +Parameter clDNNEngine::GetMetric(const std::string& name, const std::map<std::string, Parameter>& /*options*/) const { + if (name == METRIC_KEY(SUPPORTED_METRICS)) { + std::vector<std::string> metrics; + metrics.push_back(METRIC_KEY(AVAILABLE_DEVICES)); + metrics.push_back(METRIC_KEY(SUPPORTED_METRICS)); + metrics.push_back(METRIC_KEY(FULL_DEVICE_NAME)); + metrics.push_back(METRIC_KEY(OPTIMIZATION_CAPABILITIES)); + metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS)); + metrics.push_back(METRIC_KEY(NUMBER_OF_WAITING_INFER_REQUESTS)); + metrics.push_back(METRIC_KEY(NUMBER_OF_EXEC_INFER_REQUESTS)); + metrics.push_back(METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS)); + metrics.push_back(METRIC_KEY(RANGE_FOR_STREAMS)); + IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics); + } else if (name == METRIC_KEY(AVAILABLE_DEVICES)) { + std::vector<std::string> availableDevices = { "" }; + IE_SET_METRIC_RETURN(AVAILABLE_DEVICES, availableDevices); + } else if (name == METRIC_KEY(FULL_DEVICE_NAME)) { + IE_SET_METRIC_RETURN(FULL_DEVICE_NAME, std::string(engine_info.ocl_device_name)); + } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) { + std::vector<std::string> configKeys; + for (auto opt : _impl->m_config.key_config_map) + configKeys.push_back(opt.first); + IE_SET_METRIC_RETURN(SUPPORTED_CONFIG_KEYS, configKeys); + } else if (name == METRIC_KEY(OPTIMIZATION_CAPABILITIES)) { + std::vector<std::string> capabilities; + + capabilities.push_back(METRIC_VALUE(FP32)); + capabilities.push_back(METRIC_VALUE(BIN)); + if (engine_info.supports_fp16) + capabilities.push_back(METRIC_VALUE(FP16)); + if (engine_info.supports_imad || engine_info.supports_immad) + capabilities.push_back(METRIC_VALUE(INT8)); + + IE_SET_METRIC_RETURN(OPTIMIZATION_CAPABILITIES, capabilities); + } else if (name == METRIC_KEY(NUMBER_OF_WAITING_INFER_REQUESTS)) { + IE_SET_METRIC_RETURN(NUMBER_OF_WAITING_INFER_REQUESTS, CLDNNExecNetwork::GetWaitingCounter()); + } else if (name == METRIC_KEY(NUMBER_OF_EXEC_INFER_REQUESTS)) { + IE_SET_METRIC_RETURN(NUMBER_OF_EXEC_INFER_REQUESTS, CLDNNExecNetwork::GetRunningCounter()); + } else if (name == METRIC_KEY(RANGE_FOR_ASYNC_INFER_REQUESTS)) { + std::tuple<unsigned int, unsigned int, unsigned int> range = std::make_tuple(1, 2, 1); + IE_SET_METRIC_RETURN(RANGE_FOR_ASYNC_INFER_REQUESTS, range); + } else if (name == METRIC_KEY(RANGE_FOR_STREAMS)) { + std::tuple<unsigned int, unsigned int> range = std::make_tuple(1, 2); + IE_SET_METRIC_RETURN(RANGE_FOR_STREAMS, range); + } else { + THROW_IE_EXCEPTION << "Unsupported metric key " << name; + } +} }; // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.h b/inference-engine/src/cldnn_engine/cldnn_engine.h index 6241a946326b88..1fb31908393b97 100644 --- a/inference-engine/src/cldnn_engine/cldnn_engine.h +++ b/inference-engine/src/cldnn_engine/cldnn_engine.h @@ -8,6 +8,7 @@ #include #include #include +#include #include namespace CLDNNPlugin 
{ @@ -16,19 +17,20 @@ using CLDNNCustomLayerPtr = std::shared_ptr<CLDNNCustomLayer>; class clDNNEngine : public InferenceEngine::InferencePluginInternal { struct impl; - impl *_impl; + std::shared_ptr<impl> _impl; + cldnn::engine_info engine_info; public: clDNNEngine(); - virtual ~clDNNEngine(); - - InferenceEngine::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(InferenceEngine::ICNNNetwork &network, + InferenceEngine::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(const InferenceEngine::ICore * core, InferenceEngine::ICNNNetwork &network, const std::map<std::string, std::string> &config) override; void SetConfig(const std::map<std::string, std::string> &config) override; + InferenceEngine::Parameter GetConfig(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& options) const override; + InferenceEngine::Parameter GetMetric(const std::string& name, const std::map<std::string, InferenceEngine::Parameter>& options) const override; /** - * @depricated Use the version with config parameter + * @deprecated Use the version with config parameter */ void QueryNetwork(const InferenceEngine::ICNNNetwork& network, InferenceEngine::QueryNetworkResult& res) const override; void QueryNetwork(const InferenceEngine::ICNNNetwork& network, diff --git a/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp b/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp new file mode 100644 index 00000000000000..77fcfe490a21b3 --- /dev/null +++ b/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp @@ -0,0 +1,149 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include + +#include "ie_metric_helpers.hpp" +#include +#include +#include +#include +#include +#include "cldnn_graph.h" + +#include +#include +#include +#include +#include +#include "cldnn_infer_request.h" +#include +#include "details/caseless.hpp" +#include "cldnn_async_infer_request.h" +#include +#include +#include + +#include +#include "cldnn_executable_network.h" +#include "cldnn_streams_task_executor.h" +#include "../mkldnn_plugin/mkldnn_streams.h" + + +using namespace InferenceEngine; +using namespace InferenceEngine::details; + +namespace CLDNNPlugin { +unsigned int CLDNNExecNetwork::GetWaitingCounter() { return MultiWorkerTaskExecutor::GetWaitingCounter(); } +unsigned int CLDNNExecNetwork::GetRunningCounter() { return CLDNNInferRequest::GetRunningCounter(); } + +CLDNNExecNetwork::CLDNNExecNetwork(InferenceEngine::ICNNNetwork &network, const Config& config) : m_config(config) { + // graph(s) initialization in taskExecutor threads (streams), in parallel (in case of streams) + std::vector<Task::Ptr> tasks; + + auto graph_base = std::make_shared<CLDNNGraph>(network, m_config, 0); + for (uint16_t n = 0; n < m_config.throughput_streams; n++) { + auto graph = n == 0 ? 
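+            // stream 0 keeps graph_base itself (the instance that compiled the clDNN program); the remaining streams go through the CLDNNGraph copy constructor and share that compiled program, so only per-stream runtime state is duplicated: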
graph_base : std::make_shared<CLDNNGraph>(graph_base, n); + m_graphs.push_back(graph); + auto task = std::make_shared<Task>([=]() { + CLDNNPlugin::MultiWorkerTaskExecutor::ptrContext.ptrGraph = graph; + }); + tasks.push_back(task); + } + + if (m_config.throughput_streams > 1) { + // special executor with as many threads as requested #streams, each with its own initialization task + _taskExecutor = std::make_shared<MultiWorkerTaskExecutor>(tasks); + } else { + if (m_config.exclusiveAsyncRequests) { + ExecutorManager *executorManager = ExecutorManager::getInstance(); + _taskExecutor = executorManager->getExecutor("GPU"); + } + } + + for (auto& t : tasks) + t->checkException(); +} + +InferRequestInternal::Ptr CLDNNExecNetwork::CreateInferRequestImpl(InputsDataMap networkInputs, + OutputsDataMap networkOutputs) { + if (m_graphs.empty()) { + THROW_IE_EXCEPTION << NETWORK_NOT_LOADED_str; + } + + for (auto& graph : m_graphs) { + if (graph == nullptr) { + THROW_IE_EXCEPTION << NETWORK_NOT_LOADED_str; + } + + if (!graph->IsLoaded()) { + THROW_IE_EXCEPTION << NETWORK_NOT_LOADED_str << ": no networks created"; + } + } + + auto ptr = std::make_shared<CLDNNInferRequest>(networkInputs, networkOutputs); + if (m_config.throughput_streams > 1) { + ptr->EnableStreams(); + } + if (m_config.useProfiling) + ptr->EnableProfiling(); + ptr->SetGraph(m_graphs.front()); + + return ptr; +} + +void CLDNNExecNetwork::CreateInferRequest(IInferRequest::Ptr &asyncRequest) { + auto syncRequestImpl = this->CreateInferRequestImpl(_networkInputs, _networkOutputs); + syncRequestImpl->setPointerToExecutableNetworkInternal(shared_from_this()); + + auto asyncThreadSafeImpl = std::make_shared<CLDNNAsyncInferRequest>( + syncRequestImpl, _taskExecutor, _taskSynchronizer, _callbackExecutor); + + asyncRequest.reset(new InferRequestBase(asyncThreadSafeImpl), [](IInferRequest *p) { p->Release(); }); + asyncThreadSafeImpl->SetPointerToPublicInterface(asyncRequest); +} + +void CLDNNExecNetwork::GetExecGraphInfo(InferenceEngine::ICNNNetwork::Ptr &graphPtr) { + if (m_graphs.empty()) + THROW_IE_EXCEPTION << NETWORK_NOT_LOADED_str; + + m_graphs.front()->GetExecGraphInfo(graphPtr); +} + +void CLDNNExecNetwork::GetConfig(const std::string &name, InferenceEngine::Parameter &result, InferenceEngine::ResponseDesc *resp) const { + auto option = m_config.key_config_map.find(name); + if (option != m_config.key_config_map.end()) { + result = option->second; + } else { + THROW_IE_EXCEPTION << "Unsupported ExecutableNetwork config key: " << name; + } +} + +void CLDNNExecNetwork::GetMetric(const std::string &name, InferenceEngine::Parameter &result, InferenceEngine::ResponseDesc *resp) const { + if (name == METRIC_KEY(NETWORK_NAME)) { + IE_ASSERT(!m_graphs.empty()); + result = IE_SET_METRIC(NETWORK_NAME, m_graphs[0]->getName()); + } else if (name == METRIC_KEY(SUPPORTED_METRICS)) { + std::vector<std::string> metrics; + metrics.push_back(METRIC_KEY(NETWORK_NAME)); + metrics.push_back(METRIC_KEY(SUPPORTED_METRICS)); + metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS)); + metrics.push_back(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)); + result = IE_SET_METRIC(SUPPORTED_METRICS, metrics); + } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) { + std::vector<std::string> configKeys; + for (auto && value : m_config.key_config_map) + configKeys.push_back(value.first); + result = IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, configKeys); + } else if (name == METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)) { + unsigned int nr = m_config.throughput_streams * 2u; + result = IE_SET_METRIC(OPTIMAL_NUMBER_OF_INFER_REQUESTS, nr); + } else { + THROW_IE_EXCEPTION << "Unsupported 
ExecutableNetwork metric: " << name; + } +} + +}; // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/cldnn_executable_network.h b/inference-engine/src/cldnn_engine/cldnn_executable_network.h new file mode 100644 index 00000000000000..81b82ea7dccecc --- /dev/null +++ b/inference-engine/src/cldnn_engine/cldnn_executable_network.h @@ -0,0 +1,44 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include "ie_blob.h" +#include "ie_plugin.hpp" +#include "cpp/ie_cnn_network.h" +#include "debug_options.h" +#include "inference_engine.hpp" +#include +#include "cldnn_graph.h" +#include "cldnn_config.h" + +namespace CLDNNPlugin { + +class CLDNNExecNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault { +public: + typedef std::shared_ptr Ptr; + + explicit CLDNNExecNetwork(InferenceEngine::ICNNNetwork &network, const Config& config = {}); + + void GetExecGraphInfo(InferenceEngine::ICNNNetwork::Ptr &graphPtr) override; + void CreateInferRequest(InferenceEngine::IInferRequest::Ptr &asyncRequest) override; + InferenceEngine::InferRequestInternal::Ptr CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs, + InferenceEngine::OutputsDataMap networkOutputs) override; + + static unsigned int GetWaitingCounter(); + static unsigned int GetRunningCounter(); + void GetMetric(const std::string &name, InferenceEngine::Parameter &result, InferenceEngine::ResponseDesc *resp) const override; + void GetConfig(const std::string &name, InferenceEngine::Parameter &result, InferenceEngine::ResponseDesc *resp) const override; + + std::vector> m_graphs; + Config m_config; +}; + +}; // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml b/inference-engine/src/cldnn_engine/cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml index a39cc1bdd3aba0..9cc50359ebeac4 100644 --- a/inference-engine/src/cldnn_engine/cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml +++ b/inference-engine/src/cldnn_engine/cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml @@ -37,8 +37,8 @@ - - + + diff --git a/inference-engine/src/cldnn_engine/cldnn_graph.cpp b/inference-engine/src/cldnn_engine/cldnn_graph.cpp index d9164b24954746..b1f98c98877c0e 100644 --- a/inference-engine/src/cldnn_engine/cldnn_graph.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_graph.cpp @@ -7,46 +7,9 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include @@ -64,3700 +27,599 @@ #include #include #include +#include using namespace InferenceEngine; using namespace InferenceEngine::details; -#ifndef NDEBUG -#include -#include -#define THROW_CLDNN_EXCEPTION(desc)\ -do { \ -InferenceEngineException ex(__FILE__, __LINE__);\ -std::cout << desc << "\n---\nException detected at " << __FILE__ << ":" << \ -__LINE__ << " (" << __FUNCTION__ << ")\n---\n" << std::endl; THROW_IE_EXCEPTION << desc; } while (0); -#else -#define THROW_CLDNN_EXCEPTION(desc) THROW_IE_EXCEPTION << desc; -#endif // NDEBUG -#define TensorValue(val) 
static_cast<cldnn::tensor::value_type>(val) - namespace CLDNNPlugin { -const cldnn::primitive_id CLDNNGraph::m_preProcessTag("_cldnn_input_preprocess"); -const cldnn::primitive_id CLDNNGraph::m_weightsTag("_cldnn_weights"); -const cldnn::primitive_id CLDNNGraph::m_biasesTag("_cldnn_biases"); -const cldnn::primitive_id CLDNNGraph::m_meanValuesTag("_cldnn_mean_values"); -const cldnn::primitive_id CLDNNGraph::m_postProcessTag("_cldnn_output_postprocess"); -const cldnn::primitive_id CLDNNGraph::m_scalesTag("_cldnn_scales"); -const cldnn::primitive_id CLDNNGraph::m_workaroundTag("_cldnn_workaround"); -const cldnn::primitive_id CLDNNGraph::m_preCustomLayerTag("_cldnn_custom_preprocess"); -const cldnn::primitive_id CLDNNGraph::m_postCustomLayerTag("_cldnn_custom_postprocess"); - -static void ValidateLayer(const InferenceEngine::CNNLayerPtr& layer, unsigned inputs) { // todo: add more checks - if (inputs && layer->insData.size() != inputs) { - THROW_CLDNN_EXCEPTION("Invalid number of inputs for layer: " << layer->name); - } - if (layer->_fusedWith) { - THROW_CLDNN_EXCEPTION("Unsupported fuse in layer: " << layer->name << " with: " << layer->_fusedWith->name); +CLDNNGraph::CLDNNGraph(InferenceEngine::ICNNNetwork& network, const Config& config, uint16_t stream_id) + : m_config(config) + , m_networkName(network.getName()) + , m_stream_id(stream_id) { + m_engine = std::make_shared<cldnn::engine>(cldnn::engine_configuration( + (m_config.useProfiling || (m_config.tuningConfig.mode != cldnn::tuning_mode::tuning_disabled)), + false, + m_config.dumpCustomKernels, + std::string(), + std::string(), + true, + std::string(), + m_config.sources_dumps_dir, + m_config.queuePriority, + m_config.queueThrottle, + m_config.memory_pool_on, + m_config.throughput_streams)); + + m_program = std::make_shared<Program>(network, m_engine, m_config); + Build(); +} + +CLDNNGraph::CLDNNGraph(std::shared_ptr<CLDNNGraph> graph, uint16_t stream_id) + : m_config(graph->m_config) + , m_engine(graph->m_engine) + , m_program(graph->m_program) + , m_networkName(graph->m_networkName) + , m_stream_id(stream_id) { + Build(); +} + +void CLDNNGraph::UpdateLayersMaps() { + primitiveIDs = m_program->primitiveIDs; + primitivesToIRLayersMap = m_program->primitivesToIRLayersMap; + prevPrimitiveIDs = m_program->prevPrimitiveIDs; + profilingIDs = m_program->profilingIDs; + perfMap = m_program->perfMap; + outputDims = m_program->outputDims; +} + +void CLDNNGraph::Build() { + UpdateLayersMaps(); + + if (GetMaxDynamicBatchSize() > 1) { + int m_bv_sz = m_program->GetMaxBatchSizeForSingleProgram(); + for (int b = m_bv_sz - 1; b >= 0; b--) { + m_networks.insert(m_networks.begin(), BuildNetwork(m_program->getCompiledProgram(b))); + m_engine->release_pending_memory(m_stream_id); + } + } else { + m_networks.emplace_back(BuildNetwork(m_program->getCompiledProgram())); + m_engine->release_pending_memory(m_stream_id); } -} -static void ValidateEltwiseLayer(const InferenceEngine::CNNLayerPtr& layer) { - if (layer->_fusedWith) { - THROW_CLDNN_EXCEPTION("Unsupported fuse in layer: " << layer->name << " with: " << layer->_fusedWith->name); - } + UpdateImplementationsMap(); } -#if defined(_WIN32) -#define mkdir(dir, mode) _mkdir(dir) -#endif - -void CLDNNGraph::Config::LoadFromMap(const std::map<std::string, std::string>& configMap) { - for (auto& kvp : configMap) { - std::string key = kvp.first; - std::string val = kvp.second; - - // TODO: refactor if-else to map? 
- if (key.compare(PluginConfigParams::KEY_PERF_COUNT) == 0) { - if (val.compare(PluginConfigParams::YES) == 0) { - useProfiling = true; - } else if (val.compare(PluginConfigParams::NO) == 0) { - useProfiling = false; - } else { - THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val; - } - } else if (key.compare(PluginConfigParams::KEY_DYN_BATCH_ENABLED) == 0) { - if (val.compare(PluginConfigParams::YES) == 0) { - enableDynamicBatch = true; - } else if (val.compare(PluginConfigParams::NO) == 0) { - enableDynamicBatch = false; - } else { - THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val; - } - } else if (key.compare(PluginConfigParams::KEY_DUMP_KERNELS) == 0) { - if (val.compare(PluginConfigParams::YES) == 0) { - dumpCustomKernels = true; - } else if (val.compare(PluginConfigParams::NO) == 0) { - dumpCustomKernels = false; - } else { - THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val; - } - } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_PLUGIN_PRIORITY) == 0) { - std::stringstream ss(val); - uint32_t uVal(0); - ss >> uVal; - if (ss.fail()) { - THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val; - } - switch (uVal) { - case 0: - queuePriority = cldnn::priority_mode_types::disabled; - break; - case 1: - queuePriority = cldnn::priority_mode_types::low; - break; - case 2: - queuePriority = cldnn::priority_mode_types::med; - break; - case 3: - queuePriority = cldnn::priority_mode_types::high; - break; - default: - THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "Unsupported queue priority value: " << uVal; - break; - } +std::shared_ptr CLDNNGraph::BuildNetwork(std::shared_ptr program) { + auto network = std::make_shared(*program, m_stream_id); - } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_PLUGIN_THROTTLE) == 0) { - std::stringstream ss(val); - uint32_t uVal(0); - ss >> uVal; - if (ss.fail()) { - THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val; - } - switch (uVal) { - case 0: - queueThrottle = cldnn::throttle_mode_types::disabled; - break; - case 1: - queueThrottle = cldnn::throttle_mode_types::low; - break; - case 2: - queueThrottle = cldnn::throttle_mode_types::med; - break; - case 3: - queueThrottle = cldnn::throttle_mode_types::high; - break; - default: - THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "Unsupported queue throttle value: " << uVal; - break; - } - } else if (key.compare(PluginConfigParams::KEY_CONFIG_FILE) == 0) { - std::stringstream ss(val); - std::istream_iterator begin(ss); - std::istream_iterator end; - std::vector configFiles(begin, end); - for (auto& file : configFiles) { - CLDNNCustomLayer::LoadFromFile(file, customLayers); - } - } else if (key.compare(PluginConfigParams::KEY_TUNING_MODE) == 0) { - if (val.compare(PluginConfigParams::TUNING_DISABLED) == 0) { - tuningConfig.mode = cldnn::tuning_mode::tuning_disabled; - } else if (val.compare(PluginConfigParams::TUNING_CREATE) == 0) { - tuningConfig.mode = cldnn::tuning_mode::tuning_tune_and_cache; - } else if (val.compare(PluginConfigParams::TUNING_USE_EXISTING) == 0) { - tuningConfig.mode = cldnn::tuning_mode::tuning_use_cache; - } else { - THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported tuning mode value by plugin: " << val; - } - } else if (key.compare(PluginConfigParams::KEY_TUNING_FILE) == 0) { - tuningConfig.cache_file_path = val; - } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_MEM_POOL) == 0) { - if 
(val.compare(PluginConfigParams::YES) == 0) { - memory_pool_on = true; - } else if (val.compare(PluginConfigParams::NO) == 0) { - memory_pool_on = false; - } else { - THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported memory pool flag value: " << val; - } - } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_GRAPH_DUMPS_DIR) == 0) { - if (!val.empty()) { - graph_dumps_dir = val; - if (mkdir(graph_dumps_dir.c_str(), 0755) != 0) { - THROW_IE_EXCEPTION << "Couldn't create clDNN graph dump directory!"; - } - } - } else if (key.compare(CLDNNConfigParams::KEY_CLDNN_SOURCES_DUMPS_DIR) == 0) { - if (!val.empty()) { - sources_dumps_dir = val; - if (mkdir(sources_dumps_dir.c_str(), 0755) != 0) { - THROW_IE_EXCEPTION << "Couldn't create clDNN source dump directory!"; - } - } - } else if (key.compare(PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS) == 0) { - if (val.compare(PluginConfigParams::YES) == 0) { - exclusiveAsyncRequests = true; - } else if (val.compare(PluginConfigParams::NO) == 0) { - exclusiveAsyncRequests = false; - } else { - THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property value by plugin: " << val; - } - } else { - THROW_IE_EXCEPTION << NOT_FOUND_str << "Unsupported property key by plugin: " << key; + if (!m_config.graph_dumps_dir.empty() && m_stream_id == 0) { + static int net_id = 0; + auto steps_info = network->get_optimization_steps_info(); + size_t step_idx = 0; + for (auto& step : steps_info) { + CNNNetwork net(GetExecGraphInfoByPrimitivesInfo(step.second, false)); + net.serialize(m_config.graph_dumps_dir + std::to_string(net_id) + "_" + + std::to_string(step_idx) + "_" + step.first + "_graph.xml"); + step_idx++; } + net_id++; } -} -void CLDNNGraph::changeInputBatch(size_t batch) { - m_curBatch = batch; + return network; } -bool CLDNNGraph::CanProcessDynBatch(InferenceEngine::ICNNNetwork &network) const { - InputsDataMap inputs; - network.getInputsInfo(inputs); - - CNNLayerSet inputLayers; - std::unordered_set allLayers; - - if (inputs.empty()) - return false; - - auto & secondLayers = inputs.begin()->second->getInputData()->getInputTo(); - if (secondLayers.empty()) - return false; - - bool check_result = true; - details::UnorderedDFS(allLayers, secondLayers.begin()->second, [&](CNNLayerPtr layer) { - auto type = LayerTypeFromStr(layer->type); - if (SimplerNMS == type || - ROIPooling == type || - PriorBox == type || - DetectionOutput == type || - Reshape == type || - Permute == type || - Flatten == type || - Proposal == type || - PSROIPooling == type ) { - check_result = false; - } - - // check for custom layer - auto customLayer = m_config.customLayers.find(layer->type); - if (customLayer != m_config.customLayers.end()) { - check_result = false; +InferenceEngine::ICNNNetwork::Ptr CLDNNGraph::GetExecGraphInfoByPrimitivesInfo(std::vector& primitives_info, + bool filter_const_primitives) { + auto net = std::make_shared(); + net->setPrecision(Precision::FP32); + net->setName("runtime_gpu_graph"); + if (m_config.useProfiling) { + try { + // Update may throw an exception for step-by-step runtime graph dump, + // since network->get_executed_primitives() method can't be called before network execution + UpdatePerfStatistics(); + } catch (std::exception&) { } - }, false); - - return check_result; -} - -CLDNNGraph::CLDNNGraph(InferenceEngine::ICNNNetwork& network, const Config& config, int max_batch) : m_config(config), - m_defaultFormat(cldnn::format::bfyx), - m_curBatch(-1) { - m_env.engine = std::make_shared(cldnn::engine_configuration( - (config.useProfiling || 
(config.tuningConfig.mode != cldnn::tuning_mode::tuning_disabled)), - false, - config.dumpCustomKernels, - std::string(), - std::string(), - true, - std::string(), - config.sources_dumps_dir, - config.queuePriority, - config.queueThrottle, - config.memory_pool_on)); -#if 0 - m_env.debugOptions.PrintOptions(); -#endif - if (config.exclusiveAsyncRequests) { - ExecutorManager *executorManager = ExecutorManager::getInstance(); - _taskExecutor = executorManager->getExecutor(TargetDeviceInfo::name(TargetDevice::eGPU)); } - bool res = !NetPass::CombineRNNSeq(network) ? NetPass::UnrollTI(network) : true; - res &= NetPass::UnrollRNN_if(network, [] (const RNNCellBase& rnn) -> bool { - if (rnn.clip != 0.0f) - return true; - if (rnn.type == "GRUCell" || - rnn.type == "GRUSequence" || - rnn.type == "RNNCell" || - rnn.type == "RNNSequence") - return true; - if (!(rnn.type == "LSTMCell" || rnn.type == "LSTMSequence") || - rnn.activations == std::vector{"sigmoid", "tanh", "tanh"}) - return false; - return true; - }); - - if (!res) - THROW_CLDNN_EXCEPTION("Plugin doesn't support Tensor Iterator in pure form. " - "No one TI optimization pattern was not applied successfully"); - - if (max_batch > 1) { - // check topology for applicability - if (!CanProcessDynBatch(network)) { - THROW_CLDNN_EXCEPTION("Such topology cannot be compiled for dynamic batch!"); - } - - // calculate number of networks necessary based on binary log - unsigned int tmp = max_batch; - unsigned int mask = 1 << 31; - unsigned int ldigit = 31; - - while (!(tmp & mask)) { - mask >>= 1; - ldigit--; - } - - m_env.m_bv_sz = ldigit + 1; - } else { - m_env.m_bv_sz = 0; - } + std::vector> node2layer; - m_env.m_max_batch = max_batch; - - // Handle workarounds - char networkName[128] = { 0 }; - network.getName(networkName, 127); - m_env.debugOptions.EnableWA(networkName); - m_env.debugOptions.AddTimedEvent("Loading Begin"); - - if (max_batch > 1) { - for (int b = m_env.m_bv_sz - 1; b >= 0; b--) { - m_topology = std::make_shared(cldnn::topology()); - m_env.network.reset(); - m_env.inputLayouts.clear(); - m_env.outputDims.clear(); - m_env.primitiveIDs.clear(); - - changeInputBatch(1 << b); - Load(network); - CompileNetwork(); - m_env.batchNetworks.insert(m_env.batchNetworks.begin(), m_env.network); - - m_topology.reset(); - m_env.engine->release_pending_memory(); + auto data_type_to_precision = [](cldnn::data_types dt) { + switch (dt) { + case cldnn::data_types::bin: return Precision::BIN; + case cldnn::data_types::f32: return Precision::FP32; + case cldnn::data_types::f16: return Precision::FP16; + case cldnn::data_types::i32: return Precision::I32; + case cldnn::data_types::u8: return Precision::U8; + case cldnn::data_types::i8: return Precision::I8; + default: return Precision::UNSPECIFIED; } - } else { - m_topology = std::make_shared(cldnn::topology()); - Load(network); - CompileNetwork(); - m_topology.reset(); - m_env.engine->release_pending_memory(); - } - - m_env.debugOptions.AddTimedEvent("Loading", "Loading Begin"); - m_env.debugOptions.PrintTimedEvents(); - m_env.debugOptions.ClearTimedEvents(); -} + }; -template -LayerTypePtr as(const CNNLayerPtr& in_ptr) { - auto result_ptr = dynamic_cast (in_ptr.get()); - if (nullptr == result_ptr) { - THROW_IE_EXCEPTION << "CNNLayerPtr is not suitable for casting to requested layer type"; - } - return result_ptr; -} + auto to_IE_type_name = [](const std::string& cldnn_name) -> std::string{ + static std::map type_n2l { + { "activation", "Activation" }, + { "arg_max_min", "ArgMax" }, + { 
"average_unpooling", "AverageUnpooling" }, + { "batch_norm", "BatchNormalization" }, + { "binary_convolution", "BinaryConvolution" }, + { "border", "Pad" }, + { "concatenation", "Concat" }, + { "convolution", "Convolution" }, + { "deformable_convolution", "DeformableConvolution" }, + { "crop", "Crop" }, + { "custom_gpu_primitive", "CustomGPUPrimitive" }, + { "data", "Const" }, + { "deconvolution", "Deconvolution" }, + { "depth_to_space", "DepthToSpace" }, + { "detection_output", "DetectionOutput" }, + { "eltwise", "Eltwise" }, + { "fully_connected", "FullyConnected" }, + { "gather", "Gather" }, + { "gemm", "Gemm" }, + { "input_layout", "Input" }, + { "lrn", "LRN" }, + { "lstm", "LSTM" }, + { "lstm_elt", "LSTM_Eltwise" }, + { "lstm_gemm", "LSTM_Gemm" }, + { "mvn", "MVN" }, + { "normalize", "Normalize" }, + { "permute", "Permute" }, + { "pooling", "Pooling" }, + { "prior_box", "PriorBox" }, + { "proposal", "Proposal" }, + { "quantize", "Quantize" }, + { "region_yolo", "RegionYolo" }, + { "reorder", "Reorder" }, + { "reorg_yolo", "ReorgYolo" }, + { "reshape", "Reshape" }, + { "reverse_sequence", "ReverseSequence" }, + { "roi_pooling", "ROIPooling" }, + { "scale", "ScaleShift" }, + { "shuffle_channels", "ShuffleChannels" }, + { "softmax", "SoftMax" }, + { "split", "Split" }, + { "strided_slice", "StridedSlice" }, + { "tile", "Tile" }, + { "upsampling", "Upsampling" }, + { "reduce_max", "ReduceMax" }, + { "reduce_min", "ReduceMin" }, + { "reduce_mean", "ReduceMean" }, + { "reduce_prod", "ReduceProd" }, + { "reduce_sum", "ReduceSum" }, + { "reduce_and", "ReduceAnd" }, + { "reduce_or", "ReduceOr" }, + { "reduce_sum_square", "ReduceSumSquare" }, + { "reduce_l1", "ReduceL1" }, + { "reduce_l2", "ReduceL2" }, + { "reduce_log_sum", "ReduceLogSum" }, + { "reduce_log_sum_exp", "ReduceLogSumExp" } + }; -inline std::string layer_type_name_ID(InferenceEngine::CNNLayer* layer) { - return layer->type + ":" + layer->name; -} + if (type_n2l.find(cldnn_name) != type_n2l.end()) + return type_n2l.at(cldnn_name); -inline std::string layer_type_name_ID(InferenceEngine::CNNLayerPtr layer) { - return layer_type_name_ID(layer.get()); -} + return cldnn_name; + }; -std::vector CLDNNGraph::GetNextLayers(const InferenceEngine::DataPtr data) { - std::vector nextLayers; - if (data == nullptr) { - return nextLayers; - } - for (auto nl : data->getInputTo()) { - nextLayers.push_back(nl.second); - } - return nextLayers; -} + auto concat_strings = [](std::vector strs, char sep) -> std::string { + if (strs.empty()) + return ""; -std::vector CLDNNGraph::GetNextLayers(const InferenceEngine::CNNLayerPtr layer) { - std::vector nextLayers; - if (layer == nullptr) { - return nextLayers; - } - for (auto od : layer->outData) { - auto nextLayersVec = GetNextLayers(od); - for (auto nl : nextLayersVec) { - nextLayers.push_back(nl); + std::string res = strs[0]; + for (size_t i = 1; i < strs.size(); i++) { + res += sep + strs[i]; } - } - return nextLayers; -} - -InferenceEngine::CNNLayerPtr CLDNNGraph::GetNextSingleLayer(const InferenceEngine::DataPtr data) { - if (data == nullptr) { - return nullptr; - } - auto nextLayers = GetNextLayers(data); - IE_ASSERT(nextLayers.size() == 1); - return nextLayers[0]; -} - -InferenceEngine::CNNLayerPtr CLDNNGraph::GetNextSingleLayer(const InferenceEngine::CNNLayerPtr layer) { - if (layer == nullptr) { - return nullptr; - } - auto nextLayers = GetNextLayers(layer); - IE_ASSERT(nextLayers.size() == 1); - return nextLayers[0]; -} - -void CLDNNGraph::InitFormat(InferenceEngine::ICNNNetwork &network) { - 
m_defaultFormat = FormatFromLayout(InferenceEngine::Layout::NCHW); -} - -void CLDNNGraph::CompileNetwork() { - m_env.debugOptions.AddTimedEvent("Network Build Begin"); - cldnn::build_options options; - if (!m_config.graph_dumps_dir.empty()) { - options.set_option(cldnn::build_option::graph_dumps_dir(m_config.graph_dumps_dir)); - } - options.set_option(cldnn::build_option::optimize_data(true)); - options.set_option(cldnn::build_option::tuning_config(m_config.tuningConfig)); - - m_env.network.reset(); - m_env.network = std::make_shared(cldnn::network(*(m_env.engine), *m_topology, options)); - m_env.debugOptions.AddTimedEvent("Network Build", "Network Build Begin"); -} -void CLDNNGraph::Load(InferenceEngine::ICNNNetwork &network) { - InitFormat(network); - auto _networkPrecision = network.getPrecision(); - - // 1. create inputs - InferenceEngine::InputsDataMap networkInputs; - network.getInputsInfo(networkInputs); - p_currentInputs = &networkInputs; + return res; + }; - InferenceEngine::OutputsDataMap networkOutputs; - network.getOutputsInfo(networkOutputs); - p_currentOutputs = &networkOutputs; + auto remove_type_from_name = [](const std::string& name) -> std::string { + auto it = std::find(name.begin(), name.end(), ':'); + if (it == name.end() || (it + 1) == name.end()) + return name; - if (networkInputs.size() == 0) { - THROW_CLDNN_EXCEPTION("No inputs detected."); - } + return std::string((it+1), name.end()); + }; - using LayerVect = std::vector; - std::list layersToHandle; + auto find_origin_layers = [&](const std::string& name) -> std::vector { + if (primitivesToIRLayersMap.find(name) == primitivesToIRLayersMap.end()) + return {}; - auto push_if = [&](const LayerVect& clist) { - for (auto& l : clist) { - if ( (std::find_if( layersToHandle.begin(), - layersToHandle.end(), - [&](const CNNLayerPtr& x) { return layer_type_name_ID(x) == layer_type_name_ID(l); } )) == layersToHandle.end() ) - layersToHandle.push_back(l); - } + return primitivesToIRLayersMap.at(name); }; - auto allInputs = CNNNetGetAllInputLayers(network); - for (auto input : allInputs) { - if (LayerTypeFromStr(input->type) == ConstantBlob) { - AddConstantBlobInput(input); - } else { - auto iter = networkInputs.find(input->name); // regular input - if (iter != networkInputs.end()) { - AddInputPrimitive(iter->second, input->precision); - } - } - // collect next layers to process - push_if(GetNextLayers(input)); - } - - // 2. 
traverse layers - unsigned infLoopProtection = 0; - while (!layersToHandle.empty()) { - if (infLoopProtection++ >= layersToHandle.size()) { - THROW_CLDNN_EXCEPTION("Infinite loop during network creation"); - break; - } - InferenceEngine::CNNLayerPtr currLayer = layersToHandle.front(); - layersToHandle.pop_front(); - auto layerName = layer_type_name_ID(currLayer); + auto create_layer = [&](const cldnn::primitive_info& prim_info) -> CNNLayer::Ptr { + CNNLayer::Ptr layer(new CNNLayer({"name", "type", Precision::UNSPECIFIED})); - if (m_env.primitiveIDs.find(layerName) != m_env.primitiveIDs.end()) { - infLoopProtection = 0; - continue; // this layer was already added (had multiple inputs) - } + layer->name = remove_type_from_name(prim_info.original_id); + layer->type = to_IE_type_name(prim_info.type_id); + layer->precision = data_type_to_precision(prim_info.output_layout.data_type); + std::vector originalNames{find_origin_layers(prim_info.original_id)}; + for (auto& fused_id : prim_info.c_fused_ids.cpp_ids) + for (auto& origin_id : find_origin_layers(fused_id)) + originalNames.push_back(origin_id); - bool missingInput = false; - try { - GetPrevLayersPrimitives(currLayer); - } catch (std::exception) { - missingInput = true; - } + std::sort(originalNames.begin(), originalNames.end()); + originalNames.erase(std::unique(originalNames.begin(), originalNames.end()), originalNames.end()); - if (missingInput) { // some inputs aren't created yet - layersToHandle.push_back(currLayer); // push the current layer to the end of the line - continue; // move on to the next layer + layer->params[ExecGraphInfoSerialization::ORIGINAL_NAMES] = concat_strings(originalNames, ','); + layer->params[ExecGraphInfoSerialization::IMPL_TYPE] = prim_info.kernel_id; + layer->params[ExecGraphInfoSerialization::OUTPUT_PRECISIONS] = layer->precision.name(); + std::string exec_time = "not_executed"; + if (perfMap.find(prim_info.original_id) != perfMap.end()) { + auto perfCounter = perfMap.at(prim_info.original_id).second; + if (perfCounter.num > 0) { + exec_time = std::to_string(perfCounter.realTime_avg()); + } } - infLoopProtection = 0; // found a layer with all inputs already existing - CreateSingleLayerPrimitive(currLayer); // currLayer will be advanced if layer was skipped or merged - m_env.prevPrimitiveIDs[layerName] = GetPrevLayersPrimitives(currLayer); - - push_if(GetNextLayers(currLayer)); - } - - // 3. Handle output reordering - for (auto output : networkOutputs) { - // always reorder and let clDNN remove unneeded reorders - AddOutputPrimitive(output.first, output.second); - } - - // 4. ??? - // 5. 
profit - p_currentInputs = nullptr; - p_currentOutputs = nullptr; -} - -CLDNNGraph::LayerType CLDNNGraph::LayerTypeFromStr(const std::string &str) { - static const caseless_map LayerNameToType = { - { "Convolution" , Convolution }, - { "ReLU" , ReLU }, - { "ReLU6" , ReLU6 }, - { "Sigmoid" , Sigmoid }, - { "Logistic" , Sigmoid }, - { "TanH" , TanH }, - { "ELU" , ELU }, - { "Activation" , Activation }, - { "Exp" , Exp }, - { "Not" , Not }, - { "Norm" , LRN }, - { "Pooling" , Pooling }, - { "FullyConnected" , FullyConnected }, - { "SoftMax" , SoftMax }, - { "Power" , Power }, - { "Split" , Split }, - { "Slice" , Split }, - { "Concat" , Concatenate }, - { "Eltwise" , Eltwise }, - { "SimplerNMS" , SimplerNMS }, - { "ROIPooling" , ROIPooling }, - { "Crop" , Crop }, - { "Deconvolution" , Deconvolution }, - { "PriorBox" , PriorBox }, - { "DetectionOutput" , DetectionOutput }, - { "Normalize" , Normalize }, - { "Reshape" , Reshape }, - { "Permute" , Permute }, - { "Flatten" , Flatten }, - { "BatchNormalization" , BatchNormalization }, - { "PReLU" , PReLU }, - { "ScaleShift" , ScaleShift }, - { "Proposal" , Proposal }, - { "PSROIPooling" , PSROIPooling }, - { "Clamp" , Clamp }, - { "Copy" , Copy }, - { "Upsampling" , Upsampling }, - { "Resample" , Resample }, - { "RegionYolo" , RegionYolo }, - { "ReorgYolo" , ReorgYolo }, - { "Const" , ConstantBlob }, - { "ArgMax" , ArgMax }, - { "MVN" , MVN }, - { "Unpooling" , Unpooling }, - { "Tile" , Tile }, - { "Pad" , Pad }, - { "LSTMCell" , LSTMCell }, - { "LSTMSequence" , RNN }, - { "RNNSequence" , RNN }, - { "Gather" , Gather }, - { "DepthToSpace" , DepthToSpace }, - { "ShuffleChannels" , ShuffleChannels }, - { "StridedSlice" , StridedSlice }, - { "ReverseSequence" , ReverseSequence } - }; - auto it = LayerNameToType.find(str); - if (it != LayerNameToType.end()) - return it->second; - else - return NO_TYPE; -} - -cldnn::pooling_mode CLDNNGraph::PoolingModeFromIEPooling(InferenceEngine::PoolingLayer::PoolType pt, bool excludePadding) { - switch (pt) { - case InferenceEngine::PoolingLayer::PoolType::MAX: - return cldnn::pooling_mode::max; - case InferenceEngine::PoolingLayer::PoolType::AVG: - return excludePadding ? 
cldnn::pooling_mode::average_no_padding : cldnn::pooling_mode::average; - default: IE_ASSERT(0); // unhandled pool mode - THROW_CLDNN_EXCEPTION("Unsupported pooling type: " << pt); - break; - } - - return cldnn::pooling_mode::max; // shouldn't get here -} + layer->params[ExecGraphInfoSerialization::PERF_COUNTER] = exec_time; + layer->params[ExecGraphInfoSerialization::OUTPUT_LAYOUTS] = prim_info.layout_str; + layer->params[ExecGraphInfoSerialization::EXECUTION_ORDER] = std::to_string(prim_info.exec_id); -cldnn::eltwise_mode CLDNNGraph::EltwiseModeFromIEEltwise(InferenceEngine::EltwiseLayer::eOperation op) { - switch (op) { - case InferenceEngine::EltwiseLayer::Sum: - return cldnn::eltwise_mode::sum; - case InferenceEngine::EltwiseLayer::Prod: - return cldnn::eltwise_mode::prod; - case InferenceEngine::EltwiseLayer::Max: - return cldnn::eltwise_mode::max; - case InferenceEngine::EltwiseLayer::Sub: - return cldnn::eltwise_mode::sub; - case InferenceEngine::EltwiseLayer::Min: - return cldnn::eltwise_mode::min; - case InferenceEngine::EltwiseLayer::Div: - return cldnn::eltwise_mode::div; - case InferenceEngine::EltwiseLayer::Squared_diff: - return cldnn::eltwise_mode::squared_diff; - case InferenceEngine::EltwiseLayer::Equal: - return cldnn::eltwise_mode::eq; - case InferenceEngine::EltwiseLayer::Not_equal: - return cldnn::eltwise_mode::ne; - case InferenceEngine::EltwiseLayer::Less: - return cldnn::eltwise_mode::lt; - case InferenceEngine::EltwiseLayer::Less_equal: - return cldnn::eltwise_mode::le; - case InferenceEngine::EltwiseLayer::Greater: - return cldnn::eltwise_mode::gt; - case InferenceEngine::EltwiseLayer::Greater_equal: - return cldnn::eltwise_mode::ge; - case InferenceEngine::EltwiseLayer::Logical_AND: - return cldnn::eltwise_mode::logic_and; - case InferenceEngine::EltwiseLayer::Logical_OR: - return cldnn::eltwise_mode::logic_or; - case InferenceEngine::EltwiseLayer::Logical_XOR: - return cldnn::eltwise_mode::logic_xor; - default: THROW_CLDNN_EXCEPTION("Unsupported eltwise operation: " << op); - break; - } + node2layer.emplace_back(prim_info, layer); - return cldnn::eltwise_mode::max; // shouldn't get here -} + size_t in_size = prim_info.c_dependencies.size(); -cldnn::concatenation::concatenation_axis CLDNNGraph::ConcatAxisFromIEAxis(unsigned axis) { - switch (axis) { - case 0: - return cldnn::concatenation::concatenation_axis::along_b; - case 1: - return cldnn::concatenation::concatenation_axis::along_f; - case 2: - return cldnn::concatenation::concatenation_axis::along_y; - case 3: - return cldnn::concatenation::concatenation_axis::along_x; - default: THROW_CLDNN_EXCEPTION("Unsupported concatenation axis: " << axis); - break; - } + if (filter_const_primitives) { + // Decrease expected dependencies count if there is a const input without original id in the IR + for (auto& dep : prim_info.c_dependencies.cpp_ids) { + auto it = std::find_if(primitives_info.begin(), primitives_info.end(), [&](cldnn::primitive_info& entry) { + return entry.original_id == dep; + }); - return cldnn::concatenation::concatenation_axis::along_f; // shouldn't get here -} + if (it == primitives_info.end()) + --in_size; -void CLDNNGraph::CreatePrimitiveFromBlob(cldnn::primitive_id primID, - const InferenceEngine::Blob::Ptr pBlob, - const cldnn::layout& blobLayout, - size_t blobByteOffset, - WeightRearrangeType rearrange) { - auto mem = cldnn::memory::allocate(*(m_env.engine), blobLayout); - auto tmpPointer = mem.pointer(); // implicitly maps buffer - unmap in destructor - auto buf = tmpPointer.data(); - auto 
bufSize = blobLayout.bytes_count();
-//  The condition below is not valid once we use groups - todo: think of some other size check here
-//    if ((pBlob != nullptr) &&
-//        (pBlob->size() * (broadcastFeatures ? blobLayout.size.feature[0] : 1)) != blobLayout.count()) {
-//        THROW_CLDNN_EXCEPTION("Unexpected blob size");
-//    }
-    if (pBlob == nullptr) {
-        THROW_CLDNN_EXCEPTION("Missing blob data: " << primID);
-    } else if ((pBlob->layout() != InferenceEngine::OIHW) &&
-               (pBlob->layout() != InferenceEngine::NCHW) &&
-               (pBlob->layout() != InferenceEngine::CHW) &&
-               (pBlob->layout() != InferenceEngine::NC) &&
-               (pBlob->layout() != InferenceEngine::C)) {
-        // TODO: support more layouts
-        THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(pBlob->layout()) << ") in blob: " << primID);
-    } else if (rearrange == BroadcastFeatures) {
-        size_t features = static_cast<size_t>(blobLayout.size.feature[0]);
-        if (pBlob->size() != features) {
-            THROW_CLDNN_EXCEPTION("Invalid blob dimensions to broadcast: " << primID);
-        }
-        auto data = static_cast<const char *>(pBlob->buffer());
-        auto elementSize = cldnn::data_type_traits::size_of(blobLayout.data_type);
-        size_t featureElements = blobLayout.count() / static_cast<size_t>(blobLayout.size.feature[0]);
-        IE_ASSERT(blobLayout.format == cldnn::format::bfyx);
-        for (size_t f = 0; f < features; f++) {
-            for (size_t e = 0; e < featureElements; e++) {
-                for (size_t b = 0; b < elementSize; b++) {
-                    buf[(f*featureElements + e)*elementSize + b] = data[f*elementSize + b];
+                if (it != primitives_info.end() && it->type_id == "data") {
+                    std::vector<std::string> childOriginalNames{find_origin_layers(prim_info.original_id)};
+                    --in_size;
+                }
            }
        }
-    } else if (rearrange == FlipDeconvDims) {
-        auto data = static_cast<const char *>(pBlob->buffer());
-        auto elementSize = cldnn::data_type_traits::size_of(blobLayout.data_type);
-
-        size_t inputFeatureElements = static_cast<size_t>(blobLayout.size.feature[0]);
-        size_t outputFeatureElements = static_cast<size_t>(blobLayout.size.batch[0]);
-
-        size_t featureSize = elementSize * static_cast<size_t>(blobLayout.size.spatial[0] * blobLayout.size.spatial[1]);
+        layer->insData.resize(in_size);
+        layer->outData.resize(prim_info.c_users.size());
-        for (size_t i = 0; i < inputFeatureElements; i++) {
-            for (size_t o = 0; o < outputFeatureElements; o++) {
-                size_t outputShift = (o*inputFeatureElements + i)*featureSize;
-                size_t inputShift = (i*outputFeatureElements + o)*featureSize;
+        return layer;
+    };
-                for (size_t b = 0; b < featureSize; b++) {
-                    buf[outputShift + b] = data[inputShift + b];
+    if (filter_const_primitives) {
+        for (auto& pi : primitives_info) {
+            // extract mutable_data primitives and connect their dependencies and users directly
+            if (pi.type_id == "mutable_data") {
+                if (pi.c_dependencies.cpp_ids.size() == 1 && !pi.c_users.cpp_ids.empty()) {
+                    auto dep = pi.c_dependencies.cpp_ids[0];
+                    auto users = pi.c_users.cpp_ids;
+                    auto it = std::find_if(primitives_info.begin(), primitives_info.end(), [&](cldnn::primitive_info& entry) {
+                        return entry.original_id == dep;
+                    });
+                    if (it == primitives_info.end())
+                        continue;
+
+                    auto& dep_users = it->c_users.cpp_ids;
+                    // Remove the mutable_data primitive from its dependency's users list
+                    dep_users.erase(std::find_if(dep_users.begin(), dep_users.end(), [&](std::string user_id) {
+                        return user_id == pi.original_id;
+                    }));
+
+                    // Add the mutable_data users to its dependency's users
+                    dep_users.insert(dep_users.end(), users.begin(), users.end());
+
+                    for (auto& user : users) {
+                        it = std::find_if(primitives_info.begin(), primitives_info.end(), [&](cldnn::primitive_info& entry) {
+                            return entry.original_id == user;
+ }); + if (it == primitives_info.end()) + continue; + + for (auto& d : it->c_dependencies.cpp_ids) { + if (d == pi.original_id) + d = dep; + } + } } } } - } else { - auto data = static_cast(pBlob->buffer()); - for (size_t i = 0; i < bufSize; i++) { - buf[i] = data[i + blobByteOffset]; - } - } - m_topology->add(cldnn::data(primID, mem)); -} - -void CLDNNGraph::CreateWeightAndBiasPrimitives(const InferenceEngine::CNNLayerPtr& layer, - std::vector& weightsPrimID, - std::vector& biasesPrimID) { - cldnn::tensor::value_type inFeatures = 1; // todo: workaround for xyf input, handle general case (xf, xyzf etc...) - std::shared_ptr insData0 = layer->insData[0].lock(); - IE_ASSERT(insData0 != nullptr); - if (insData0->dims.size() > 2) { - inFeatures = TensorValue(insData0->dims[2]); - } - cldnn::tensor::value_type outFeatures(0); - std::vector weightDimsVec; - InferenceEngine::Blob::Ptr pWeightsBlob, pBiasBlob; - unsigned groupSize = 1; - WeightRearrangeType rearrange = NO_REARRANGE; - - switch (LayerTypeFromStr(layer->type)) { - case Convolution: { - auto convLayer = as (layer); - if ((inFeatures % groupSize) || (convLayer->_out_depth % groupSize)) { - THROW_CLDNN_EXCEPTION("Invalid group size in layer " << convLayer->name); - } - groupSize = convLayer->_group; - if (groupSize >= 16) // cldnn optimization for 16 and more groups - groupSize = 1; - weightDimsVec = { - TensorValue(convLayer->_out_depth / groupSize), - TensorValue(inFeatures / convLayer->_group), - TensorValue(convLayer->_kernel[X_AXIS]), - TensorValue(convLayer->_kernel[Y_AXIS]) - }; - outFeatures = convLayer->_out_depth; - pWeightsBlob = convLayer->_weights; - pBiasBlob = convLayer->_biases; - } - break; - case Deconvolution: { - auto deconvLayer = as (layer); - if ((inFeatures % groupSize) || (deconvLayer->_out_depth % groupSize)) { - THROW_CLDNN_EXCEPTION("Invalid group size in layer " << deconvLayer->name); - } - groupSize = deconvLayer->_group; - if (groupSize >= 16) // cldnn optimization for 16 and more groups - groupSize = 1; - weightDimsVec = { - TensorValue(deconvLayer->_out_depth / groupSize), - TensorValue(inFeatures / deconvLayer->_group), - TensorValue(deconvLayer->_kernel[X_AXIS]), - TensorValue(deconvLayer->_kernel[Y_AXIS]) - }; - outFeatures = deconvLayer->_out_depth; - pWeightsBlob = deconvLayer->_weights; - pBiasBlob = deconvLayer->_biases; - - if ((groupSize < outFeatures) || (groupSize < inFeatures)) - rearrange = FlipDeconvDims; - } - break; - default: - IE_ASSERT("Wrong weightable layer type"); // shouldn't get here - break; } - // create weights primitive - cldnn::layout weightsLayout = cldnn::layout( - DataTypeFromPrecision(layer->precision), - m_defaultFormat, - cldnn::tensor(weightDimsVec)); - size_t bytesPerGroup = weightsLayout.bytes_count(); - - for (unsigned g = 0; g < groupSize; g++) { - cldnn::primitive_id weightID = layer_type_name_ID(layer) + m_weightsTag + std::to_string(g); - CreatePrimitiveFromBlob( - weightID, - pWeightsBlob, - weightsLayout, - g * bytesPerGroup, - rearrange); - weightsPrimID.push_back(weightID); - } + for (auto& pi : primitives_info) { + if (filter_const_primitives) { + // Skip const inputs + if (pi.type_id == "data") { + continue; + } + + // Skip mutable_data + if (pi.type_id == "mutable_data" && + pi.c_dependencies.cpp_ids.size() == 1 && + !pi.c_users.cpp_ids.empty()) { + continue; + } + } + auto layer = create_layer(pi); + net->addLayer(layer); + } + + auto desc_from_layout = [&](cldnn::layout layout) -> TensorDesc { + Precision precision = 
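// Annotation (illustrative sketch, not part of this patch): create_layer above
// serializes runtime metadata (PERF_COUNTER, OUTPUT_LAYOUTS, EXECUTION_ORDER)
// into string params of each exec-graph node. A minimal consumer, assuming an
// exec-graph network obtained from GetExecGraphInfo below:
//
//     InferenceEngine::details::CNNNetworkIterator it(exec_graph.get()), end;
//     for (; it != end; ++it) {
//         const auto& params = (*it)->params;
//         std::cout << (*it)->name << ": "
//                   << params.at(ExecGraphInfoSerialization::PERF_COUNTER) << " us\n";
//     }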
data_type_to_precision(layout.data_type); + SizeVector dims; + Layout l = Layout::NCHW; + auto size = layout.size; + if (layout.format.dimension() == 4) { + dims = {static_cast(size.batch[0]), + static_cast(size.feature[0]), + static_cast(size.spatial[1]), + static_cast(size.spatial[0])}; + } else if (layout.format.dimension() == 5) { + dims = {static_cast(size.batch[0]), + static_cast(size.feature[0]), + static_cast(size.spatial[2]), + static_cast(size.spatial[1]), + static_cast(size.spatial[0])}; + l = Layout::NCDHW; + } else if (layout.format.dimension() == 6) { + dims = {static_cast(size.batch[0]), + static_cast(size.feature[0]), + static_cast(size.spatial[3]), + static_cast(size.spatial[2]), + static_cast(size.spatial[1]), + static_cast(size.spatial[0])}; + // Should be NC?DHW but there is no such layout yet + l = Layout::BLOCKED; + } + TensorDesc dst{precision, dims, l}; + return dst; + }; - // create bias primitive - if (pBiasBlob != nullptr) { - cldnn::layout biasesLayout = cldnn::layout( - DataTypeFromPrecision(layer->precision), - m_defaultFormat, - cldnn::spatial(TensorValue(outFeatures / groupSize))); - size_t bytesPerGroup = biasesLayout.bytes_count(); - for (unsigned g = 0; g < groupSize; g++) { - cldnn::primitive_id biasID = layer_type_name_ID(layer) + m_biasesTag + std::to_string(g); - CreatePrimitiveFromBlob( - biasID, - pBiasBlob, - biasesLayout, - g * bytesPerGroup); - biasesPrimID.push_back(biasID); - } - } -} + for (auto& pair : node2layer) { + auto pi = pair.first; + auto layer = pair.second; + auto user_ids = pi.c_users.cpp_ids; + for (int i = 0; i < user_ids.size(); i++) { + auto it = std::find_if(node2layer.begin(), node2layer.end(), [&](std::pair& entry) { + return entry.first.original_id == user_ids[i]; + }); + + if (it == node2layer.end()) + continue; + + auto& child_layer = it->second; + + DataPtr data; + if (i < layer->outData.size()) { + std::string data_name = pi.original_id + "_out" + std::to_string(i); + layer->outData[i] = std::make_shared(data_name, desc_from_layout(pi.output_layout)); + data = layer->outData[i]; + data->getCreatorLayer() = layer; + } else { + data = layer->outData[0]; + } -void CLDNNGraph::CreateScaleWeightsAndBiasesFromBN( - const InferenceEngine::BatchNormalizationLayer* bnLayer, - cldnn::primitive_id weightsPrimID, - cldnn::primitive_id biasesPrimID) { + int in_port_id = 0; + for (auto& dep : it->first.c_dependencies.cpp_ids) { + if (filter_const_primitives) { + auto it = std::find_if(node2layer.begin(), node2layer.end(), [&](std::pair& entry) { + return entry.first.original_id == dep; + }); - if (bnLayer->_weights->dims() != bnLayer->_biases->dims()) { - THROW_CLDNN_EXCEPTION("mean/variance dimensions mismatch in " << bnLayer->name); - } - if (bnLayer->_weights->precision() != bnLayer->_biases->precision()) { - THROW_CLDNN_EXCEPTION("mean/variance precision mismatch in " << bnLayer->name); - } + if (it == node2layer.end()) + continue; + } - cldnn::tensor blobTensor(0); - switch (bnLayer->outData[0]->dims.size()) { - case 2: - blobTensor = cldnn::feature(TensorValue(bnLayer->outData[0]->dims[0])); - break; - case 4: - blobTensor = cldnn::feature(TensorValue(bnLayer->outData[0]->dims[2])); - break; - default: - THROW_CLDNN_EXCEPTION("Batch normalization input doesn't have 2 or 4 dimensions in " << bnLayer->name); - } - cldnn::layout blobLayout( - DataTypeFromPrecision(bnLayer->precision), - m_defaultFormat, - blobTensor); - - switch (bnLayer->_weights->precision()) { - case Precision::FP16: { - InferenceEngine::TBlob 
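// Annotation: desc_from_layout above reverses the spatial order because cldnn
// stores spatial extents x-first (size.spatial[0] == W, size.spatial[1] == H)
// while IE dims are W-last. For the 4D case the mapping is:
//
//     cldnn: { batch[0] = N, feature[0] = C, spatial[1] = H, spatial[0] = W }
//     IE:    TensorDesc{ precision, {N, C, H, W}, Layout::NCHW }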
weightsBlob(bnLayer->_weights->precision(), bnLayer->_weights->layout(), bnLayer->_weights->dims()); - weightsBlob.allocate(); - InferenceEngine::TBlob biasesBlob(bnLayer->_biases->precision(), bnLayer->_weights->layout(), bnLayer->_biases->dims()); - biasesBlob.allocate(); - - auto weightsData = weightsBlob.data(); - auto biasesData = biasesBlob.data(); - auto varianceData = static_cast(bnLayer->_weights->buffer()); - auto meanData = static_cast(bnLayer->_biases->buffer()); - - cldnn_status status = CLDNN_SUCCESS; - for (size_t i = 0; i < weightsBlob.size(); i++) { - auto variance = cldnn_half_to_float(varianceData[i], &status); - if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name); - auto mean = cldnn_half_to_float(meanData[i], &status); - if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name); - - float scale = 1.0f / sqrt(variance + bnLayer->epsilon); - weightsData[i] = cldnn_float_to_half(scale, &status); - if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name); - biasesData[i] = cldnn_float_to_half((-mean) * scale, &status); - if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name); - } - CreatePrimitiveFromBlob(weightsPrimID, std::make_shared>(weightsBlob), blobLayout); - CreatePrimitiveFromBlob(biasesPrimID, std::make_shared>(biasesBlob), blobLayout); - } - break; - case Precision::FP32: { - InferenceEngine::TBlob weightsBlob(bnLayer->_weights->precision(), bnLayer->_weights->layout(), bnLayer->_weights->dims()); - weightsBlob.allocate(); - InferenceEngine::TBlob biasesBlob(bnLayer->_biases->precision(), bnLayer->_weights->layout(), bnLayer->_biases->dims()); - biasesBlob.allocate(); - - auto weightsData = weightsBlob.data(); - auto biasesData = biasesBlob.data(); - auto varianceData = static_cast(bnLayer->_weights->buffer()); - auto meanData = static_cast(bnLayer->_biases->buffer()); - - for (size_t i = 0; i < weightsBlob.size(); i++) { - auto variance = varianceData[i]; - auto mean = meanData[i]; - weightsData[i] = 1.0f / sqrt(variance + bnLayer->epsilon); - biasesData[i] = (-mean) * weightsData[i]; + if (dep == pi.original_id && child_layer->insData[in_port_id].lock() == nullptr) { + data->getInputTo()[child_layer->name] = child_layer; + child_layer->insData[in_port_id] = data; + break; + } + in_port_id++; + } } - CreatePrimitiveFromBlob(weightsPrimID, std::make_shared>(weightsBlob), blobLayout); - CreatePrimitiveFromBlob(biasesPrimID, std::make_shared>(biasesBlob), blobLayout); - } - break; - default: - THROW_CLDNN_EXCEPTION("Unhandled mean/variance precision in " << bnLayer->name); - break; } -} + // Specify inputs data + for (auto& pair : node2layer) { + auto pi = pair.first; + auto layer = pair.second; + if (pi.c_dependencies.size() != 0) + continue; -void CLDNNGraph::CreateSingleLayerPrimitive(InferenceEngine::CNNLayerPtr &layer) { - // Initialize a profiling entry - InitProfileInfo(layer->name, layer->type); + auto in_info = std::make_shared(); + if (layer->outData.empty()) + continue; - // First check for custom layer - auto customLayer = m_config.customLayers.find(layer->type); - if (customLayer != m_config.customLayers.end()) { - CreateCustomLayerPrimitive(layer, customLayer->second); - return; - } + auto dt = layer->outData[0]; + auto tensor_desc = desc_from_layout(pi.output_layout); - // Otherwise move on to built-in layer types - switch 
(LayerTypeFromStr(layer->type)) { - case Convolution: CreateConvolutionPrimitive(layer); - break; - case ReLU: - case ReLU6: - case Sigmoid: - case TanH: - case ELU: - case Clamp: - case Activation: - case Exp: - case Not: - CreateActivationPrimitive(layer, LayerTypeFromStr(layer->type)); - break; - case LRN: CreateLRNPrimitive(layer); - break; - case Pooling: CreatePoolingPrimitive(layer); - break; - case Unpooling: CreateMaxUnpoolingPrimitive(layer); - break; - case FullyConnected: CreateFullyConnectedPrimitive(layer); - break; - case SoftMax: CreateSoftMaxPrimitive(layer); - break; - case Power: CreatePowerPrimitive(layer); - break; - case Split: CreateSplitPrimitive(layer); - break; - case Concatenate: CreateConcatenatePrimitive(layer); - break; - case Eltwise: CreateEltwisePrimitive(layer); - break; - case SimplerNMS: CreateSimplerNMSPrimitive(layer); - break; - case ROIPooling: CreateROIPoolingPrimitive(layer); - break; - case Crop: CreateCropPrimitive(layer); - break; - case Deconvolution: CreateDeconvolutionPrimitive(layer); - break; - case PriorBox: CreatePriorBoxPrimitive(layer); - break; - case DetectionOutput: CreateDetectionOutputPrimitive(layer); - break; - case Normalize: CreateNormalizePrimitive(layer); - break; - case Reshape: CreateReshapePrimitive(layer); - break; - case Permute: CreatePermutePrimitive(layer); - break; - case Flatten: CreateFlattenPrimitive(layer); - break; - case BatchNormalization: CreateBatchNormalizationPrimitive(layer); - break; - case PReLU: CreatePReLUPrimitive(layer); - break; - case ScaleShift: CreateScaleShiftPrimitive(layer); - break; - case Proposal: CreateProposalPrimitive(layer); - break; - case PSROIPooling: CreatePSROIPoolingPrimitive(layer); - break; - case Copy: CreateCopyPrimitive(layer); - break; - case Upsampling: CreateUpsamplingPrimitive(layer); - break; - case Resample: CreateResamplePrimitive(layer); - break; - case ArgMax: CreateArgMaxPrimitive(layer); - break; - case MVN: CreateMVNPrimitive(layer); - break; - case LSTMCell: CreateLSTMCellPrimitive(layer); - break; - case RNN: CreateRNNPrimitive(layer); - break; - case RegionYolo: CreateYOLO2RegionPrimitive(layer); - break; - case ReorgYolo: CreateYOLO2ReorgPrimitive(layer); - break; - case Tile: CreateTilePrimitive(layer); - break; - case Pad: CreatePadPrimitive(layer); - break; - case Gather: CreateGatherPrimitive(layer); - break; - case DepthToSpace: CreateDepthToSpacePrimitive(layer); - break; - case ShuffleChannels: CreateShuffleChannelsPrimitive(layer); - break; - case StridedSlice: CreateStridedSlicePrimitive(layer); - break; - case ReverseSequence: CreateReverseSequencePrimitive(layer); - break; - default: THROW_CLDNN_EXCEPTION("Unknown Layer Type: " << layer->type); - } -} + dt->setDims(tensor_desc.getDims()); + dt->setPrecision(tensor_desc.getPrecision()); + dt->setLayout(tensor_desc.getLayout()); -void CLDNNGraph::CreateScaleShiftPrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 1); - auto inputPrimitives = GetPrevLayersPrimitives(layer); - auto scaleShiftLayer = as (layer); - - // create scales and biases - cldnn::primitive_id scalePrimID = scaleShiftLayer->name + m_scalesTag; - cldnn::primitive_id biasPrimID = scaleShiftLayer->name + m_biasesTag; - - const auto& dims = scaleShiftLayer->_weights->dims(); - cldnn::tensor weightTensor(1); - switch (dims.size()) { - case 1: weightTensor = cldnn::feature(TensorValue(dims[0])); // value per feature (or 1 global value) - break; - case 4: weightTensor = cldnn::tensor(TensorValue(dims[0]), 
TensorValue(dims[1]), TensorValue(dims[3]), TensorValue(dims[2])); // value per pixel - break; - default: THROW_CLDNN_EXCEPTION("Invalid weights dimensions in layer " << layer->name); - break; - } - cldnn::layout blobLayout(DataTypeFromPrecision(layer->precision), m_defaultFormat, weightTensor); - CreatePrimitiveFromBlob(scalePrimID, scaleShiftLayer->_weights, blobLayout); - if (scaleShiftLayer->_biases != nullptr) { - if (scaleShiftLayer->_biases->dims() != dims) { - THROW_CLDNN_EXCEPTION("Invalid bias blob dimensions in layer " << layer->name); - } - CreatePrimitiveFromBlob(biasPrimID, scaleShiftLayer->_biases, blobLayout); - } else { - biasPrimID = ""; // 0-bias + in_info->setInputData(dt); + net->setInputInfo(in_info); } - std::string scaleShiftLayerName = layer_type_name_ID(layer); - auto scaleShiftPrim = cldnn::scale( - scaleShiftLayerName, - inputPrimitives[0], - scalePrimID, - biasPrimID); - - m_env.primitiveIDs[scaleShiftLayerName] = scaleShiftLayerName; - m_topology->add(scaleShiftPrim); - m_env.profilingIDs.push_back(scaleShiftLayerName); + return net; } -void CLDNNGraph::CreateProposalPrimitive(InferenceEngine::CNNLayerPtr & layer) { - ValidateLayer(layer, 3); - auto proposalLayer = as (layer); - - float nms_thresh = proposalLayer->GetParamAsFloat("nms_thresh", 0.7f); - int min_size = proposalLayer->GetParamAsInt("min_size", 16); - int feature_stride = proposalLayer->GetParamAsInt("feat_stride", 16); - int pre_nms_topn = proposalLayer->GetParamAsInt("pre_nms_topn", 6000); - int post_nms_topn = proposalLayer->GetParamAsInt("post_nms_topn", 300); - const std::vector ratio = proposalLayer->GetParamAsFloats("ratio"); - const std::vector scale = proposalLayer->GetParamAsFloats("scale"); - float box_coordinate_scale = proposalLayer->GetParamAsFloat("box_coordinate_scale", 1.0f); - float box_size_scale = proposalLayer->GetParamAsFloat("box_size_scale", 1.0f); - int base_size = proposalLayer->GetParamAsInt("base_size", 16); - std::string framework = proposalLayer->GetParamAsString("framework", ""); - auto inputPrimitives = GetPrevLayersPrimitives(layer); - bool normalize = layer->GetParamsAsBool("normalize", false); - bool clip_before_nms = layer->GetParamsAsBool("clip_before_nms", true); - bool clip_after_nms = layer->GetParamsAsBool("clip_after_nms", false); - - float coordinates_offset; - bool swap_xy; - bool initial_clip; - bool round_ratios; - bool shift_anchors; - - if (framework == "tensorflow") { - coordinates_offset = 0.0f; - initial_clip = true; - shift_anchors = true; - round_ratios = false; - swap_xy = true; - } else { - coordinates_offset = 1.0f; - initial_clip = false; - shift_anchors = false; - round_ratios = true; - swap_xy = false; - } - - std::string proposalLayerName = layer_type_name_ID(layer); - auto proposalPrim = cldnn::proposal( - proposalLayerName, - inputPrimitives[0], // cls_score - inputPrimitives[1], // bbox_pred - inputPrimitives[2], // im_info - 0, // max_num_proposals is unused - nms_thresh, - base_size, - min_size, - feature_stride, - pre_nms_topn, - post_nms_topn, - ratio, - scale, - coordinates_offset, - box_coordinate_scale, - box_size_scale, - swap_xy, - initial_clip, - clip_before_nms, - clip_after_nms, - round_ratios, - shift_anchors, - normalize); - - m_env.primitiveIDs[proposalLayerName] = proposalLayerName; - m_topology->add(proposalPrim); - m_env.profilingIDs.push_back(proposalLayerName); +void CLDNNGraph::GetExecGraphInfo(InferenceEngine::ICNNNetwork::Ptr &graphPtr) { + auto primitives_info = GetNetwork()->get_primitives_info(); + graphPtr = 
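// Annotation: GetExecGraphInfo below is the plugin-side hook behind the
// executable network's exec-graph query. A hedged usage sketch (the CLDNNGraph
// instance name is illustrative):
//
//     InferenceEngine::ICNNNetwork::Ptr exec_graph;
//     cldnnGraph->GetExecGraphInfo(exec_graph);  // runtime graph with perf params attached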
GetExecGraphInfoByPrimitivesInfo(primitives_info, true);
 }
-void CLDNNGraph::CreatePReLUPrimitive(InferenceEngine::CNNLayerPtr &layer) {
-    ValidateLayer(layer, 1);
-    auto inputPrimitives = GetPrevLayersPrimitives(layer);
-    auto preluLayer = as<InferenceEngine::PReLULayer *>(layer);
-    std::string preluLayerName = layer_type_name_ID(layer);
-    auto inDataPtr = preluLayer->insData[0].lock();
-    if (!inDataPtr) {
-        THROW_CLDNN_EXCEPTION("Data inserted into PReLU " << preluLayer->name << " is nullptr");
+void CLDNNGraph::UpdatePerfStatistics() {
+    if (GetNetworksCount() == 0) {
+        return;
     }
-    auto inputDims = inDataPtr->dims;
-    static const std::string blobName("weights");
-    ValidateGenericLayerBlobs(preluLayer, { blobName });
-    bool channel_shared = preluLayer->GetParamsAsBool("channel_shared", false);
+    std::map<cldnn::primitive_id, cldnn::event> executedPrimitives = GetNetwork()->get_executed_primitives();
+    auto allPrimitives = GetNetwork()->get_all_primitives();
-    auto slopeBlob = preluLayer->blobs.at(blobName);
-    if (channel_shared) {
-        if (slopeBlob->dims()[0] != 1) {
-            THROW_CLDNN_EXCEPTION("PReLU slope blob with wrong dimensions in " << preluLayer->name);
-        }
-        float slope(0.0f);
-        switch (slopeBlob->precision()) {
-        case InferenceEngine::Precision::FP32:
-            slope = *static_cast<const float *>(slopeBlob->buffer());
-            break;
-        case InferenceEngine::Precision::FP16:
-        {
-            cldnn_status status = CLDNN_SUCCESS;
-            slope = cldnn_half_to_float(*static_cast<const uint16_t *>(slopeBlob->buffer()), &status);
-            if (status != CLDNN_SUCCESS) {
-                THROW_CLDNN_EXCEPTION("Error converting fp16 value in " << preluLayer->name);
+    // Get profiling info for all layers
+    for (auto &profiledID : profilingIDs) {
+        auto& perfCount = perfMap[profiledID].second;
+        // Change status if layer wasn't executed by cldnn engine
+        if (perfCount.num == 0 &&
+            executedPrimitives.find(profiledID) == executedPrimitives.end()) {
+            if (allPrimitives.find(profiledID) != allPrimitives.end() &&
+                allPrimitives.at(profiledID) == "_optimized_") {
+                // Layer was marked as optimized by cldnn
+                perfCount.status = InferenceEngineProfileInfo::OPTIMIZED_OUT;
+            } else {
+                // Layer wasn't run for some reason
+                perfCount.status = InferenceEngineProfileInfo::NOT_RUN;
            }
+            continue;
        }
-            break;
-        default: THROW_CLDNN_EXCEPTION("Invalid PReLU slope blob precision in " << preluLayer->name);
-        }
-        m_topology->add(cldnn::activation(preluLayerName, inputPrimitives[0], activation_relu_negative_slope, { slope, 0.f }));
-    } else {
-        CreateGenericLayerBlobPrimitives(preluLayer);
-        cldnn::primitive_id slopePrimID(preluLayerName + "_" + blobName + m_weightsTag);
-        m_topology->add(cldnn::activation(preluLayerName, inputPrimitives[0], slopePrimID, activation_relu_negative_slope));
-    }
-
-    m_env.primitiveIDs[preluLayerName] = preluLayerName;
-    m_env.profilingIDs.push_back(preluLayerName);
-}
-
-void CLDNNGraph::CreateBatchNormalizationPrimitive(InferenceEngine::CNNLayerPtr & layer) {
-    ValidateLayer(layer, 1);
-    auto inputPrimitives = GetPrevLayersPrimitives(layer);
-    std::string bnLayerName = layer_type_name_ID(layer);
-
-    auto bnLayer = as<InferenceEngine::BatchNormalizationLayer *>(layer);
-    cldnn::primitive_id weightID = bnLayerName + "_" + m_scalesTag;
-    cldnn::primitive_id biasID = bnLayerName + "_" + m_biasesTag;
-
-#define _SCALE_BN_OPT
-#ifdef _SCALE_BN_OPT
-    // Using scale as an optimization (1 mad instead of mad+rsq)
-    // create new blobs for scale shift
-    CreateScaleWeightsAndBiasesFromBN(bnLayer, weightID, biasID);
-    auto scalePrim = cldnn::scale(bnLayerName, inputPrimitives[0], weightID, biasID);
-
-    m_env.primitiveIDs[bnLayerName] = bnLayerName;
-    m_topology->add(scalePrim);
-
m_env.profilingIDs.push_back(bnLayerName); - return; -#else - cldnn::tensor blobTensor(0); - switch (bnLayer->outData[0]->dims.size()) { - case 2: - blobTensor = cldnn::feature(TensorValue(bnLayer->outData[0]->dims[0])); - break; - case 4: - blobTensor = cldnn::feature(TensorValue(bnLayer->outData[0]->dims[2])); - break; - default: - THROW_CLDNN_EXCEPTION("Batch normalization input doesn't have 2 or 4 dimensions in " << bnLayer->name); - } - cldnn::layout blobLayout( - DataTypeFromPrecision(layer->precision), - m_defaultFormat, - blobTensor); - - // Create variance primitive - cldnn::primitive_id varianceID = bnLayerName + "_" + m_weightsTag; - CreatePrimitiveFromBlob(varianceID, bnLayer->_weights, blobLayout); - - // Create mean primitive - cldnn::primitive_id meanID = bnLayerName + "_" + m_biasesTag; - CreatePrimitiveFromBlob(meanID, bnLayer->_biases, blobLayout); - - auto bnPrim = cldnn::batch_norm( - bnLayerName, - inputPrimitives[0], - meanID, - varianceID, - bnLayer->epsilon); - - m_env.primitiveIDs[bnLayerName] = bnLayerName; - m_topology->add(bnPrim); - m_env.profilingIDs.push_back(bnLayerName); -#endif // _SCALE_BN_OPT -} - -void CLDNNGraph::CreateFlattenPrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 1); - auto inputPrimitives = GetPrevLayersPrimitives(layer); - auto flattenLayer = as (layer); - std::string flattenLayerName = layer_type_name_ID(layer); - - auto flattenPrim = cldnn::reshape( - flattenLayerName, - inputPrimitives[0], - CldnnTensorFromIEDims(flattenLayer->outData[0]->dims)); - - m_env.primitiveIDs[flattenLayerName] = flattenLayerName; - m_topology->add(flattenPrim); - m_env.profilingIDs.push_back(flattenLayerName); -} - -void CLDNNGraph::CreatePermutePrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 1); - auto inputPrimitives = GetPrevLayersPrimitives(layer); - auto permuteLayer = as (layer); - std::vector ie_order; - for (auto& a : permuteLayer->GetParamAsInts("order")) - ie_order.push_back(static_cast(a)); - - // if order size is less than 4 - fill the rest with just copy - for (auto o = ie_order.size(); o < 4; o++) - ie_order.push_back((uint16_t)o); - - /* - Because ofthe cldnn ordering: bfxy, and IE ordering: bfyx - wee need to adjust the permute order. - */ - std::vector cldnn_permute_order; - // 1. Switch permute order values (x and y) - for (auto const& o : ie_order) { - if (o == 2) - cldnn_permute_order.push_back(3); - else if (o == 3) - cldnn_permute_order.push_back(2); - else - cldnn_permute_order.push_back(o); - } - // 2. 
Swap x and y positions - std::swap(cldnn_permute_order[2], cldnn_permute_order[3]); - - std::string permuteLayerName = layer_type_name_ID(layer); - auto permutePrim = cldnn::permute( - permuteLayerName, - inputPrimitives[0], - cldnn_permute_order); + auto event = executedPrimitives.at(profiledID); + executedPrimitives.erase(profiledID); - m_env.primitiveIDs[permuteLayerName] = permuteLayerName; - m_topology->add(permutePrim); - m_env.profilingIDs.push_back(permuteLayerName); -} - -void CLDNNGraph::CreateReshapePrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 1); - auto inputPrimitives = GetPrevLayersPrimitives(layer); - auto reshapeLayer = as (layer); - IE_ASSERT(reshapeLayer->outData.size()); - std::string reshapeLayerName = layer_type_name_ID(layer); - - auto reshapePrim = cldnn::reshape( - reshapeLayerName, - inputPrimitives[0], - CldnnTensorFromIEDims(reshapeLayer->outData[0]->dims)); - - m_env.primitiveIDs[reshapeLayerName] = reshapeLayerName; - m_topology->add(reshapePrim); - m_env.profilingIDs.push_back(reshapeLayerName); -} + cldnn::instrumentation::profiling_info cldnnInfo{profiledID, event.get_profiling_info()}; -void CLDNNGraph::CreateNormalizePrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 1); - auto inputPrimitives = GetPrevLayersPrimitives(layer); - auto normLayer = as (layer); - ValidateGenericLayerBlobs(normLayer, { "weights" }); - CreateGenericLayerBlobPrimitives(normLayer); + // Collect timings + for (auto &interval : cldnnInfo.intervals) { + using duration_t = std::chrono::duration; + auto count = std::chrono::duration_cast(interval.value->value()).count(); - // params - bool across_spatial = normLayer->GetParamsAsBool("across_spatial", true); - float eps = normLayer->GetParamAsFloat("eps", 0.0f); + if (interval.name == "submission") { + perfCount.cpu_uSec += count; + } else if (interval.name == "executing") { + perfCount.realTime_uSec += count; + } else if (interval.name == "duration") { // "duration" is used for CPU layers + perfCount.cpu_uSec += count; - // WA for MO outputting %.6f - if (eps == 0.0f) { - eps = 1e-10f; + if (perfCount.num == 0) + perfCount.isCPU = true; + } + } + perfCount.num++; } - - std::string normLayerName = layer_type_name_ID(layer); - auto normPrim = cldnn::normalize( - normLayerName, - inputPrimitives[0], - normLayerName + "_weights" + m_weightsTag, - across_spatial, - eps); - - m_env.primitiveIDs[normLayerName] = normLayerName; - m_topology->add(normPrim); - m_env.profilingIDs.push_back(normLayerName); } -void CLDNNGraph::CreateDetectionOutputPrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 3); - auto detectionLayer = as (layer); - - uint32_t num_classes = detectionLayer->GetParamAsUInt("num_classes", 1); - bool share_location = detectionLayer->GetParamsAsBool("share_location", true); - int background_label_id = detectionLayer->GetParamAsInt("background_label_id", 0); - float nms_threshold = detectionLayer->GetParamAsFloat("nms_threshold", 0.3f); - int top_k = detectionLayer->GetParamAsInt("top_k", -1); - float confidence_threshold = detectionLayer->GetParamAsFloat("confidence_threshold", -FLT_MAX); - float eta = detectionLayer->GetParamAsFloat("eta", 1.0f); - int keep_top_k = detectionLayer->GetParamAsInt("keep_top_k", -1); - bool variance_encoded_in_target = detectionLayer->GetParamsAsBool("variance_encoded_in_target", false); - int input_width = detectionLayer->GetParamAsInt("input_width", -1); - int input_height = detectionLayer->GetParamAsInt("input_height", -1); - 
bool normalized = detectionLayer->GetParamsAsBool("normalized", true); - std::string code_type = detectionLayer->GetParamAsString("code_type", "caffe.PriorBoxParameter.CORNER"); - bool clip_before_nms = detectionLayer->GetParamsAsBool("clip_before_nms", false) || - detectionLayer->GetParamsAsBool("clip", false); // For backward compatibility - bool clip_after_nms = detectionLayer->GetParamsAsBool("clip_after_nms", false); - bool decrease_label_id = detectionLayer->GetParamsAsBool("decrease_label_id", false); - - cldnn::prior_box_code_type cldnnCodeType = PriorBoxCodeFromString(code_type); - int32_t prior_info_size = normalized != 0 ? 4 : 5; - int32_t prior_coordinates_offset = normalized != 0 ? 0 : 1; - - auto inputPrimitives = GetPrevLayersPrimitives(layer); - std::string detectionLayerName = layer_type_name_ID(layer); - auto detectionPrim = cldnn::detection_output(detectionLayerName, - inputPrimitives[0], - inputPrimitives[1], - inputPrimitives[2], - num_classes, - keep_top_k, - share_location, - background_label_id, - nms_threshold, - top_k, - eta, - cldnnCodeType, - variance_encoded_in_target, - confidence_threshold, - prior_info_size, - prior_coordinates_offset, - normalized, - input_width, - input_height, - decrease_label_id, - clip_before_nms, - clip_after_nms); - - m_env.primitiveIDs[detectionLayerName] = detectionLayerName; - m_topology->add(detectionPrim); - m_env.profilingIDs.push_back(detectionLayerName); +bool CLDNNGraph::IsLoaded() const { + return GetNetwork() != nullptr; } -void CLDNNGraph::CreatePriorBoxPrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 2); - auto priorBoxLayer = as (layer); - - // params - std::vector min_size = priorBoxLayer->GetParamAsFloats("min_size"); - std::vector max_size = priorBoxLayer->GetParamAsFloats("max_size", {}); - std::vector aspect_ratio = priorBoxLayer->GetParamAsFloats("aspect_ratio", {}); - std::vector variance = priorBoxLayer->GetParamAsFloats("variance"); - bool flip = priorBoxLayer->GetParamsAsBool("flip", true); - bool clip = priorBoxLayer->GetParamsAsBool("clip", false); - bool scale_all_sizes = priorBoxLayer->GetParamsAsBool("scale_all_sizes", true); - float offset = priorBoxLayer->GetParamAsFloat("offset", 0.5f); - - auto step_w = priorBoxLayer->GetParamAsFloat("step_w", 0.0f); - auto step_h = priorBoxLayer->GetParamAsFloat("step_h", 0.0f); - auto step = priorBoxLayer->GetParamAsFloat("step", 0.0f); - - float _step_w = 0.0f; - float _step_h = 0.0f; - if (HasParam(priorBoxLayer->params, "step_w") && step_w != 0.0f && - HasParam(priorBoxLayer->params, "step_h") && step_h != 0.0f) { - _step_w = step_w; - _step_h = step_h; - } else if (HasParam(priorBoxLayer->params, "step") && step != 0.0f) { - _step_w = step; - _step_h = step; - } - - int img = priorBoxLayer->GetParamAsInt("img_size", 0); - int img_w = priorBoxLayer->GetParamAsInt("img_w", 0); - int img_h = priorBoxLayer->GetParamAsInt("img_h", 0); - if ((img != 0) || (img_w != 0) || (img_h != 0)) { - // unsupported mode - THROW_CLDNN_EXCEPTION("Unsupported image sizes in prior box " + layer->name + " (use an image blob instead of dimensions)"); - } - - IE_ASSERT(layer->insData[1].lock()); - auto img_dims = layer->insData[1].lock()->dims; - cldnn::tensor img_size = cldnn::spatial(TensorValue(img_dims[0]), TensorValue(img_dims[1])); - std::vector inputPrimitives = GetPrevLayersPrimitives(layer); - // second input isn't used by value - only dimensions taken from the layer input +void CLDNNGraph::UpdateImplementationsMap() { + if (m_config.useProfiling) { + auto 
extractImplementationFromInfo = [](const std::string& info) -> std::string {
+            std::string def_implementation = "undef";
+            std::string impl_section = "implementation :";
+            std::string::size_type pos = info.find(impl_section);
+            if (pos == std::string::npos) {
+                return def_implementation;
+            }
-    if (_step_w == 0.0f || _step_h == 0.0f) {
-        _step_w = static_cast<float>(img_w) / static_cast<float>(img_dims[0]);
-        _step_h = static_cast<float>(img_h) / static_cast<float>(img_dims[1]);
-    }
+            std::string::size_type end_pos = info.find(',', pos);
+            if (end_pos == std::string::npos) {
+                return def_implementation;
+            }
-    std::string priorBoxLayerName = layer_type_name_ID(layer);
-    auto priorBoxPrim = cldnn::prior_box(
-        priorBoxLayerName,
-        inputPrimitives[0],
-        img_size,
-        min_size,
-        max_size,
-        aspect_ratio,
-        flip,
-        clip,
-        variance,
-        _step_w,
-        _step_h,
-        offset,
-        scale_all_sizes);
-
-    m_env.primitiveIDs[priorBoxLayerName] = priorBoxLayerName;
-    m_topology->add(priorBoxPrim);
-    m_env.profilingIDs.push_back(priorBoxLayerName);
-}
+            std::string::size_type length = end_pos - pos - impl_section.size();
-void CLDNNGraph::CreateDeconvolutionPrimitive(InferenceEngine::CNNLayerPtr &layer) {
-    ValidateLayer(layer, 1);
-    auto inputPrimitives = GetPrevLayersPrimitives(layer);
-    auto deconvLayer = as<InferenceEngine::DeconvolutionLayer *>(layer);
+            auto trim = [](const std::string& str) {
+                size_t first = str.find_first_not_of(' ');
+                if (std::string::npos == first) {
+                    return str;
+                }
+                size_t last = str.find_last_not_of(' ');
+                return str.substr(first, (last - first + 1));
+            };
+            std::string tmp = trim(info.substr(pos + impl_section.size(), length));
-    if (deconvLayer->_dilation[X_AXIS] != 1 || deconvLayer->_dilation[Y_AXIS] != 1) {
-        THROW_CLDNN_EXCEPTION("Unsupported dilation in deconvolution " << layer->name);
-    }
+            return tmp.length() > 1 ? tmp : def_implementation;
+        };
-    std::vector<cldnn::primitive_id> weightPrimID;
-    std::vector<cldnn::primitive_id> biasPrimID;
-    CreateWeightAndBiasPrimitives(layer, weightPrimID, biasPrimID);
-    auto allPads = getPaddings(*deconvLayer);
-    cldnn::tensor stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
-                                         cldnn::spatial(deconvLayer->_stride[X_AXIS], deconvLayer->_stride[Y_AXIS]));
-    cldnn::tensor padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0),
-                                          cldnn::spatial(-allPads.begin[X_AXIS], -allPads.begin[Y_AXIS]));
-
-    std::string deconvLayerName = layer_type_name_ID(layer);
-
-    if (deconvLayer->_group >= 16) {
-        auto deconvPrim = cldnn::deconvolution(deconvLayerName,
-            inputPrimitives[0],
-            weightPrimID,
-            biasPrimID,
-            deconvLayer->_group,
-            stride,
-            padding,
-            false,
-            0.0f,
-            CldnnTensorFromIEDims(deconvLayer->outData[0]->dims));
-        m_topology->add(deconvPrim);
-    } else {
-        auto deconvPrim = cldnn::deconvolution(deconvLayerName,
-            inputPrimitives[0],
-            weightPrimID,
-            biasPrimID,
-            stride,
-            padding,
-            false,
-            0.0f,
-            CldnnTensorFromIEDims(deconvLayer->outData[0]->dims));
-        m_topology->add(deconvPrim);
-    }
-    m_env.primitiveIDs[deconvLayerName] = deconvLayerName;
-    m_env.profilingIDs.push_back(deconvLayerName);
-}
+        // Parse primitive info and extract implementation name.
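// Annotation: get_primitive_info() returns a comma-separated dump along the
// lines of "... implementation : convolution_gpu_bfyx_os_iyx_osv16, ..." (the
// exact format is a cldnn implementation detail), from which the lambda above
// would extract "convolution_gpu_bfyx_os_iyx_osv16"; it falls back to "undef"
// whenever the "implementation :" marker or the terminating comma is missing.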
+ for (auto& id : profilingIDs) { + std::string prim_info = ""; + try { + prim_info = GetNetwork()->get_primitive_info(id); + } catch (std::exception& /*e*/) { } -void CLDNNGraph::CreateCropPrimitive(InferenceEngine::CNNLayerPtr &layer) { - if (layer->insData.size() != 1 && layer->insData.size() != 2) { - THROW_CLDNN_EXCEPTION("Invalid number of inputs for layer: " << layer->name); - } - if (layer->_fusedWith) { - THROW_CLDNN_EXCEPTION("Unsupported fuse in layer: " << layer->name << " with: " << layer->_fusedWith->name); - } - auto inputPrimitives = GetPrevLayersPrimitives(layer); - auto cropLayer = as (layer); - IE_ASSERT(cropLayer->axis.size() == cropLayer->offset.size()); - // IE_ASSERT(cropLayer->outData[0] && cropLayer->outData[0]->dims.size() == 4); - - std::vector offset{ 0, 0, 0, 0 }; - for (size_t i = 0; i < cropLayer->axis.size(); i++) { - if (cropLayer->axis[i] < 0 || cropLayer->axis[i] > 3) { - THROW_CLDNN_EXCEPTION("Invalid crop axis: " + std::to_string(cropLayer->axis[i]) + " in layer " + cropLayer->name); + implementationsMap.insert({id, extractImplementationFromInfo(prim_info)}); } - offset[cropLayer->axis[i]] = cropLayer->offset[i]; } - auto outputDims = cropLayer->outData[0]->dims; - size_t ods = outputDims.size(); - cldnn::tensor refSize( - TensorValue(ods > 3 ? outputDims[3] : 1), - TensorValue(ods > 2 ? outputDims[2] : 1), - TensorValue(outputDims[0]), - TensorValue(outputDims[1])); - - cldnn::tensor offSize( - TensorValue(offset[0]), - TensorValue(offset[1]), - TensorValue(offset[3]), - TensorValue(offset[2])); - - std::string cropLayerName = layer_type_name_ID(layer); - auto cropPrim = cldnn::crop( - cropLayerName, - inputPrimitives[0], - refSize, - offSize); - m_env.primitiveIDs[cropLayerName] = cropLayerName; - m_topology->add(cropPrim); - m_env.profilingIDs.push_back(cropLayerName); } -void CLDNNGraph::CreateROIPoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 2); - auto roiPoolingLayer = as (layer); +void CLDNNGraph::GetPerformanceCounts(std::map &result) const { + unsigned i = 0; + for (auto& profiledID : profilingIDs) { + const auto& layerName = perfMap.at(profiledID).first; + if (layerName.length() == 0) // no layer directly associated + continue; - // params - int pooled_width = roiPoolingLayer->GetParamAsInt("pooled_w", 0); - int pooled_height = roiPoolingLayer->GetParamAsInt("pooled_h", 0); - float spatial_scale = roiPoolingLayer->GetParamAsFloat("spatial_scale", 1.0f); - std::string method = roiPoolingLayer->GetParamAsString("method", "max"); - bool position_sensitive = false; + const auto& perfCounter = perfMap.at(profiledID).second; + auto& extPerfEntry = result[layerName]; - cldnn::pooling_mode mode = cldnn::pooling_mode::max; - if (method == "bilinear") { - mode = cldnn::pooling_mode::bilinear; - } - auto inputPrimitives = GetPrevLayersPrimitives(layer); - - std::string roiPoolingLayerName = layer_type_name_ID(layer); - auto roiPoolingPrim = cldnn::roi_pooling(roiPoolingLayerName, - inputPrimitives[0], // input data - inputPrimitives[1], // input rois - mode, - position_sensitive, - pooled_width, - pooled_height, - spatial_scale); - m_env.primitiveIDs[roiPoolingLayerName] = roiPoolingLayerName; - m_topology->add(roiPoolingPrim); - m_env.profilingIDs.push_back(roiPoolingLayerName); -} - -void CLDNNGraph::CreatePSROIPoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 2); - auto psROIPoolingLayer = as (layer); - - // params - int group_size = psROIPoolingLayer->GetParamAsInt("group_size"); - int 
output_dim = psROIPoolingLayer->GetParamAsInt("output_dim"); - float spatial_scale = psROIPoolingLayer->GetParamAsFloat("spatial_scale"); - size_t spatial_bins_x = static_cast(psROIPoolingLayer->GetParamAsInt("spatial_bins_x", 1)); - size_t spatial_bins_y = static_cast(psROIPoolingLayer->GetParamAsInt("spatial_bins_y", 1)); - std::string mode_str = psROIPoolingLayer->GetParamAsString("mode", "average"); - bool position_sensitive = true; - - cldnn::pooling_mode mode = mode_str == "average" ? cldnn::pooling_mode::average - : cldnn::pooling_mode::bilinear; - - auto inputPrimitives = GetPrevLayersPrimitives(layer); - - std::string psROIPoolingLayerName = layer_type_name_ID(layer); - auto psROIPoolingPrim = cldnn::roi_pooling(psROIPoolingLayerName, - inputPrimitives[0], // input data - inputPrimitives[1], // input rois - mode, - position_sensitive, - group_size, - group_size, - spatial_scale, - output_dim, - spatial_bins_x, - spatial_bins_y); - - m_env.primitiveIDs[psROIPoolingLayerName] = psROIPoolingLayerName; - m_topology->add(psROIPoolingPrim); - m_env.profilingIDs.push_back(psROIPoolingLayerName); -} - -void CLDNNGraph::CreateCustomLayerPrimitive(InferenceEngine::CNNLayerPtr & layer, CLDNNCustomLayerPtr customLayer) { - ValidateLayer(layer, 0); - // todo: handling fusing - auto genericLayer = as (layer); - auto inputPrimitives = GetPrevLayersPrimitives(layer); - - // Handle defines - std::string layerDefines; - for (const auto& def : customLayer->Defines()) { - std::string singleDefine("#define " + def.name + " " + def.prefix); - if (genericLayer->params.find(def.param) != genericLayer->params.end()) { - singleDefine += genericLayer->params.at(def.param); + // copy layer implementation + if (perfCounter.isCPU) { + static const std::string cpuExecType("CPU"); + memset(extPerfEntry.exec_type, 0, sizeof(extPerfEntry.exec_type)); + cpuExecType.copy(extPerfEntry.exec_type, cpuExecType.length()); // Override execType as CPU } else { - singleDefine += def.default_value; - } - singleDefine += def.postfix + "\n"; - layerDefines.append(singleDefine); - } - - // reserve - std::vector reorderedInputs; - reorderedInputs.resize(inputPrimitives.size()); - - // Handle Blobs - std::map blobIndex; - for (auto& blob : genericLayer->blobs) { - // create primitive from blob (always 1d) - cldnn::primitive_id blobId = genericLayer->name + "_" + blob.first; - if (blob.second->dims().size() != 1) { - THROW_CLDNN_EXCEPTION("Invalid dimensions for blob " << blob.first << " in layer " << genericLayer->name); - } - cldnn::layout genericBlobLayout(DataTypeFromPrecision(blob.second->precision()), - m_defaultFormat, - cldnn::tensor(1, 1, TensorValue(blob.second->dims()[0]), 1)); - CreatePrimitiveFromBlob(blobId, blob.second, genericBlobLayout); - // save index in blobIndex - blobIndex[blob.first] = reorderedInputs.size(); - // add to reorderedInputs - reorderedInputs.push_back(blobId); - } - - // Handle kernel parameters - std::vector kernelParameters; - cldnn::format outputFormat(cldnn::format::any); - for (const auto& param : customLayer->KernelParams()) { - switch (param.type) { - case CLDNNCustomLayer::ParamType::Input: { - kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? kernelParameters.size() : size_t(param.paramIndex + 1)); - kernelParameters[param.paramIndex].arg_type = cldnn_arg_type::arg_input; - kernelParameters[param.paramIndex].index = static_cast((param.portIndex >= inputPrimitives.size()) ? 
-1 : param.portIndex); - - // Handle input reorder - if (param.portIndex < inputPrimitives.size() && reorderedInputs[param.portIndex].empty()) { - // todo: add support for multiple reorders of the same input? (read as bfyx for one arg and yxfb for another) - if (param.format != cldnn::format::any) { - auto reorderPrimName = inputPrimitives[param.portIndex] + "_" + layer->name + m_preCustomLayerTag; - auto preprocessPrim = cldnn::reorder( - reorderPrimName, - inputPrimitives[param.portIndex], - param.format, - DataTypeFromPrecision(layer->precision)); - m_topology->add(preprocessPrim); - m_env.profilingIDs.push_back(reorderPrimName); - InitProfileInfo(reorderPrimName, "Reorder"); - reorderedInputs[param.portIndex] = (reorderPrimName); - } else { - reorderedInputs[param.portIndex] = inputPrimitives[param.portIndex]; - } - } - } - break; - case CLDNNCustomLayer::ParamType::Output: { - kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? kernelParameters.size() : size_t(param.paramIndex + 1)); - kernelParameters[param.paramIndex].arg_type = cldnn_arg_type::arg_output; - kernelParameters[param.paramIndex].index = - static_cast((param.portIndex >= inputPrimitives.size()) ? -1 : param.portIndex); - outputFormat = param.format; - } - break; - case CLDNNCustomLayer::ParamType::Data: { - kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? kernelParameters.size() : size_t(param.paramIndex + 1)); - kernelParameters[param.paramIndex].arg_type = cldnn_arg_type::arg_input; - kernelParameters[param.paramIndex].index = - static_cast((blobIndex.find(param.blobName) == blobIndex.end()) ? -1 : blobIndex.at(param.blobName)); - } - break; - default: - THROW_CLDNN_EXCEPTION("Invalid custom layer param type: " << param.type << " in layer: " << genericLayer->name); + std::string impl = implementationsMap.at(profiledID); + impl.copy(extPerfEntry.exec_type, impl.length()); } - } - const std::string layerTitle("\n// Layer " + layer->name + " using Custom Layer " + customLayer->Name() + "\n"); - const std::string defineTitle("// Custom Layer User Defines\n"); - - auto dims = genericLayer->outData[0]->dims; - std::reverse(dims.begin(), dims.end()); - - size_t N = (dims.size() > 0) ? dims[0] : 1; - size_t C = (dims.size() > 1) ? dims[1] : 1; - size_t H = (dims.size() > 2) ? dims[2] : 1; - size_t W = (dims.size() > 3) ? 
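// Annotation: the exec_type handling above distinguishes host-side layers from
// GPU kernels. UpdatePerfStatistics sets perfCounter.isCPU when cldnn reports
// a "duration" interval (used for CPU layers), and GetPerformanceCounts then
// overrides exec_type with "CPU"; otherwise the cldnn kernel implementation
// name recorded in implementationsMap is reported.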
dims[3] : 1; - cldnn::tensor outputTensor = cldnn::tensor(cldnn::batch(N), cldnn::feature(C), cldnn::spatial(W, H)); - - cldnn::layout outputLayout = cldnn::layout(DataTypeFromPrecision(genericLayer->precision), outputFormat, outputTensor); - - // evaluate work sizes rules - std::vector gws, lws; - - // assume output tensor is dimension source by default - int batchDim = outputTensor.batch[0]; - int featureDim = outputTensor.feature[0]; - int yDim = outputTensor.spatial[1]; - int xDim = outputTensor.spatial[0]; - int iidx = customLayer->InputDimSourceIndex(); - - std::string genericLayerName = layer_type_name_ID(layer); - // if input index is greater than -1, take dimension from input - if (iidx >= 0) { - if (iidx >= genericLayer->insData.size()) - THROW_CLDNN_EXCEPTION("Invalid input tensor for index: " << iidx); - // get dimensions from one of the input tensors - auto inDataPtr = genericLayer->insData[iidx].lock(); - if (!inDataPtr) { - THROW_CLDNN_EXCEPTION("Data inserted into generic layer " << genericLayer->name << " is nullptr"); - } - auto inputDims = inDataPtr->dims; - batchDim = featureDim = yDim = 0; - xDim = inputDims[0]; + extPerfEntry.execution_index = i++; + extPerfEntry.status = perfCounter.status; + extPerfEntry.cpu_uSec = perfCounter.cpu_avg(); + extPerfEntry.realTime_uSec = perfCounter.realTime_avg(); - if (dims.size() > 1) - yDim = inputDims[1]; - if (dims.size() > 2) - featureDim = inputDims[2]; - if (dims.size() > 3) - batchDim = inputDims[3]; - } - const std::map vars = { - { 'b', batchDim } , { 'B', batchDim }, - { 'f', featureDim }, { 'F', featureDim }, - { 'y', yDim }, { 'Y', yDim }, - { 'x', xDim }, { 'X', xDim }, - }; - for (auto rule : customLayer->GlobalSizeRules()) { - SimpleMathExpression expr; - expr.SetVariables(vars); - expr.SetExpression(rule); - gws.push_back(expr.Evaluate()); - } - for (auto rule : customLayer->LocalSizeRules()) { - SimpleMathExpression expr; - expr.SetVariables(vars); - expr.SetExpression(rule); - lws.push_back(expr.Evaluate()); + perfCounter.layerType.copy(extPerfEntry.layer_type, perfCounter.layerType.length()); } - auto customPrim = cldnn::custom_gpu_primitive( - genericLayerName, - reorderedInputs, - { layerTitle, defineTitle, layerDefines, customLayer->KernelSource() }, - customLayer->KernelEntry(), - kernelParameters, - customLayer->CompilerOptions(), - outputLayout, - gws, - lws); - - if (outputLayout.format != cldnn::format::any && - p_currentOutputs->find(genericLayerName) == p_currentOutputs->end()) { - // Handle output reorder - auto reorderPrimName = genericLayerName + m_postCustomLayerTag; - m_topology->add( - cldnn::reorder( - reorderPrimName, - genericLayerName, - m_defaultFormat, - customPrim.output_layout.data_type)); - m_env.primitiveIDs[genericLayerName] = reorderPrimName; - m_env.primitiveIDs[reorderPrimName] = reorderPrimName; - m_env.profilingIDs.push_back(reorderPrimName); - InitProfileInfo(reorderPrimName, "Reorder"); - } else { - m_env.primitiveIDs[genericLayerName] = genericLayerName; + for (auto& prim : GetNetwork()->get_executed_primitive_ids()) { + if (std::find(profilingIDs.begin(), profilingIDs.end(), prim) == profilingIDs.end()) { + // TODO: add primitives that was added inside cldnn to perf stat + } } - m_topology->add(customPrim); - m_env.profilingIDs.push_back(genericLayerName); -} - -void CLDNNGraph::CreateSimplerNMSPrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 3); - IE_ASSERT(layer->insData[0].lock()->dims[3] == 1); // only handling input batch size 1 - 
IE_ASSERT(layer->insData[1].lock()->dims[3] == 1);  // only handling input batch size 1
-    auto simpleNMSLayer = as<InferenceEngine::GenericLayer *>(layer);
-
-    int max_num_proposals = simpleNMSLayer->GetParamAsInt("max_num_proposals");
-    float iou_threshold = simpleNMSLayer->GetParamAsFloat("iou_threshold", 0.7f);
-    int min_bbox_size = simpleNMSLayer->GetParamAsInt("min_bbox_size", 16);
-    int feature_stride = simpleNMSLayer->GetParamAsInt("feat_stride", 16);
-    int pre_nms_topn = simpleNMSLayer->GetParamAsInt("pre_nms_topn");
-    int post_nms_topn = simpleNMSLayer->GetParamAsInt("post_nms_topn");
-    std::vector<float> scale = simpleNMSLayer->GetParamAsFloats("scale");
-    auto inputPrimitives = GetPrevLayersPrimitives(layer);
-
-    std::string simpleNMSLayerName = layer_type_name_ID(layer);
-    auto simpleNMSPrim = cldnn::proposal(
-        simpleNMSLayerName,
-        inputPrimitives[0],  // cls_score
-        inputPrimitives[1],  // bbox_pred
-        inputPrimitives[2],  // im_info
-        max_num_proposals,
-        iou_threshold,
-        min_bbox_size,
-        feature_stride,
-        pre_nms_topn,
-        post_nms_topn,
-        { 0.5f, 1.0f, 2.0f },  // ratios for the SimplerNMS variant
-        scale);
-
-    m_env.primitiveIDs[simpleNMSLayerName] = simpleNMSLayerName;
-    m_topology->add(simpleNMSPrim);
-    m_env.profilingIDs.push_back(simpleNMSLayerName);
-}
-void CLDNNGraph::CreateEltwisePrimitive(InferenceEngine::CNNLayerPtr &layer) {
-    ValidateEltwiseLayer(layer);
-
-    auto eltwiseLayer = as<InferenceEngine::EltwiseLayer *>(layer);
-    auto inputPrimitives = GetPrevLayersPrimitives(layer);
-
-    std::vector<float> coefficients = eltwiseLayer->coeff;
-    if (eltwiseLayer->_operation != InferenceEngine::EltwiseLayer::Sum && !coefficients.empty()) {
-        THROW_IE_EXCEPTION << "Only sum operation supports operands coefficients";
-    }
-
-    if (!coefficients.empty() && coefficients.size() != inputPrimitives.size()) {
-        THROW_IE_EXCEPTION << "Number of provided coefficients is not equal to number of operands";
-    }
-
-    std::string eltwiseLayerName = layer_type_name_ID(layer);
-    auto eltwisePrim = cldnn::eltwise(
-        eltwiseLayerName,
-        inputPrimitives,
-        EltwiseModeFromIEEltwise(eltwiseLayer->_operation),
-        coefficients);
-    m_env.primitiveIDs[eltwiseLayerName] = eltwiseLayerName;
-    m_topology->add(eltwisePrim);
-    m_env.profilingIDs.push_back(eltwiseLayerName);
-}
+std::shared_ptr<cldnn::network> CLDNNGraph::GetNetwork(size_t idx) const {
+    if (idx >= GetNetworksCount())
+        THROW_IE_EXCEPTION << "Unable to find network with id=" << idx << ". Stored networks count: " << GetNetworksCount();
-void CLDNNGraph::CreateConcatenatePrimitive(InferenceEngine::CNNLayerPtr &layer) {
-    ValidateLayer(layer, 0);
-    auto concatLayer = as<InferenceEngine::ConcatLayer *>(layer);
-    auto inputPrimitives = GetPrevLayersPrimitives(layer);
-    std::string concatLayerName = layer_type_name_ID(layer);
-    auto concatPrim = cldnn::concatenation(
-        concatLayerName,
-        inputPrimitives,
-        ConcatAxisFromIEAxis(concatLayer->_axis));
-    m_env.primitiveIDs[concatLayerName] = concatLayerName;
-    m_topology->add(concatPrim);
-    m_env.profilingIDs.push_back(concatLayerName);
+    return m_networks[idx];
 }
-void CLDNNGraph::CreateSplitPrimitive(InferenceEngine::CNNLayerPtr &layer) {
-    ValidateLayer(layer, 1);
-    auto splitLayer = as<InferenceEngine::SplitLayer *>(layer);
-    if (IsValidSplitConvMerge(splitLayer)) {
-        // AlexNet style split->conv*2->merge
-        CreateFusedSplitConvMergePrimitive(layer);
-    } else {
-#ifdef _USE_SPLIT_PRIMITIVE
-        auto inputPrimitives = GetPrevLayersPrimitives(layer);
-        auto inputDims = splitLayer->insData[0].lock()->dims;
-        InferenceEngine::SizeVector startOffset(inputDims.size());
-        std::vector<std::pair<cldnn::primitive_id, cldnn::tensor>> outputOffsets;
-std::cout << "Splitting layer: " << layer->name << "\n\tSize:" << CldnnTensorFromIEDims(inputDims) << std::endl;
-        for (auto& outLayer : splitLayer->outData) {
-            if (outLayer->dims.size() != startOffset.size()) {
-                THROW_CLDNN_EXCEPTION("Invalid dimensions in split layer: " << splitLayer->name << " output: " << outLayer->name);
-            }
-            for (size_t i = 0; i < inputDims.size(); i++) {
-                if ((outLayer->dims[i] + startOffset[i]) > inputDims[i]) {
-                    THROW_CLDNN_EXCEPTION("Invalid dimensions in split layer: " << splitLayer->name << " output: " << outLayer->name);
-                }
-            }
-            auto outTensor = CldnnTensorFromIEDims(outLayer->dims);
-            auto cropPrim = cldnn::crop(outLayer->name, inputPrimitives[0], outTensor, CldnnTensorFromIEDims(startOffset));
-            m_topology->add(cropPrim);
-            m_env.primitiveIDs[outLayer->name] = outLayer->name;
-            m_env.profilingIDs.push_back(outLayer->name);
-            outputOffsets.push_back({ outLayer->name, CldnnTensorFromIEDims(startOffset) });
-            for (size_t i = 0; i < inputDims.size(); i++) {
-                if (outLayer->dims[i] != inputDims[i]) {
-                    startOffset[i] += outLayer->dims[i];
-                }
-            }
-        }
-
-        auto splitPrim = cldnn::split(
-            splitLayer->name,
-            inputPrimitives[0],
-            outputOffsets);
-        m_topology->add(splitPrim);
+std::string CLDNNGraph::MapOutputName(std::string outName) const {
+    auto networkOutputsIDs = GetNetwork()->get_output_ids();
+    auto allPrimitiveIds = GetNetwork()->get_all_primitives();
-        // set split as not_run
-        InitProfileInfo(layer->name, layer->type, "None", InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);  // Mark this layer as optimized out
-
+    // Find correct output ID. Start with name stored in IR.
+    std::string outputID = primitiveIDs.at(outName);
+    while (std::find(networkOutputsIDs.begin(), networkOutputsIDs.end(), outputID) == networkOutputsIDs.end()) {
+        // If current ID isn't found in cldnn network outputs, get previous primitive id and try again.
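// Annotation: an illustrative trace of this lookup (names are hypothetical).
// If IR output "prob" maps to a fused primitive "prob_reorder" that cldnn
// marked "_optimized_", then prevPrimitiveIDs.at("prob_reorder") == {"softmax1"}
// yields the primitive that actually owns the output buffer, and the loop
// repeats until the current ID appears in get_output_ids().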
+ auto prim = allPrimitiveIds.find(outputID); + if (prim == allPrimitiveIds.end()) { + THROW_IE_EXCEPTION << "Unknown primitive id " << outputID; } - auto inputDims = inDataPtr->dims; - InferenceEngine::SizeVector startOffset(inputDims.size()); - - auto TensorFromIEDims = [](const InferenceEngine::SizeVector& dims, int def) { - switch (dims.size()) { - case 1: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(def), cldnn::spatial(def, def)); - case 2: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(def, def)); - case 3: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(def, dims[2])); - case 4: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[3], dims[2])); - default: THROW_CLDNN_EXCEPTION("Invalid dimensions size(" << dims.size() << ") in split layer"); - } - }; - for (auto& outLayer : splitLayer->outData) { - std::string outLayerName = splitLayer->type + ":" + outLayer->name; - if (outLayer->dims.size() != startOffset.size()) { - THROW_CLDNN_EXCEPTION("Invalid dimesions in split layer: " << splitLayer->name << " output: " << outLayer->name); - } - for (size_t i = 0; i < inputDims.size(); i++) { - if ((outLayer->dims[i] + startOffset[i]) > inputDims[i]) { - THROW_CLDNN_EXCEPTION("Invalid dimesions in split layer: " << splitLayer->name << " output: " << outLayer->name); - } - } - SizeVector reverseDims = outLayer->dims; - std::reverse(reverseDims.begin(), reverseDims.end()); - auto outTensor = TensorFromIEDims(reverseDims, 1); - - SizeVector reverseOffset = startOffset; - std::reverse(reverseOffset.begin(), reverseOffset.end()); - auto offsetTensor = TensorFromIEDims(reverseOffset, 0); - - auto cropPrim = cldnn::crop(outLayerName, inputPrimitives[0], outTensor, offsetTensor); - m_env.primitiveIDs[outLayerName] = outLayerName; - m_topology->add(cropPrim); - m_env.profilingIDs.push_back(outLayerName); - InitProfileInfo(outLayerName, "Crop"); - - for (size_t i = 0; i < inputDims.size(); i++) { - if (outLayer->dims[i] != inputDims[i]) { - startOffset[i] += outLayer->dims[i]; - } - } + if (prevPrimitiveIDs.at(outputID).size() != 1 || prim->second != "_optimized_") { + THROW_IE_EXCEPTION << "Unable to find parent for output primitive " << outputID; } - - // set split as not_run - InitProfileInfo(layer->name, layer->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT); // Mark this layer as optimized out -#endif // _USE_SPLIT_PRIMITIVE - } -} - -void CLDNNGraph::CreateFusedSplitConvMergePrimitive(InferenceEngine::CNNLayerPtr &layer) { - auto inputPrimitives = GetPrevLayersPrimitives(layer); - // only handle the split->conv->merge topology for now - auto splitLayer = as (layer); - IE_ASSERT(IsValidSplitConvMerge(splitLayer)); - - auto convLayer1 = - as (GetNextSingleLayer(splitLayer->outData[0])); - auto convLayer2 = - as (GetNextSingleLayer(splitLayer->outData[1])); - auto concatLayer = - as (GetNextSingleLayer(GetNextSingleLayer(splitLayer->outData[0]))); - - if (convLayer1 == nullptr || - convLayer2 == nullptr || - concatLayer == nullptr) { - THROW_CLDNN_EXCEPTION("Expected single layer does not exist"); - } - // Mark these layers as optimized out - InitProfileInfo(convLayer1->name, convLayer1->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT); - InitProfileInfo(convLayer2->name, convLayer2->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT); - InitProfileInfo(concatLayer->name, concatLayer->type, false, 
-
-    // build the split conv primitive
-    std::vector<cldnn::primitive_id> weightPrimID;
-    std::vector<cldnn::primitive_id> biasPrimID;
-    CreateWeightAndBiasPrimitives(GetNextSingleLayer(splitLayer->outData[0]), weightPrimID, biasPrimID);
-    CreateWeightAndBiasPrimitives(GetNextSingleLayer(splitLayer->outData[1]), weightPrimID, biasPrimID);
-
-    auto concatLayerPtr = std::make_shared(*concatLayer);
-
-    cldnn::tensor stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
-                                         cldnn::spatial(convLayer1->_stride[X_AXIS], convLayer1->_stride[Y_AXIS]));
-    auto allPad = getPaddings(*convLayer1);
-    cldnn::tensor padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0),
-                                          cldnn::spatial(-allPad.begin[X_AXIS], -allPad.begin[Y_AXIS]));
-    cldnn::tensor dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
-                                           cldnn::spatial(convLayer1->_dilation[X_AXIS], convLayer1->_dilation[Y_AXIS]));
-
-    std::string splitLayerName = layer_type_name_ID(layer);
-    auto splitPrim = cldnn::convolution(splitLayerName,
-                                        inputPrimitives[0],
-                                        weightPrimID,
-                                        biasPrimID,
-                                        stride,
-                                        padding,
-                                        dilation,
-                                        false,
-                                        0.0f,
-                                        CldnnTensorFromIEDims(concatLayer->outData[0]->dims));
-
-    layer = concatLayerPtr;
-
-    m_env.primitiveIDs[splitLayerName] = splitLayerName;
-    m_env.primitiveIDs[layer_type_name_ID(convLayer1)] = splitLayerName;
-    m_env.primitiveIDs[layer_type_name_ID(convLayer2)] = splitLayerName;
-    m_env.primitiveIDs[layer_type_name_ID(concatLayer)] = splitLayerName;  // pair the last merged layer (concat or relu) with
-                                                                           // this primitive name to be used as
-                                                                           // input prim for subsequent layers
-    m_topology->add(splitPrim);
-    m_env.profilingIDs.push_back(splitLayerName);
-}
-
-void CLDNNGraph::CreatePowerPrimitive(InferenceEngine::CNNLayerPtr &layer) {
-    ValidateLayer(layer, 1);
-    auto inputPrimitives = GetPrevLayersPrimitives(layer);
-    auto powerLayer = as<InferenceEngine::PowerLayer *> (layer);
-    if (powerLayer->power != 1.0f && powerLayer->power != 0.5f) {
-        THROW_CLDNN_EXCEPTION("Power Layer " << layer->name << " uses an unsupported power value");
+        outputID = prevPrimitiveIDs.at(outputID)[0];
    }
-    std::string powerLayerName = layer_type_name_ID(layer);
-    if ((powerLayer->scale == 1.0f) && (powerLayer->offset == 0.0f)) {
-        if (powerLayer->power == 0.5f) {
-            auto activationPrim = cldnn::activation(powerLayerName, inputPrimitives[0], activation_sqrt);
-            m_topology->add(activationPrim);
-            m_env.profilingIDs.push_back(powerLayerName);
-            m_env.primitiveIDs[powerLayerName] = powerLayerName;
-        } else {
-            // skip this layer
-            m_env.primitiveIDs[powerLayerName] = inputPrimitives[0];  // register the previous primID for this layer too
-            InitProfileInfo(layer->name, layer->type, false, InferenceEngine::InferenceEngineProfileInfo::NOT_RUN);  // Mark this layer as not run
-        }
-    } else {
-        // create scale primitive
-        auto scaleValuePrimName = powerLayerName + m_scalesTag;
-        AddSingleValuePrimitive(scaleValuePrimName,
-                                DataTypeFromPrecision(powerLayer->precision),
-                                powerLayer->scale);
-
-        cldnn::primitive_id biasValuePrimName = "";
-        if (powerLayer->offset != 0.0f) {
-            biasValuePrimName = powerLayerName + m_biasesTag;
-            AddSingleValuePrimitive(biasValuePrimName,
-                                    DataTypeFromPrecision(powerLayer->precision),
-                                    powerLayer->offset);
-        }
-        auto scalePrim = cldnn::scale(
-            powerLayerName,
-            inputPrimitives[0],
-            scaleValuePrimName,
-            biasValuePrimName);
-
-        m_env.primitiveIDs[powerLayerName] = powerLayerName;
-        m_topology->add(scalePrim);
-        m_env.profilingIDs.push_back(powerLayerName);
-
-        if (powerLayer->power == 0.5f) {
-            auto activationPrim =
cldnn::activation(powerLayerName+"_sqrt", powerLayerName, activation_sqrt); - m_topology->add(activationPrim); - m_env.profilingIDs.push_back(powerLayerName+"_sqrt"); - } - } + return outputID; } -void CLDNNGraph::CreateSoftMaxPrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 1); - auto inputPrimitives = GetPrevLayersPrimitives(layer); - auto softmaxLayer = as (layer); +InferenceEngine::SizeVector CLDNNGraph::GetOutputSize(std::string outName) const { + auto res_output = outputDims.find(outName); - // additional WA for clDNN FullyConnected output in BX instead of BF - int inputOrder = 0; - auto prevData = layer->insData[0].lock(); - - if (prevData == nullptr) { - THROW_CLDNN_EXCEPTION("SoftMax: nonexistent input for layer: " << layer->name); - } - - auto prevCreator = prevData->creatorLayer.lock(); - bool isPrevFC = false; - - if (prevCreator && (LayerTypeFromStr(prevCreator->type) == FullyConnected)) - isPrevFC = true; - // end of WA - - std::string softmaxLayerName = layer_type_name_ID(layer); - auto softmaxPrim = cldnn::softmax(softmaxLayerName, inputPrimitives[0], SoftmaxDimensionFromIEAxis(softmaxLayer, isPrevFC)); - m_env.primitiveIDs[softmaxLayerName] = softmaxLayerName; - m_topology->add(softmaxPrim); - m_env.profilingIDs.push_back(softmaxLayerName); -} - -void CLDNNGraph::CreateFullyConnectedPrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 1); - auto inputPrimitives = GetPrevLayersPrimitives(layer); - auto fcLayer = as (layer); - - std::string fcLayerName = layer_type_name_ID(layer); - // create bias primitive - cldnn::primitive_id biasesPrimID = ""; - if (fcLayer->_biases != nullptr) { - biasesPrimID = fcLayerName + m_biasesTag; - cldnn::layout fcbLayout(DataTypeFromPrecision(fcLayer->precision), m_defaultFormat, - cldnn::spatial(TensorValue(fcLayer->_out_num))); - CreatePrimitiveFromBlob(biasesPrimID, fcLayer->_biases, fcbLayout); - } - - // create weights primitive - // gcc bug to resolve auto, at least for 5.4 version - std::shared_ptr insData0 = fcLayer->insData[0].lock(); - IE_ASSERT(insData0 != nullptr); - cldnn::primitive_id weightsPrimID = fcLayerName + m_weightsTag; - cldnn::tensor weightsDims; - switch (insData0->dims.size()) { - case 4: - weightsDims = { TensorValue(fcLayer->outData[0]->dims[0]), - TensorValue(insData0->dims[2]), - TensorValue(insData0->dims[0]), - TensorValue(insData0->dims[1]) }; - break; - case 2: - weightsDims = { TensorValue(fcLayer->outData[0]->dims[0]), TensorValue(insData0->dims[0]), 1, 1 }; - break; - default: THROW_CLDNN_EXCEPTION("Invalid data dimensions"); - } - cldnn::layout fcwLayout(DataTypeFromPrecision(fcLayer->precision), m_defaultFormat, weightsDims); - CreatePrimitiveFromBlob(weightsPrimID, fcLayer->_weights, fcwLayout); - - auto fcPrim = cldnn::fully_connected(fcLayerName, - inputPrimitives[0], - weightsPrimID, - biasesPrimID, - false, - 0.0f); - - m_env.primitiveIDs[fcLayerName] = fcLayerName; - m_topology->add(fcPrim); - m_env.profilingIDs.push_back(fcLayerName); -} - -void CLDNNGraph::CreatePoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 1); - auto inputPrimitives = GetPrevLayersPrimitives(layer); - auto poolLayer = as (layer); - - std::string poolLayerName = layer_type_name_ID(layer); - auto allPads = getPaddings(*poolLayer); - if (poolLayer->outData.size() > 1) { - // max pooling with argmax - SizeVector argmaxDims; - - std::string realOutputID, argmaxOutputID; - int outputOrder = 0; - - for (auto out : poolLayer->outData) { - auto layersMap = 
out->getInputTo(); - - for (auto item : layersMap) { - bool isUpooling = (LayerTypeFromStr(item.second->type) == Unpooling); - if (outputOrder == 1 && isUpooling) { - argmaxDims = out->dims; - argmaxOutputID = out->name; - } else { - realOutputID = out->name; - } - outputOrder++; - } - } - - // create mutable_data primitive for storing argmax data - cldnn::tensor mutableTensor; - switch (argmaxDims.size()) { - case 4: mutableTensor = cldnn::tensor(TensorValue(argmaxDims[3]), TensorValue(argmaxDims[2]), - TensorValue(argmaxDims[0]), TensorValue(argmaxDims[1])); - break; - case 3: mutableTensor = cldnn::tensor(TensorValue(argmaxDims[2]), TensorValue(argmaxDims[1]), - 1, TensorValue(argmaxDims[0])); - break; - case 2: mutableTensor = cldnn::tensor(TensorValue(argmaxDims[1]), TensorValue(argmaxDims[0]), 1, 1); - break; - case 1: // not implemented yet. - default: THROW_CLDNN_EXCEPTION("Invalid constant blob dimensions"); - } - - cldnn::layout mutableLayout = cldnn::layout( - cldnn::data_types::f32, - m_defaultFormat, - mutableTensor); - - cldnn::primitive_id argmaxPrimID = layer->name + "_argmax_mutable"; - - auto mem = cldnn::memory::allocate(*(m_env.engine), mutableLayout); - auto argmax_mutable_prim = cldnn::mutable_data(argmaxPrimID, mem); - m_topology->add(argmax_mutable_prim); - m_env.primitiveIDs[argmaxPrimID] = argmaxPrimID; - m_env.primitiveIDs[argmaxOutputID] = argmaxPrimID; - - // create pooling primitive itself - auto poolPrim = cldnn::pooling(poolLayerName, - inputPrimitives[0], - argmaxPrimID, - cldnn::pooling_mode::max_with_argmax, - cldnn::spatial(TensorValue(poolLayer->_kernel[X_AXIS]), TensorValue(poolLayer->_kernel[Y_AXIS])), // size - cldnn::spatial(TensorValue(poolLayer->_stride[X_AXIS]), TensorValue(poolLayer->_stride[Y_AXIS])), // stride - // input offset (padding) - explicit tensor for 0 bf - { 0, 0, -TensorValue(allPads.begin[X_AXIS]), -TensorValue(allPads.begin[Y_AXIS]) }, - CldnnTensorFromIEDims(poolLayer->outData[0]->dims)); - m_topology->add(poolPrim); - m_env.primitiveIDs[realOutputID] = poolLayerName; - } else { - // regular pooling - auto poolPrim = cldnn::pooling(poolLayerName, - inputPrimitives[0], - PoolingModeFromIEPooling(poolLayer->_type, poolLayer->_exclude_pad), - cldnn::spatial(TensorValue(poolLayer->_kernel[X_AXIS]), TensorValue(poolLayer->_kernel[Y_AXIS])), // size - cldnn::spatial(TensorValue(poolLayer->_stride[X_AXIS]), TensorValue(poolLayer->_stride[Y_AXIS])), // stride - // input offset (padding) - explicit tensor for 0 bf - { 0, 0, -TensorValue(allPads.begin[X_AXIS]), -TensorValue(allPads.begin[Y_AXIS]) }, - CldnnTensorFromIEDims(poolLayer->outData[0]->dims)); - m_topology->add(poolPrim); - m_env.primitiveIDs[poolLayerName] = poolLayerName; - } - - m_env.profilingIDs.push_back(poolLayerName); -} - -void CLDNNGraph::CreateLRNPrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 1); - auto inputPrimitives = GetPrevLayersPrimitives(layer); - auto lrnLayer = as (layer); - std::string lrnLayerName = layer_type_name_ID(layer); - auto lrnPrim = cldnn::lrn( - lrnLayerName, - inputPrimitives[0], - lrnLayer->_size, - static_cast(lrnLayer->_k), - lrnLayer->_alpha, - lrnLayer->_beta, - lrnLayer->_isAcrossMaps ? 
cldnn_lrn_norm_region_across_channel : cldnn_lrn_norm_region_within_channel); - - m_env.primitiveIDs[lrnLayerName] = lrnLayerName; - m_topology->add(lrnPrim); - m_env.profilingIDs.push_back(lrnLayerName); -} - -void CLDNNGraph::CreateActivationPrimitive(InferenceEngine::CNNLayerPtr &layer, const LayerType type) { - ValidateLayer(layer, 1); - auto inputPrimitives = GetPrevLayersPrimitives(layer); - cldnn_activation_additional_params params{ 0.0f, 0.0f }; - cldnn_activation_func func = cldnn_activation_func_t::activation_none; - - LayerType activationType; - if (type == Activation) { - std::string activation_type = layer->GetParamAsString("type"); - if (activation_type == "tanh") { - activationType = TanH; - } else if (activation_type == "sigmoid" || activation_type == "logistic") { - activationType = Sigmoid; - } else if (activation_type == "elu") { - activationType = ELU; - } else if (activation_type == "relu") { - activationType = ReLU; - } else if (activation_type == "relu6") { - activationType = ReLU6; - } else if (activation_type == "clamp") { - activationType = Clamp; - } else if (activation_type == "exp") { - activationType = Exp; - } else if (activation_type == "not") { - activationType = Not; - } else { - THROW_CLDNN_EXCEPTION("Unsupported activation type (" + activation_type + - ") in layer " + layer->name); - } - } else { - activationType = type; - } - - switch (activationType) { - case TanH: - { - func = cldnn_activation_func_t::activation_hyperbolic_tan; - break; - } - case ELU: - { - func = cldnn_activation_func_t::activation_elu; - params.a = layer->GetParamAsFloat("alpha", 1.0f); - break; - } - case Sigmoid: - { - func = cldnn_activation_func_t::activation_logistic; - break; - } - case ReLU: - { - func = cldnn_activation_func_t::activation_relu_negative_slope; - params.a = layer->GetParamAsFloat("negative_slope", 0.0f); - break; - } - case ReLU6: - { - func = cldnn_activation_func_t::activation_clamp; - params.b = layer->GetParamAsFloat("n", 6.0f); - break; - } - case Clamp: - { - func = cldnn_activation_func_t::activation_clamp; - params.a = layer->GetParamAsFloat("min"); - params.b = layer->GetParamAsFloat("max"); - break; - } - case Exp: - { - func = cldnn_activation_func_t::activation_exp; - break; - } - case Not: - { - func = cldnn_activation_func_t::activation_not; - break; - } - default: - THROW_CLDNN_EXCEPTION("Unsupported activation type (" + layer->type + - ") in layer " + layer->name); - } - - std::string layerName = layer_type_name_ID(layer); - auto activationPrimitive = cldnn::activation(layerName, inputPrimitives[0], func, params); - m_env.primitiveIDs[layerName] = layerName; - m_topology->add(activationPrimitive); - m_env.profilingIDs.push_back(layerName); -} - -void CLDNNGraph::CreateCopyPrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 1); - auto inputPrimitives = GetPrevLayersPrimitives(layer); - auto copyLayer = as (layer); - - // Optimize out and just update references - std::string layerName = layer_type_name_ID(layer); - m_env.primitiveIDs[layerName] = inputPrimitives[0]; - InitProfileInfo(layerName, layer->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT); // Mark this layer as optimized out -} - -void CLDNNGraph::CreateUpsamplingPrimitive(InferenceEngine::CNNLayerPtr &layer) { - // Assuming multi-input will be handled by prev concat/eltwise layers - ValidateLayer(layer, 1); - auto inputPrimitives = GetPrevLayersPrimitives(layer); - auto upsamplingLayer = as (layer); - uint32_t scale = 
upsamplingLayer->GetParamAsUInt("scale"); - uint32_t numFilter = upsamplingLayer->GetParamAsUInt("num_filter"); - std::string sampleType = upsamplingLayer->GetParamAsString("sample_type"); - - std::string upsamplingLayerName = layer_type_name_ID(layer); - auto upsamplingPrim = cldnn::upsampling( - upsamplingLayerName, - inputPrimitives[0], - scale, - numFilter, - UpsamplingTypeFromString(sampleType)); - - m_env.primitiveIDs[upsamplingLayerName] = upsamplingLayerName; - m_topology->add(upsamplingPrim); - m_env.profilingIDs.push_back(upsamplingLayerName); -} - -void CLDNNGraph::CreateResamplePrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 1); - auto inputPrimitives = GetPrevLayersPrimitives(layer); - auto resampleLayer = as (layer); - - auto outDims = layer->outData[0]->dims; - size_t inFeatures = 1; - unsigned int scale = 1; - std::shared_ptr insData0 = layer->insData[0].lock(); - IE_ASSERT(insData0 != nullptr); - if (insData0->dims.size() > 2) { - inFeatures = insData0->dims[2]; - scale = outDims[0]/insData0->dims[0]; - if (scale < 1) { - THROW_CLDNN_EXCEPTION("Unsupported scale in layer " + layer->name); - } - } - std::string sampleType = resampleLayer->GetParamAsString("type"); - - if (sampleType != "caffe.ResampleParameter.NEAREST") { - THROW_CLDNN_EXCEPTION("Unsupported resampling type (" + sampleType + ") in layer " + layer->name); - } - - std::string resampleLayerName = layer_type_name_ID(layer); - auto upsamplingPrim = cldnn::upsampling( - resampleLayerName, - inputPrimitives[0], - scale, - inFeatures, - cldnn::upsampling_sample_type::nearest); - - m_env.primitiveIDs[resampleLayerName] = resampleLayerName; - m_topology->add(upsamplingPrim); - m_env.profilingIDs.push_back(resampleLayerName); -} - -void CLDNNGraph::CreateYOLO2RegionPrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 1); - auto inputPrimitives = GetPrevLayersPrimitives(layer); - auto YOLOregionLayer = as (layer); - - uint32_t coords = YOLOregionLayer->GetParamAsUInt("coords", 4); - uint32_t classes = YOLOregionLayer->GetParamAsUInt("classes", 20); - uint32_t num = YOLOregionLayer->GetParamAsUInt("num", 1); - bool do_softmax = YOLOregionLayer->GetParamsAsBool("do_softmax", true); - - uint32_t mask_size = 0; - if (HasParam(YOLOregionLayer->params, "mask")) { - const auto mask = YOLOregionLayer->GetParamAsInts("mask"); - mask_size = static_cast(mask.size()); - } - - std::string YOLOregionLayerName = layer_type_name_ID(layer); - auto regionPrim = cldnn::region_yolo( - YOLOregionLayerName, - inputPrimitives[0], - coords, - classes, - num, - mask_size, - do_softmax); - - m_env.primitiveIDs[YOLOregionLayerName] = YOLOregionLayerName; - m_topology->add(regionPrim); - m_env.profilingIDs.push_back(YOLOregionLayerName); -} - -void CLDNNGraph::CreateYOLO2ReorgPrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 1); - auto inputPrimitives = GetPrevLayersPrimitives(layer); - auto YOLOreorgLayer = as (layer); // as (layer); - uint32_t stride = YOLOreorgLayer->GetParamAsUInt("stride"); - - std::string YOLOreorgLayerName = layer_type_name_ID(layer); - auto reorgPrim = cldnn::reorg_yolo( - YOLOreorgLayerName, - inputPrimitives[0], - stride); - - m_env.primitiveIDs[YOLOreorgLayerName] = YOLOreorgLayerName; - m_topology->add(reorgPrim); - m_env.profilingIDs.push_back(YOLOreorgLayerName); -} - -void CLDNNGraph::CreateArgMaxPrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 1); - auto inputPrimitives = GetPrevLayersPrimitives(layer); - auto ArgMaxLayer = as 
(layer); - const cldnn::arg_max_min::out_type otype = cldnn::arg_max_min::out_type::max; - - if (HasParam(ArgMaxLayer->params, "out_max_val")) { - int32_t out_max_val_flag = ArgMaxLayer->GetParamAsInt("out_max_val"); - if (out_max_val_flag != 0) { - THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str << "ArgMax: out_max_val param is not supported for layer: " << layer->name; - } - } - - uint32_t top_k = ArgMaxLayer->GetParamAsUInt("top_k", 1); - - cldnn::arg_max_min::axis_name chosen_axis = cldnn::arg_max_min::axis_name::xyf; - - if (HasParam(ArgMaxLayer->params, "axis")) { - int32_t axis_param = ArgMaxLayer->GetParamAsInt("axis", 1); - - int32_t axis = axis_param; - if (-4 <= axis && axis <= -1) - axis += 4; - - switch (axis) { - case 0: chosen_axis = cldnn::arg_max_min::axis_name::batch; break; - case 1: chosen_axis = cldnn::arg_max_min::axis_name::feature; break; - case 2: chosen_axis = cldnn::arg_max_min::axis_name::y; break; - case 3: chosen_axis = cldnn::arg_max_min::axis_name::x; break; - } - } - - std::string ArgMaxLayerName = layer_type_name_ID(layer); - auto argmaxPrim = cldnn::arg_max_min( - ArgMaxLayerName, - inputPrimitives[0], - otype, - top_k, - chosen_axis); - - m_env.primitiveIDs[ArgMaxLayerName] = ArgMaxLayerName; - m_topology->add(argmaxPrim); - m_env.profilingIDs.push_back(ArgMaxLayerName); -} - -void CLDNNGraph::CreateMaxUnpoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 2); - - auto UnpoolingLayer = as (layer); - - cldnn::primitive_id real_input, argmax_mutable; - - // locate ArgMax primitive - int inputOrder = 0; - for (auto inputData : layer->insData) { - auto prevData = inputData.lock(); - - if (prevData == nullptr) { - THROW_CLDNN_EXCEPTION("MaxUnpooling: nonexistent input for layer: " << layer->name); - } - - auto prevCreator = prevData->creatorLayer.lock(); - - if (prevCreator && - (LayerTypeFromStr(prevCreator->type) == Pooling) && - prevCreator->outData.size() > 1 && - inputOrder == 1) { - argmax_mutable = m_env.primitiveIDs.at(prevCreator->name + "_argmax_mutable"); - } else { - real_input = m_env.primitiveIDs.at(prevData->name); - } - inputOrder++; - } - - uint32_t stride = UnpoolingLayer->GetParamAsUInt("stride"); - uint32_t kernel_size = UnpoolingLayer->GetParamAsUInt("kernel_size"); - - std::string UnpoolingLayerName = layer_type_name_ID(layer); - auto unpoolingPrim = cldnn::max_unpooling( - UnpoolingLayerName, - real_input, - argmax_mutable, - cldnn::spatial(kernel_size, kernel_size), // size - cldnn::spatial(stride, stride) ); // stride - - m_env.primitiveIDs[UnpoolingLayerName] = UnpoolingLayerName; - m_topology->add(unpoolingPrim); - m_env.profilingIDs.push_back(UnpoolingLayerName); -} - -void CLDNNGraph::CreateMVNPrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 1); - auto inputPrimitives = GetPrevLayersPrimitives(layer); - auto MvnLayer = as (layer); - - bool across_channels = MvnLayer->GetParamsAsBool("across_channels", false); - bool normalize_variance = MvnLayer->GetParamsAsBool("normalize_variance", true); - float eps = MvnLayer->GetParamAsFloat("eps", 1e-10f); - - std::string MvnLayerName = layer_type_name_ID(layer); - auto mvnPrim = cldnn::mvn( - MvnLayerName, - inputPrimitives[0], - across_channels, - normalize_variance, - eps); - - m_env.primitiveIDs[MvnLayerName] = MvnLayerName; - m_topology->add(mvnPrim); - m_env.profilingIDs.push_back(MvnLayerName); -} - -void CLDNNGraph::CreateTilePrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 1); - auto inputPrimitives = 
GetPrevLayersPrimitives(layer); - auto tileLayer = as (layer); - - int axis = tileLayer->GetParamAsInt("axis", 1); - int tiles = tileLayer->GetParamAsInt("tiles"); - - auto cldnnAxisFromIE = [](int axis) { - switch (axis) { - case 0: return cldnn::tile::tile_axis::along_b; - case 1: return cldnn::tile::tile_axis::along_f; - case 2: return cldnn::tile::tile_axis::along_y; - case 3: return cldnn::tile::tile_axis::along_x; - default: THROW_CLDNN_EXCEPTION("Unsupported tile axis: " << axis); - } - }; - std::string tileLayerName = layer_type_name_ID(layer); - auto tilePrim = cldnn::tile( - tileLayerName, - inputPrimitives[0], - cldnnAxisFromIE(axis), - tiles); - - m_env.primitiveIDs[tileLayerName] = tileLayerName; - m_topology->add(tilePrim); - m_env.profilingIDs.push_back(tileLayerName); -} - -void CLDNNGraph::CreatePadPrimitive(InferenceEngine::CNNLayerPtr &layer) { - ValidateLayer(layer, 1); - auto inputPrimitives = GetPrevLayersPrimitives(layer); - auto padLayer = as (layer); - - auto PadTensorFromArgs = [](const std::string &s) -> cldnn::tensor { - std::stringstream ss(s); - std::string item; - std::vector elems; - while (std::getline(ss, item, ',')) { - elems.push_back(static_cast(std::atoll(item.c_str()))); - } - - while (elems.size() < 4) { - elems.push_back(0); - } - - // Swap x and y - auto tmp = elems[2]; - elems[2] = elems[3]; - elems[3] = tmp; - - return cldnn::tensor(elems, 0); - }; - - auto pads_begin = PadTensorFromArgs(padLayer->GetParamAsString("pads_begin")); - auto pads_end = PadTensorFromArgs(padLayer->GetParamAsString("pads_end")); - std::string mode = padLayer->GetParamAsString("pad_mode"); - float pad_value = padLayer->GetParamAsFloat("pad_value", 0.0f); - - cldnn::border_type border_mode; - if (mode == "constant") - border_mode = cldnn::border_type::constant; - else if (mode == "edge") - border_mode = cldnn::border_type::edge; - else if (mode == "symmetric") - border_mode = cldnn::border_type::mirror; - else if (mode == "reflect") - border_mode = cldnn::border_type::mirror_101; - else - THROW_CLDNN_EXCEPTION("Invalid border mode " << mode << " in layer " << padLayer->name); - - std::string padLayerName = layer_type_name_ID(layer); - auto tilePrim = cldnn::border( - padLayerName, - inputPrimitives[0], - pads_begin, - pads_end, - border_mode, - pad_value); - - m_env.primitiveIDs[padLayerName] = padLayerName; - m_topology->add(tilePrim); - m_env.profilingIDs.push_back(padLayerName); -} - -std::string get_string_id(size_t i) { - std::stringstream ss; - ss << std::setw(5) << std::setfill('0') << i; - return ss.str(); -} - -void CLDNNGraph::CreateLSTMCellPrimitive(InferenceEngine::CNNLayerPtr &layer) { - int lstm_batch_size, lstm_sequence_len, lstm_input_size, lstm_hidden_size; - SizeVector in_dims1, in_dims2; - bool hasBias = false; - auto inputPrimitives = GetPrevLayersPrimitives(layer); - - auto elementSize = cldnn::data_type_traits::size_of(DataTypeFromPrecision(layer->precision)); - std::string layerName = layer_type_name_ID(layer); - cldnn::primitive_id weightID = layerName + m_weightsTag; - cldnn::primitive_id recurrentID = layerName + "_recurrent" + m_weightsTag; - cldnn::primitive_id biasID = layerName + m_biasesTag; - auto cellLayer = as (layer); - - /* check incoming CNN layer and setup required variables */ - { - auto in_data0 = layer->insData[0].lock(); - if (!in_data0) - THROW_IE_EXCEPTION << "Missing first input for LSTMCell layer " << layer->name; - - auto in_dims0 = in_data0->dims; - auto out_dims0 = layer->outData[0]->dims; - - lstm_input_size = in_dims0[0]; 
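-    // Note: the IE SizeVector here is innermost-first, so for a
-    // [batch, input_size] blob dims[0] is input_size and dims[1] is batch;
-    // that is why in_dims0[0] feeds lstm_input_size above while in_dims0[1]
-    // (read just below) feeds lstm_batch_size.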
- lstm_batch_size = in_dims0[1]; - lstm_hidden_size = out_dims0[0]; - - /* do we have initial hidden and cell? - if blobs are not null, direct the data from them - into corresponding LSTM inputs */ - - auto in_data1 = layer->insData[1].lock(); - if (!in_data1) - THROW_IE_EXCEPTION << "Missing second input for LSTMCell layer " << layer->name; - in_dims1 = in_data1->dims; - - - auto in_data2 = layer->insData[2].lock(); - if (!in_data2) - THROW_IE_EXCEPTION << "Missing third input for LSTMCell layer " << layer->name; - in_dims2 = in_data2->dims; - - - if (in_dims0.size() != 2 || in_dims1.size() != 2 || in_dims2.size() != 2) - THROW_IE_EXCEPTION << "Wrong input shapes for LSTMCell Layer " << layer->name; - } - - /* Prepare weight/bias memory primitives - split weight blob into W and R */ - { - cldnn::tensor wTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(lstm_input_size, 4 * lstm_hidden_size)); - cldnn::tensor rTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(lstm_hidden_size, 4 * lstm_hidden_size)); - cldnn::layout WLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, wTensor); - cldnn::layout RLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, rTensor); - - auto wmem = cldnn::memory::allocate(*(m_env.engine), WLayout); - auto wtmpPointer = wmem.pointer(); // implicitly maps buffer - unmap in destructor - - auto rmem = cldnn::memory::allocate(*(m_env.engine), RLayout); - auto rtmpPointer = rmem.pointer(); - - auto wLayer = as (layer); - auto pWeightsBlob = wLayer->_weights; - auto blobBytes = static_cast(pWeightsBlob->buffer()); - const size_t WchunkSz = lstm_input_size * elementSize; - const size_t RchunkSz = lstm_hidden_size * elementSize; - - auto wBytes = wtmpPointer.data(); - auto rBytes = rtmpPointer.data(); - - for (int h = 0; h < 4 * lstm_hidden_size; h++) { - // copy "input size" elements to W - for (size_t b = 0; b < WchunkSz; b++) - *wBytes++ = *blobBytes++; - - // copy "lstm_hidden_size" elements to R - for (size_t b = 0; b < RchunkSz; b++) - *rBytes++ = *blobBytes++; - } - - m_topology->add(cldnn::data(weightID, wmem)); - m_topology->add(cldnn::data(recurrentID, rmem)); - - /* create bias memory primitive */ - auto pBiasBlob = wLayer->_biases; - if (pBiasBlob != nullptr) { - cldnn::tensor bTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(4 * lstm_hidden_size, 1)); - cldnn::layout BLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, rTensor); - - auto bmem = cldnn::memory::allocate(*(m_env.engine), BLayout); - auto btmpPointer = bmem.pointer(); - - auto blobBytes = static_cast(pBiasBlob->buffer()); - const size_t BchunkSz = lstm_hidden_size * elementSize; - auto bBytes = btmpPointer.data(); - - for (size_t b = 0; b < 4 * BchunkSz; b++) - *bBytes++ = *blobBytes++; - - m_topology->add(cldnn::data(biasID, bmem)); - hasBias = true; - } - } - - cldnn::primitive_id inReshapeID = layerName + "_inReshape"; - cldnn::primitive_id permuteID = layerName + "_inputReorder"; - cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape"; - - cldnn::tensor inputShape = { lstm_batch_size, 1, lstm_input_size, 1 }; - cldnn::tensor hiddenStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 }; - cldnn::layout inputLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), cldnn::format::bfyx, inputShape); - m_topology->add(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape)); - 
m_topology->add(cldnn::reorder(permuteID, inReshapeID, inputLayout)); - - std::string hiddenInStr = inHiddenReshapeID + "_1"; - std::string cellInStr = inHiddenReshapeID + "_2"; - m_topology->add(cldnn::reshape(hiddenInStr, inputPrimitives[1], hiddenStateShape)); - m_topology->add(cldnn::reshape(cellInStr, inputPrimitives[2], hiddenStateShape)); - - cldnn::tensor hiddenSz = cldnn::tensor{ lstm_batch_size, 1, lstm_hidden_size, 1 }; - cldnn::tensor cellCropSz = cldnn::tensor{0, 1, 0, 0}; - - std::string lstm_gemm_id = layerName + "_lstm_gemm"; - std::string lstm_elt_id = layerName + "_lstm_elt"; - std::string crop_id = layerName + "_crop"; - - m_topology->add(cldnn::lstm_gemm(lstm_gemm_id, permuteID, - weightID, recurrentID, - hasBias ? biasID : "", - hiddenInStr)); - m_topology->add(cldnn::lstm_elt(lstm_elt_id, lstm_gemm_id, cellInStr, - 0, 0, {}, {}, cldnn_lstm_offset_order_fizo)); - - cldnn::primitive_id outputHiddenID = layerName; - m_topology->add(cldnn::crop(outputHiddenID, lstm_elt_id, hiddenSz, cldnn::tensor{0, 0, 0, 0})); - cldnn::primitive_id outputCellID = layer->type + ":" + layer->outData[1]->name; - m_topology->add(cldnn::crop(outputCellID, lstm_elt_id, hiddenSz, cellCropSz)); - - // output primitive IDs - m_env.primitiveIDs[outputHiddenID] = outputHiddenID; // LSTMCell:LSTMCell - "concat hidden" - m_env.primitiveIDs[layer->type + ":" + layer->outData[0]->name] = outputHiddenID; // LSTMCell:LSTMCell:0 - hidden state - m_env.primitiveIDs[outputCellID] = outputCellID; // LSTMCell:LSTMCell:1 - cell state - - m_env.profilingIDs.push_back(layerName); -} - -void CLDNNGraph::CreateRNNPrimitive(InferenceEngine::CNNLayerPtr &layer) { - int lstm_batch_size, lstm_sequence_len, lstm_input_size, lstm_hidden_size; - SizeVector in_dims1, in_dims2; - bool hasInitialHidden = false, hasInitialCell = false, hasBias = false, isForward = true; - auto inputPrimitives = GetPrevLayersPrimitives(layer); - - auto elementSize = cldnn::data_type_traits::size_of(DataTypeFromPrecision(layer->precision)); - std::string layerName = layer_type_name_ID(layer); - cldnn::primitive_id weightID = layerName + m_weightsTag; - cldnn::primitive_id recurrentID = layerName + "_recurrent" + m_weightsTag; - cldnn::primitive_id biasID = layerName + m_biasesTag; - auto rnnLayer = as (layer); - bool permute_input = (1 != rnnLayer->axis); - - /* check incoming CNN layer and setup required variables */ - { - if (rnnLayer->cellType != RNNSequenceLayer::LSTM) - THROW_IE_EXCEPTION << "RNN layer supports only LSTM like cell"; - - auto in_data0 = layer->insData[0].lock(); - if (!in_data0) - THROW_IE_EXCEPTION << "Missing first input for RNN layer " << layer->name; - - auto in_dims0 = in_data0->dims; - auto out_dims0 = layer->outData[0]->dims; - - if (!permute_input) { - lstm_batch_size = in_dims0[2]; - lstm_sequence_len = in_dims0[1]; - } else { - lstm_batch_size = in_dims0[1]; - lstm_sequence_len = in_dims0[2]; - } - - lstm_input_size = in_dims0[0]; - lstm_hidden_size = out_dims0[0]; - - /* do we have initial hidden and cell? 
- if blobs are not null, direct the data from them - into corresponding LSTM inputs */ - - auto in_data1 = layer->insData[1].lock(); - if (in_data1) { - in_dims1 = in_data1->dims; - hasInitialHidden = true; - } - - auto in_data2 = layer->insData[2].lock(); - if (in_data2) { - in_dims2 = in_data2->dims; - hasInitialCell = true; - } - - if (rnnLayer->direction != RNNSequenceLayer::FWD && rnnLayer->direction != RNNSequenceLayer::BWD) - THROW_IE_EXCEPTION << "Support only forward and backward direction for RNN Layer " << layer->name; - isForward = rnnLayer->direction == RNNSequenceLayer::FWD; - - if (in_dims0.size() != 3 || in_dims1.size() != 2 || in_dims2.size() != 2) - THROW_IE_EXCEPTION << "Wrong input shapes for RNN Layer " << layer->name; - } - - /* Prepare weight/bias memory primitives - split weight blob into W and R */ - { - cldnn::tensor wTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(lstm_input_size, 4 * lstm_hidden_size)); - cldnn::tensor rTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(lstm_hidden_size, 4 * lstm_hidden_size)); - cldnn::layout WLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, wTensor); - cldnn::layout RLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, rTensor); - - auto wmem = cldnn::memory::allocate(*(m_env.engine), WLayout); - auto wtmpPointer = wmem.pointer(); // implicitly maps buffer - unmap in destructor - - auto rmem = cldnn::memory::allocate(*(m_env.engine), RLayout); - auto rtmpPointer = rmem.pointer(); - - auto wLayer = as (layer); - auto pWeightsBlob = wLayer->_weights; - auto blobBytes = static_cast(pWeightsBlob->buffer()); - const size_t WchunkSz = lstm_input_size * elementSize; - const size_t RchunkSz = lstm_hidden_size * elementSize; - - auto wBytes = wtmpPointer.data(); - auto rBytes = rtmpPointer.data(); - - for (int h = 0; h < 4 * lstm_hidden_size; h++) { - // copy "input size" elements to W - for (size_t b = 0; b < WchunkSz; b++) - *wBytes++ = *blobBytes++; - - // copy "lstm_hidden_size" elements to R - for (size_t b = 0; b < RchunkSz; b++) - *rBytes++ = *blobBytes++; - } - - m_topology->add(cldnn::data(weightID, wmem)); - m_topology->add(cldnn::data(recurrentID, rmem)); - - /* create bias memory primitive */ - auto pBiasBlob = wLayer->_biases; - if (pBiasBlob != nullptr) { - cldnn::tensor bTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(4 * lstm_hidden_size, 1)); - cldnn::layout BLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, rTensor); - - auto bmem = cldnn::memory::allocate(*(m_env.engine), BLayout); - auto btmpPointer = bmem.pointer(); - - auto blobBytes = static_cast(pBiasBlob->buffer()); - const size_t BchunkSz = lstm_hidden_size * elementSize; - auto bBytes = btmpPointer.data(); - - for (size_t b = 0; b < 4 * BchunkSz; b++) - *bBytes++ = *blobBytes++; - - m_topology->add(cldnn::data(biasID, bmem)); - hasBias = true; - } - } - - std::vector> input_ids_offsets; - std::vector output_ids_offsets; - - cldnn::primitive_id inReshapeID = layerName + "_inReshape"; - cldnn::primitive_id permuteID = layerName + "_inputReorder"; - cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape"; - - cldnn::tensor inputShape; - - if (permute_input) { - inputShape = { lstm_sequence_len, lstm_batch_size, lstm_input_size, 1 }; - } else { - inputShape = { lstm_batch_size, lstm_sequence_len, lstm_input_size, 1 }; - } - cldnn::tensor hiddenStateShape = { lstm_batch_size, 1, 
lstm_hidden_size, 1 }; - cldnn::layout inputLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), cldnn::format::bfyx, inputShape); - m_topology->add(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape)); - m_topology->add(cldnn::reorder(permuteID, inReshapeID, inputLayout)); - - m_topology->add(cldnn::reshape(inHiddenReshapeID+"_1", inputPrimitives[1], hiddenStateShape)); - m_topology->add(cldnn::reshape(inHiddenReshapeID+"_2", inputPrimitives[2], hiddenStateShape)); - - for (int i = 0; i < lstm_sequence_len; ++i) - input_ids_offsets.push_back({ get_string_id(i), {0, i, 0, 0} }); - - cldnn::primitive_id inputSplitID = layerName + "_inputSplit"; - - if (permute_input) { - m_topology->add(cldnn::permute(layerName + "_inputSwap", permuteID, { 1, 0, 2, 3 })); - m_topology->add(cldnn::split(inputSplitID, layerName + "_inputSwap", input_ids_offsets)); - } else { - m_topology->add(cldnn::split(inputSplitID, permuteID, input_ids_offsets)); - } - - cldnn::tensor hiddenSz = cldnn::tensor{ lstm_batch_size, 1, lstm_hidden_size, 1 }; - cldnn::tensor cellCropSz = cldnn::tensor{0, 1, 0, 0}; - std::string hiddenStr = hasInitialHidden ? inHiddenReshapeID+"_1" : ""; - std::string cellStr = hasInitialCell ? inHiddenReshapeID+"_2" : ""; - - for (int i = 0; i < lstm_sequence_len; ++i) { - std::string lstm_gemm_id = layerName + "_lstm_gemm" + get_string_id(i); - std::string lstm_elt_id = layerName + "_lstm_elt" + get_string_id(i); - std::string crop_id = layerName + "_crop" + get_string_id(i); - - int seqIdx = isForward ? i : lstm_sequence_len - 1 - i; - m_topology->add(cldnn::lstm_gemm(lstm_gemm_id, inputSplitID + ":" + get_string_id(seqIdx), - weightID, recurrentID, - hasBias ? biasID : "", - hiddenStr)); - m_topology->add(cldnn::lstm_elt(lstm_elt_id, lstm_gemm_id, - cellStr, 0, 0, {}, {}, - cldnn_lstm_offset_order_fizo)); - - hiddenStr = crop_id + ":hidden"; - cellStr = crop_id + ":cell"; - m_topology->add(cldnn::crop(hiddenStr, lstm_elt_id, hiddenSz, cldnn::tensor{ 0, 0, 0, 0 })); - output_ids_offsets.push_back(hiddenStr); - - if (i < lstm_sequence_len - 1) { - m_topology->add(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz)); - } else { - // last hidden state crop (output 2) - if (layer->outData.size() > 1) { - cldnn::primitive_id outputHiddenID = layer->type + ":" + layer->outData[1]->name; - m_env.primitiveIDs[hiddenStr] = hiddenStr; - m_env.primitiveIDs[outputHiddenID] = hiddenStr; - } - - // last cell state crop (output 3) - if (layer->outData.size() > 2) { - m_topology->add(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz)); - cldnn::primitive_id outputCellID = layer->type + ":" + layer->outData[2]->name; - m_env.primitiveIDs[cellStr] = cellStr; - m_env.primitiveIDs[outputCellID] = cellStr; - } - } - } - - if (!isForward) std::reverse(output_ids_offsets.begin(), output_ids_offsets.end()); - - if (permute_input) { - m_topology->add(cldnn::concatenation(layerName + "_outputConcat", output_ids_offsets, cldnn::concatenation::along_f)); - m_topology->add(cldnn::permute(layerName, layerName + "_outputConcat", { 1, 0, 2, 3 })); - } else { - m_topology->add(cldnn::concatenation(layerName, output_ids_offsets, cldnn::concatenation::along_f)); - } - - m_env.primitiveIDs[layerName] = layerName; - m_env.primitiveIDs[layer->type + ":" + layer->outData[0]->name] = layerName; - m_env.profilingIDs.push_back(layerName); -} - -void CLDNNGraph::AddConstantBlobInput(InferenceEngine::CNNLayerPtr &layer) { - auto constBlob = layer->blobs.begin()->second; - auto constDims = 
layer->outData[0]->dims;
-
-    cldnn::tensor constTensor;
-    switch (constDims.size()) {
-    case 4: constTensor = cldnn::tensor(TensorValue(constDims[3]), TensorValue(constDims[2]),
-            TensorValue(constDims[0]), TensorValue(constDims[1]));
-        break;
-    case 3: constTensor = cldnn::tensor(TensorValue(constDims[2]), TensorValue(constDims[1]),
-            1, TensorValue(constDims[0]));
-        break;
-    case 2: constTensor = cldnn::tensor(TensorValue(constDims[1]), TensorValue(constDims[0]), 1, 1);
-        break;
-    case 1: constTensor = cldnn::tensor(TensorValue(constDims[0]), 1, 1, 1);
-        break;
-    default: THROW_CLDNN_EXCEPTION("Invalid constant blob dimensions");
-    }
-
-    cldnn::layout constLayout = cldnn::layout(
-        DataTypeFromPrecision(layer->blobs.begin()->second->precision()),
-        m_defaultFormat,
-        constTensor);
-
-    size_t bytes = constLayout.bytes_count();
-    cldnn::primitive_id constPrimID = layer_type_name_ID(layer);
-
-    CreatePrimitiveFromBlob(constPrimID, constBlob, constLayout);
-    m_env.primitiveIDs[constPrimID] = constPrimID;
-}
-
-void CLDNNGraph::CreateConvolutionPrimitive(InferenceEngine::CNNLayerPtr &layer) {
-    ValidateLayer(layer, 1);
-    auto inputPrimitives = GetPrevLayersPrimitives(layer);
-    auto convLayer = as<InferenceEngine::ConvolutionLayer *> (layer);
-
-    std::vector<cldnn::primitive_id> weightPrimID;
-    std::vector<cldnn::primitive_id> biasPrimID;
-    CreateWeightAndBiasPrimitives(layer, weightPrimID, biasPrimID);
-
-    cldnn::tensor stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
-                                         cldnn::spatial(convLayer->_stride[X_AXIS], convLayer->_stride[Y_AXIS]));
-    auto allPad = getPaddings(*convLayer);
-    cldnn::tensor padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0),
-                                          cldnn::spatial(-allPad.begin[X_AXIS], -allPad.begin[Y_AXIS]));
-    cldnn::tensor dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
-                                           cldnn::spatial(convLayer->_dilation[X_AXIS], convLayer->_dilation[Y_AXIS]));
-
-    std::string convLayerName = layer_type_name_ID(layer);
-    if (convLayer->_group >= 16) {
-        auto convPrim = cldnn::convolution(convLayerName,
-                                           inputPrimitives[0],
-                                           weightPrimID,
-                                           biasPrimID,
-                                           convLayer->_group,
-                                           stride,
-                                           padding,
-                                           dilation,
-                                           false,
-                                           0.0,
-                                           CldnnTensorFromIEDims(convLayer->outData[0]->dims));
-        m_topology->add(convPrim);
-    } else {
-        auto convPrim = cldnn::convolution(convLayerName,
-                                           inputPrimitives[0],
-                                           weightPrimID,
-                                           biasPrimID,
-                                           stride,
-                                           padding,
-                                           dilation,
-                                           false,
-                                           0.0f,
-                                           CldnnTensorFromIEDims(convLayer->outData[0]->dims));
-        m_topology->add(convPrim);
-    }
-    m_env.primitiveIDs[convLayerName] = convLayerName;
-    m_env.profilingIDs.push_back(convLayerName);
-}
-
-void CLDNNGraph::CreateGatherPrimitive(InferenceEngine::CNNLayerPtr &layer) {
-    ValidateLayer(layer, 2);
-
-    auto inputPrimitives = GetPrevLayersPrimitives(layer);
-    auto gatherLayer = as<InferenceEngine::GatherLayer *> (layer);
-
-    int axis = gatherLayer->GetParamAsInt("axis", 0);
-
-    // Be careful: TensorFlow has a negative-axis interpretation bug.
-    // Here: -3 = b, -2 = f, -1 = y, but it must be -3 = f, -2 = y, -1 = x.
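-    // For example, with the mapping below an IR axis of -1 selects along_y
-    // (not along_x), mirroring the TensorFlow behaviour described above, so a
-    // 4D bfyx blob gathered with axis = -1 and one gathered with axis = 2 hit
-    // the same clDNN axis.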
-    auto cldnnAxisFromIE = [](int axis) {
-        switch (axis) {
-        case 0: return cldnn::gather::gather_axis::along_b;
-        case 1: return cldnn::gather::gather_axis::along_f;
-        case 2: return cldnn::gather::gather_axis::along_y;
-        case 3: return cldnn::gather::gather_axis::along_x;
-        case -1: return cldnn::gather::gather_axis::along_y;
-        case -2: return cldnn::gather::gather_axis::along_f;
-        case -3: return cldnn::gather::gather_axis::along_b;
-        default: THROW_CLDNN_EXCEPTION("Unsupported gather axis: " << axis);
-        }
-    };
-
-    std::string gatherLayerName = layer_type_name_ID(layer);
-    auto gatherPrim = cldnn::gather(
-        gatherLayerName,
-        inputPrimitives[0],
-        inputPrimitives[1],
-        cldnnAxisFromIE(axis),
-        CldnnTensorFromIEDims(gatherLayer->outData[0]->dims));
-
-    m_env.primitiveIDs[gatherLayerName] = gatherLayerName;
-    m_topology->add(gatherPrim);
-    m_env.profilingIDs.push_back(gatherLayerName);
-}
-
-void CLDNNGraph::CreateDepthToSpacePrimitive(InferenceEngine::CNNLayerPtr &layer) {
-    ValidateLayer(layer, 1);
-
-    auto inputPrimitives = GetPrevLayersPrimitives(layer);
-    auto depthToSpace = as (layer);
-
-    size_t blockSize = depthToSpace->GetParamAsInt("block_size", 2);
-
-    if (depthToSpace->input().get()->dims.size() != 4)
-        THROW_CLDNN_EXCEPTION("Unsupported size of tensor " << depthToSpace->input().get()->dims.size());
-
-    size_t blockSizeSquare = blockSize * blockSize;
-
-    if (depthToSpace->input().get()->dims[2] % blockSizeSquare != 0)
-        THROW_CLDNN_EXCEPTION("The depth of the input tensor must be divisible by squared block size = " << blockSizeSquare);
-
-    std::string depthToSpaceName = layer_type_name_ID(layer);
-    auto depthToSpacePrim = cldnn::depth_to_space(
-        depthToSpaceName,
-        inputPrimitives[0],
-        blockSize);
-
-    m_env.primitiveIDs[depthToSpaceName] = depthToSpaceName;
-    m_topology->add(depthToSpacePrim);
-    m_env.profilingIDs.push_back(depthToSpaceName);
-}
-
-void CLDNNGraph::CreateShuffleChannelsPrimitive(InferenceEngine::CNNLayerPtr &layer) {
-    ValidateLayer(layer, 1);
-
-    auto inputPrimitives = GetPrevLayersPrimitives(layer);
-    auto shuffleChannels = as (layer);
-    const int32_t numberOfDims = shuffleChannels->input()->getDims().size();
-
-    int32_t group = shuffleChannels->GetParamAsInt("group", 1);
-    int32_t axis = shuffleChannels->GetParamAsInt("axis", 1);
-
-    if (axis < 0)
-        axis += numberOfDims;
-
-    if (axis < 0 || axis >= numberOfDims)
-        THROW_CLDNN_EXCEPTION("Incorrect axis value! Actual axis is " + std::to_string(axis));
-
-    if (group < 1)
-        THROW_CLDNN_EXCEPTION("Invalid group size value (should be at least one). Actual group size is " +
-                              std::to_string(group));
-
-    if (shuffleChannels->input().get()->getDims()[axis] % group != 0)
-        THROW_CLDNN_EXCEPTION("Group parameter must evenly divide the channel dimension. Actual group size is " +
-                              std::to_string(group));
-
-    std::string shuffleChannelsName = layer_type_name_ID(layer);
-    auto shuffleChannelsPrim = cldnn::shuffle_channels(
-        shuffleChannelsName,
-        inputPrimitives[0],
-        group,
-        axis);
-
-    m_env.primitiveIDs[shuffleChannelsName] = shuffleChannelsName;
-    m_topology->add(shuffleChannelsPrim);
-    m_env.profilingIDs.push_back(shuffleChannelsName);
-}
-
-void CLDNNGraph::CreateStridedSlicePrimitive(InferenceEngine::CNNLayerPtr &layer) {
-    auto inputPrimitives = GetPrevLayersPrimitives(layer);
-    auto stridedSliceLayer = as (layer);
-
-    auto tmp = stridedSliceLayer->GetParamAsUInts("end_mask");
-    std::vector end_mask(tmp.begin(), tmp.end());
-    tmp = stridedSliceLayer->GetParamAsUInts("begin_mask");
-    std::vector begin_mask(tmp.begin(), tmp.end());
-    tmp = stridedSliceLayer->GetParamAsUInts("new_axis_mask");
-    std::vector new_axis_mask(tmp.begin(), tmp.end());
-    tmp = stridedSliceLayer->GetParamAsUInts("shrink_axis_mask");
-    std::vector shrink_axis_mask(tmp.begin(), tmp.end());
-
-    std::string stridedSliceLayerName = layer_type_name_ID(layer);
-    auto stridedSlicePrim = cldnn::strided_slice(
-        stridedSliceLayerName,
-        inputPrimitives[0], inputPrimitives[1], inputPrimitives[2], inputPrimitives[3],
-        begin_mask, end_mask, new_axis_mask, shrink_axis_mask);
-
-    m_env.primitiveIDs[stridedSliceLayerName] = stridedSliceLayerName;
-    m_topology->add(stridedSlicePrim);
-    m_env.profilingIDs.push_back(stridedSliceLayerName);
-}
-
-void CLDNNGraph::CreateReverseSequencePrimitive(InferenceEngine::CNNLayerPtr &layer) {
-    ValidateLayer(layer, 2);
-
-    auto inputPrimitives = GetPrevLayersPrimitives(layer);
-    auto reverseSequence = as<InferenceEngine::ReverseSequenceLayer *> (layer);
-    const int32_t numberOfDims = reverseSequence->input()->getDims().size();
-
-    const auto input = reverseSequence->insData[0].lock()->getDims();
-    const auto sequence_lengths = reverseSequence->insData[1].lock()->getDims();
-
-    int32_t batch_axis = reverseSequence->GetParamAsInt("batch_axis", 0);
-    int32_t seq_axis = reverseSequence->GetParamAsInt("seq_axis", 1);
-
-    if (batch_axis < 0)
-        batch_axis += input.size();
-
-    if (seq_axis < 0)
-        seq_axis += input.size();
-
-    if (batch_axis == seq_axis)
-        THROW_CLDNN_EXCEPTION("Batch axis and sequence axis should not be equal\n");
-
-    if (seq_axis < 0 || seq_axis >= input.size())
-        THROW_CLDNN_EXCEPTION("Incorrect Sequence axis value! Actual axis is " + std::to_string(seq_axis));
-
-    if (batch_axis < 0 || batch_axis >= input.size())
-        THROW_CLDNN_EXCEPTION("Incorrect Batch axis value! Actual axis is " + std::to_string(batch_axis));
-
-    if (sequence_lengths[0] != input[batch_axis])
-        THROW_CLDNN_EXCEPTION("Sequence lengths must be a vector of length " + std::to_string(input[batch_axis])
-                              + "! Actual length is " + std::to_string(sequence_lengths[0]));
-
-    std::string reverseSequenceLayerName = layer_type_name_ID(layer);
-    auto reverseSequencePrim = cldnn::reverse_sequence(
-        reverseSequenceLayerName,
-        inputPrimitives[0],
-        inputPrimitives[1],
-        seq_axis,
-        batch_axis);
-
-    m_env.primitiveIDs[reverseSequenceLayerName] = reverseSequenceLayerName;
-    m_topology->add(reverseSequencePrim);
-    m_env.profilingIDs.push_back(reverseSequence->name);
-}
-
-bool CLDNNGraph::IsValidSplitConvMerge(const InferenceEngine::SplitLayer *splitLayer) const {
-    if (splitLayer->outData.size() != 2) return false;  // split into 2
-
-    for (auto out : splitLayer->outData) {
-        if (out->getInputTo().size() != 1) {
-            return false;
-        }
-    }
-
-    auto convLayer1 =
-        as<InferenceEngine::ConvolutionLayer *> (GetNextSingleLayer(splitLayer->outData[0]));
-    auto convLayer2 =
-        as<InferenceEngine::ConvolutionLayer *> (GetNextSingleLayer(splitLayer->outData[1]));
-    if (!convLayer1 || !convLayer2) {  // outputs aren't convolutions
-        return false;
-    }
-    auto allPad1 = getPaddings(*convLayer1);
-    auto allPad2 = getPaddings(*convLayer2);
-    if (convLayer1->precision != convLayer2->precision                         // wrong precision
-        || convLayer1->_fusedWith || convLayer2->_fusedWith                    // convolutions are fused
-        || convLayer1->outData.size() != 1 || convLayer2->outData.size() != 1  // more than 1 output for convolutions
-        || allPad1.begin[X_AXIS] != allPad2.begin[X_AXIS]                      // different padding
-        || allPad1.begin[Y_AXIS] != allPad2.begin[Y_AXIS]                      // different padding
-        || convLayer1->_stride[X_AXIS] != convLayer2->_stride[X_AXIS]          // different strides
-        || convLayer1->_stride[Y_AXIS] != convLayer2->_stride[Y_AXIS]          // different strides
-        || convLayer1->_dilation[X_AXIS] != convLayer2->_dilation[X_AXIS]      // different dilation
-        || convLayer1->_dilation[Y_AXIS] != convLayer2->_dilation[Y_AXIS]      // different dilation
-        || (GetNextSingleLayer(GetNextSingleLayer(splitLayer->outData[0]))     // no merge after convolutions
-            != GetNextSingleLayer(GetNextSingleLayer(splitLayer->outData[1])))
-        || (p_currentOutputs->find(convLayer1->name) != p_currentOutputs->end())
-        || (p_currentOutputs->find(convLayer2->name) != p_currentOutputs->end())) {
-        return false;
-    }
-    auto concatLayer =
-        as<InferenceEngine::ConcatLayer *> (
-            GetNextSingleLayer(GetNextSingleLayer(splitLayer->outData[0])));
-    if (!concatLayer ||                      // not a merge layer
-        concatLayer->_axis != 1 ||           // merge on unsupported axis
-        concatLayer->outData.size() != 1) {  // too many outputs
-        return false;
-    }
-    if (m_config.customLayers.find(convLayer1->type) != m_config.customLayers.end() ||
-        m_config.customLayers.find(concatLayer->type) != m_config.customLayers.end()) {
-        return false;  // convolution or concat were overwritten by a custom layer
-    }
-
-    return true;
-}
-
-void CLDNNGraph::AddInputPrimitive(InferenceEngine::InputInfo::Ptr inputInfo, Precision inputPrecision) {
-    // first create and add the input layout
-    auto inputDims = inputInfo->getDims();
-    InferenceEngine::Layout l = inputInfo->getTensorDesc().getLayout();
-    auto consumers = inputInfo->getInputData()->getInputTo();
-    bool single_consumer = consumers.size() == 1;
-    CLDNNGraph::LayerType consumerType = LayerTypeFromStr(consumers.begin()->second->type);
-
-    cldnn::tensor dataTensor;
-    cldnn::tensor::value_type batch = (m_env.m_max_batch <= 1)
-                                      ? (inputDims.size() == 4 ?
TensorValue(inputDims[3]) : 1) - : TensorValue(m_curBatch); - switch (inputDims.size()) { - case 4: - if (InferenceEngine::Layout::NCHW == l || InferenceEngine::Layout::CHW == l) { - dataTensor = cldnn::tensor(batch, - TensorValue(inputDims[2]), TensorValue(inputDims[0]), - TensorValue(inputDims[1])); - } else if (InferenceEngine::Layout::NHWC == l) { - dataTensor = cldnn::tensor(batch, - TensorValue(inputDims[2]), TensorValue(inputDims[0]), - TensorValue(inputDims[1])); - } else { - THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(l) << ") in 4D input " + inputInfo->name()); - } - break; - case 3: - if (InferenceEngine::Layout::CHW == l) { - dataTensor = cldnn::tensor(TensorValue(inputDims[2]), TensorValue(inputDims[1]), 1, TensorValue(inputDims[0])); - } else { - THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(l) << ") in 3D input " + inputInfo->name()); - } - break; - case 2: - if (InferenceEngine::Layout::NCHW == l) { - dataTensor = cldnn::tensor(1, 1, TensorValue(inputDims[1]), TensorValue(inputDims[0])); - } else if (InferenceEngine::NC == l) { - dataTensor = cldnn::tensor(TensorValue(inputDims[1]), TensorValue(inputDims[0]), 1, 1); - } else { - THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(l) << ") in 2D input " + inputInfo->name()); - } - break; - case 1: - dataTensor = cldnn::tensor(TensorValue(inputDims[0]), 1, 1, 1); - break; - default: THROW_CLDNN_EXCEPTION("Invalid data dimensions"); - } - - cldnn::layout inputLayout(DataTypeFromPrecision(inputInfo->getInputPrecision()), - FormatFromLayout(l), - dataTensor); - - // save the input dims - m_env.inputLayouts.insert({ inputInfo->name(), inputLayout }); - - auto inputName = "Input:" + inputInfo->name(); - m_topology->add(cldnn::input_layout(inputName, inputLayout)); - - // create preprocess primitive for this input - auto preProcess = inputInfo->getPreProcess(); - - size_t meanChannels = preProcess.getNumberOfChannels(); - inputLayout.format = m_defaultFormat; - inputLayout.size = inputLayout.size.transform(m_defaultFormat, 1); - inputLayout.data_type = DataTypeFromPrecision(inputPrecision); - auto preprocessPrimID = inputName + m_preProcessTag; - - if ((meanChannels > 0) && - (meanChannels != inputLayout.size.feature[0])) { - THROW_CLDNN_EXCEPTION("Mismatched mean values channels in input " + inputName); - } - - switch (preProcess.getMeanVariant()) { - case NONE: - case MEAN_VALUE: { - std::vector meanValues; - if (meanChannels > 0) { - for (size_t c = 0; c < meanChannels; c++) { - if (fabs(preProcess[c]->stdScale - 1.0f) > 1e-10) - THROW_CLDNN_EXCEPTION("not supporting stdScale yet in input " + inputName); - meanValues.push_back(preProcess[c]->meanValue); - } - } - m_topology->add(cldnn::reorder(preprocessPrimID, inputName, inputLayout, meanValues)); - m_env.profilingIDs.push_back(preprocessPrimID); - InitProfileInfo(preprocessPrimID, "Reorder"); - } - break; - - case MEAN_IMAGE: { - IE_ASSERT(meanChannels); - // first merge all mean values to a single blob - // todo make sure mean blob precision is the same as the input precision - auto meanDims = inputInfo->getDims(); - // overwrite batches with 1 - switch (meanDims.size()) { - case 4: meanDims[3] = 1; - break; - default: - THROW_CLDNN_EXCEPTION("Missing batch dimensions in input image"); - } - InferenceEngine::TBlob meanBlob(Precision(Precision::FP32), TensorDesc::getLayoutByDims(meanDims), meanDims); - meanBlob.allocate(); - auto meanBlobData = meanBlob.data(); - for (size_t c = 0; c 
< meanChannels; c++) { - if (fabs(preProcess[c]->stdScale - 1.0f) > 1e-10) - THROW_CLDNN_EXCEPTION("not supporting stdScale yet in input " + inputName); - auto channelMeanBlob = std::dynamic_pointer_cast>(preProcess[c]->meanData); - auto channelSize = channelMeanBlob->size(); - auto channelBlobData = channelMeanBlob->data(); - for (size_t i = 0; i < channelSize; i++) { - meanBlobData[(c * channelSize) + i] = channelBlobData[i]; - } - } - // then create a data primitive for the mean values - auto meanBlobPtr = std::make_shared>(meanBlob); - - // mean values will use external format (sub in the input format before convert to new format) - cldnn::tensor meanBlobTensor(inputLayout.size); - meanBlobTensor.batch[0] = 1; // mean values have no batches - cldnn::layout meanBlobLayout(cldnn::data_types::f32, m_defaultFormat, meanBlobTensor); - CreatePrimitiveFromBlob( - inputName + m_meanValuesTag, - meanBlobPtr, - meanBlobLayout); - m_topology->add(cldnn::reorder(preprocessPrimID, - inputName, - inputLayout, - inputName + m_meanValuesTag)); - m_env.profilingIDs.push_back(preprocessPrimID); - InitProfileInfo(preprocessPrimID, "Reorder"); - } - break; - - default: THROW_CLDNN_EXCEPTION("Invalid mean variant in input " + inputName); - break; - } - m_env.primitiveIDs[inputName] = preprocessPrimID; - m_env.primitiveIDs[preprocessPrimID] = preprocessPrimID; -} - -std::vector CLDNNGraph::GetPrevLayersPrimitives(const InferenceEngine::CNNLayerPtr layer) const { - if (layer == nullptr) { - return {}; - } - std::vector inputPrimitives; - for (auto inputData : layer->insData) { - auto prevData = inputData.lock(); - if (prevData == nullptr) { - THROW_CLDNN_EXCEPTION("Nonexistent input for layer: " << layer->name); - } - auto prevCreator = prevData->creatorLayer.lock(); - std::string prevName; - - if (prevCreator) { - prevName = prevCreator->type + ":"; - if (prevCreator->outData.size() > 1) - prevName += prevData->name; - else - prevName += prevCreator->name; - } else { - prevName = prevData->name; - } - inputPrimitives.push_back(m_env.primitiveIDs.at(prevName)); - } - return inputPrimitives; -} - -void CLDNNGraph::AddOutputPrimitive(std::string outputName, const InferenceEngine::DataPtr outputData, Precision outputPrecision) { - // TODO: add precision check once there's an outputInfo object - if (outputData->layout != InferenceEngine::NCHW && - outputData->layout != InferenceEngine::NHWC && - outputData->layout != InferenceEngine::CHW && - outputData->layout != InferenceEngine::NC) { - THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(outputData->layout) << ") in output: " << outputName); - } - - auto outputCreator = outputData->getCreatorLayer().lock(); - std::string outLayerName = outputCreator->type + ":"; - - if (outputCreator->outData.size() > 1) - outLayerName += outputName; + InferenceEngine::SizeVector sz; + if (res_output != outputDims.end()) + sz = res_output->second; else - outLayerName += outputCreator->name; - - auto outputReorderID = outputName + m_postProcessTag; - Precision precision = outputPrecision == Precision::UNSPECIFIED ? outputData->getPrecision() : outputPrecision; - - // Find correct output ID. Start with name stored in IR. 
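-    // The walk below chases m_env.primitiveIDs to a fixed point: outputID is
-    // replaced by whatever it maps to until the name stops redirecting
-    // (outputID == finalID), e.g. a hypothetical chain
-    // "SoftMax:prob" -> "prob_cldnn_output_postprocess" -> itself.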
- std::string outputID = outLayerName; - std::string finalID = m_env.primitiveIDs.at(outLayerName); - - while (outputID != finalID) { - auto prim = m_env.primitiveIDs.find(finalID); - - if (prim == m_env.primitiveIDs.end()) { - THROW_IE_EXCEPTION << "Unknown output primitive id " << outputID; - } - outputID = finalID; - finalID = prim->second; - } - - m_topology->add(cldnn::reorder(outputReorderID, outputID, - FormatFromLayout(outputData->getLayout()), - DataTypeFromPrecision(precision))); - m_env.primitiveIDs[outputName] = outputReorderID; - m_env.profilingIDs.push_back(outputReorderID); - InitProfileInfo(outputReorderID, "Reorder"); - m_env.outputDims[outputName] = outputData->dims; - m_env.prevPrimitiveIDs[outputReorderID] = {outputName}; -} - -void CLDNNGraph::AddSingleValuePrimitive(cldnn::primitive_id valPrimID, cldnn::data_types dataType, float value) { - cldnn::layout primLayout(dataType, m_defaultFormat, { 1, 1, 1, 1 }); - auto primMem = cldnn::memory::allocate(*(m_env.engine), primLayout); - switch (dataType) { - case cldnn::data_types::f32: - { - auto tmpPointer = primMem.pointer(); // implicitly maps buffer - unmap in destructor - tmpPointer[0] = value; - } - break; - case cldnn::data_types::f16: - { - auto tmpPointer = primMem.pointer(); // implicitly maps buffer - unmap in destructor - cldnn_status status = CLDNN_SUCCESS; - tmpPointer[0] = cldnn_float_to_half(value, &status); - if (status != CLDNN_SUCCESS) { - THROW_CLDNN_EXCEPTION("Error converting value to fp16."); - } - } - break; - default: - THROW_CLDNN_EXCEPTION("Unhandled data type (precision)"); - } - - m_topology->add(cldnn::data(valPrimID, primMem)); -} - -cldnn::data_types CLDNNGraph::DataTypeFromPrecision(InferenceEngine::Precision p) { - switch (p) { - case Precision::I16: - case Precision::FP32: - return cldnn::data_types::f32; - case Precision::FP16: - return cldnn::data_types::f16; - case Precision::U8: - return cldnn::data_types::u8; - case Precision::I32: - return cldnn::data_types::i32; - default: - THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << p.name() << " precision"; - break; - } -} - -cldnn::format CLDNNGraph::FormatFromLayout(InferenceEngine::Layout l) { - switch (l) { - case InferenceEngine::Layout::NCHW: - case InferenceEngine::Layout::NC: - case InferenceEngine::Layout::CHW: - case InferenceEngine::Layout::C: - return cldnn::format::bfyx; - case InferenceEngine::Layout::NHWC: - return cldnn::format::byxf; - default: - THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << l << " layout"; - break; - } -} - -cldnn::upsampling_sample_type CLDNNGraph::UpsamplingTypeFromString(const std::string& str) { - static const caseless_map UpsamplingTypeNameToType = { - { "Bilinear" , cldnn::upsampling_sample_type::bilinear }, - { "Nearest" , cldnn::upsampling_sample_type::nearest }, - }; - auto it = UpsamplingTypeNameToType.find(str); - if (it != UpsamplingTypeNameToType.end()) - return it->second; - else - THROW_CLDNN_EXCEPTION("Unknown Upsampling type: " << str); -} - -cldnn::softmax::dimension_t CLDNNGraph::SoftmaxDimensionFromIEAxis(const InferenceEngine::SoftMaxLayer* softmaxLayer, bool isPrevFC) { - // WA for default softmax dimension in cldnn for fyx - // todo: remove this once clDNN changes FC output to BF instead of BX - auto dims = softmaxLayer->outData[0]->dims; - unsigned non1Dims = 0; - for (size_t i = 0; i < dims.size(); i++) { - if (dims[i] > 1) { - non1Dims++; - } - } - if (non1Dims == 1 || isPrevFC) { - return 
cldnn::softmax::normalize_fyx; - } - // end of WA - - switch (softmaxLayer->axis) { - case 1: return cldnn::softmax::normalize_f; - case 2: return cldnn::softmax::normalize_y; - case 3: return cldnn::softmax::normalize_x; - default: THROW_CLDNN_EXCEPTION("Invalid softmax axis " << softmaxLayer->axis); - } - return cldnn::softmax::normalize_fyx; -} - -cldnn::prior_box_code_type CLDNNGraph::PriorBoxCodeFromString(const std::string& str) { - static const std::map CodeNameToType = { - { "caffe.PriorBoxParameter.CORNER" , cldnn::prior_box_code_type::corner }, - { "caffe.PriorBoxParameter.CENTER_SIZE" , cldnn::prior_box_code_type::center_size }, - { "caffe.PriorBoxParameter.CORNER_SIZE" , cldnn::prior_box_code_type::corner_size }, - }; - auto it = CodeNameToType.find(str); - if (it != CodeNameToType.end()) { - return it->second; - } else { - THROW_CLDNN_EXCEPTION("Unknown Prior-Box code type: " + str); - return cldnn::prior_box_code_type::corner; - } -} - -void CLDNNGraph::CreateGenericLayerBlobPrimitives(const InferenceEngine::GenericLayer* layer) { - IE_ASSERT(layer); - for (auto& blob : layer->blobs) { - if (blob.second->dims().size() != 1) { - THROW_CLDNN_EXCEPTION("Unhandled blob dim in layer " + layer->name); - } - - cldnn::layout genericLayout(DataTypeFromPrecision(blob.second->precision()), - m_defaultFormat, - cldnn::spatial(TensorValue(blob.second->dims()[0]))); - - CreatePrimitiveFromBlob(layer->type + ":" + layer->name + "_" + blob.first + m_weightsTag, - blob.second, genericLayout); - } -} - -void CLDNNGraph::ValidateGenericLayerBlobs(const InferenceEngine::GenericLayer* layer, const std::vector& blobNames) { - IE_ASSERT(layer); - for (auto& name : blobNames) { - if (layer->blobs.find(name) == layer->blobs.end()) { - THROW_CLDNN_EXCEPTION("Missing blob " + name + " in layer " + layer->name); - } - } -} - -cldnn::tensor CLDNNGraph::CldnnTensorFromIEDims(const InferenceEngine::SizeVector& dims) { - auto numDims = dims.size(); - std::vector outputTensor({ 1, 1, 1, 1 }); - for (size_t i = 0; i < numDims; i++) { - outputTensor[i] = TensorValue(dims[numDims - i - 1]); - } - // swap x,y for cldnn tensor taking bfxy instead of bfyx - auto tmp = outputTensor[2]; - outputTensor[2] = outputTensor[3]; - outputTensor[3] = tmp; - - return outputTensor; -} - -InferRequestInternal::Ptr -CLDNNGraph::CreateInferRequestImpl(InputsDataMap networkInputs, OutputsDataMap networkOutputs) { - if (m_env.network == nullptr) { - THROW_IE_EXCEPTION << NETWORK_NOT_LOADED_str; - } - return std::make_shared(m_env, m_config.useProfiling, networkInputs, networkOutputs); -} + sz = outputDims.at(primitiveIDs.at(outName)); -void CLDNNGraph::InitProfileInfo(const std::string& layerName, - const std::string& layerType, - bool isCPU, - InferenceEngine::InferenceEngineProfileInfo::LayerStatus status) { - m_env.perfMap[layerType + ":" + layerName].first = layerName; - auto& perfEntry = m_env.perfMap[layerType + ":" + layerName].second; - perfEntry.layerType = layerType; - perfEntry.status = status; - perfEntry.cpu_uSec = perfEntry.realTime_uSec = 0; - perfEntry.isCPU = isCPU; - perfEntry.status = status; + return sz; } }; // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/cldnn_graph.h b/inference-engine/src/cldnn_engine/cldnn_graph.h index f6391d50d78f9d..48c414d6cb371f 100644 --- a/inference-engine/src/cldnn_engine/cldnn_graph.h +++ b/inference-engine/src/cldnn_engine/cldnn_graph.h @@ -27,259 +27,63 @@ #include #include #include "cldnn_custom_layer.h" +#include "cldnn_config.h" +#include 
"cldnn_program.h" namespace CLDNNPlugin { -struct PerfCounter { - InferenceEngine::InferenceEngineProfileInfo::LayerStatus status; - bool isCPU; - uint64_t realTime_uSec; - uint64_t cpu_uSec; - uint32_t num; - std::string layerType; - -public: - PerfCounter() : realTime_uSec(0), cpu_uSec(0), num(0), - status(InferenceEngine::InferenceEngineProfileInfo::NOT_RUN), isCPU(false) {} - - long long realTime_avg() const { return (num == 0) ? 0 : realTime_uSec / num; } - long long cpu_avg() const { return (num == 0) ? 0 : cpu_uSec / num; } -}; - -struct InferenceEnv { - std::shared_ptr engine; - std::shared_ptr network; - std::map primitiveIDs; - std::map> prevPrimitiveIDs; - - std::map> perfMap; - std::vector profilingIDs; - - DebugOptions debugOptions; - - std::map outputDims; - std::map inputLayouts; - - std::vector> batchNetworks; - int m_max_batch; - int m_bv_sz; -}; - -class CLDNNGraph : public InferenceEngine::ExecutableNetworkThreadSafeDefault { +class CLDNNGraph { public: typedef std::shared_ptr Ptr; - struct Config { - Config() : useProfiling(false), dumpCustomKernels(false), exclusiveAsyncRequests(false), - memory_pool_on(true), - enableDynamicBatch(false), - queuePriority(cldnn::priority_mode_types::disabled), - queueThrottle(cldnn::throttle_mode_types::disabled) {} - - void LoadFromMap(const std::map& configMap); - bool enableDynamicBatch; - bool useProfiling; - bool dumpCustomKernels; - bool exclusiveAsyncRequests; - bool memory_pool_on; - cldnn::priority_mode_types queuePriority; - cldnn::throttle_mode_types queueThrottle; - CLDNNCustomLayerMap customLayers; - cldnn::tuning_config_options tuningConfig; - std::string graph_dumps_dir; - std::string sources_dumps_dir; - }; - explicit CLDNNGraph(InferenceEngine::ICNNNetwork &network, const Config& config = {}, int max_batch = -1); + explicit CLDNNGraph(InferenceEngine::ICNNNetwork& network, const Config& config = {}, uint16_t stream_id = 0); + explicit CLDNNGraph(std::shared_ptr graph, uint16_t stream_id = 0); + void GetExecGraphInfo(InferenceEngine::ICNNNetwork::Ptr& graphPtr); - InferenceEngine::InferRequestInternal::Ptr - CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs) override; + bool IsLoaded() const; - static bool IsLayerSupported(const std::string &type) { - return LayerTypeFromStr(type) != NO_TYPE; + static bool IsLayerSupported(const std::string& type) { + return Program::LayerTypeFromStr(type) != Program::NO_TYPE; } -protected: - // graph members - std::shared_ptr m_topology; - InferenceEnv m_env; - Config m_config; - - InferenceEngine::InputsDataMap* p_currentInputs; - InferenceEngine::OutputsDataMap* p_currentOutputs; - int m_curBatch; - static const cldnn::primitive_id m_preProcessTag; - static const cldnn::primitive_id m_weightsTag; - static const cldnn::primitive_id m_biasesTag; - static const cldnn::primitive_id m_meanValuesTag; - static const cldnn::primitive_id m_postProcessTag; - static const cldnn::primitive_id m_scalesTag; - static const cldnn::primitive_id m_workaroundTag; - static const cldnn::primitive_id m_preCustomLayerTag; - static const cldnn::primitive_id m_postCustomLayerTag; - - // internal types - enum LayerType { - Convolution, - ReLU, - ReLU6, - Sigmoid, - TanH, - ELU, - Activation, - Exp, - Not, - LRN, - Pooling, - FullyConnected, - SoftMax, - Power, - Split, - Concatenate, - Eltwise, - SimplerNMS, - ROIPooling, - Crop, - Deconvolution, - PriorBox, - DetectionOutput, - Normalize, - Reshape, - Permute, - Flatten, - BatchNormalization, - 
PReLU, - ScaleShift, - Proposal, - PSROIPooling, - Clamp, - Copy, - Upsampling, - Resample, - RegionYolo, - ReorgYolo, - ConstantBlob, - ArgMax, - MVN, - Unpooling, - Tile, - Pad, - LSTMCell, - RNN, - Gather, - DepthToSpace, - ShuffleChannels, - StridedSlice, - ReverseSequence, - NO_TYPE - }; + void GetPerformanceCounts(std::map& perfMap) const; + void UpdatePerfStatistics(); - enum WeightRearrangeType { - BroadcastFeatures, - FlipDeconvDims, - NO_REARRANGE - }; + int GetMaxDynamicBatchSize() const { return m_config.max_dynamic_batch; } + const std::map& GetInputLayouts() const { return m_program->getInputLayouts(); } + std::shared_ptr GetEngine() const { return m_engine; } + size_t GetNetworksCount() const { return m_networks.size(); } + std::shared_ptr GetNetwork(size_t idx = 0) const; + InferenceEngine::SizeVector GetOutputSize(std::string outName) const; + std::string MapOutputName(std::string outName) const; + std::string getName() const { return m_networkName; } + const Config& getConfig() const { return m_config; } - cldnn::format m_defaultFormat; - void InitFormat(InferenceEngine::ICNNNetwork &network); - - static cldnn::data_types DataTypeFromPrecision(InferenceEngine::Precision p); - static cldnn::format FormatFromLayout(InferenceEngine::Layout l); - static cldnn::upsampling_sample_type UpsamplingTypeFromString(const std::string& str); +protected: + std::string m_networkName; - void Load(InferenceEngine::ICNNNetwork &network); - static LayerType LayerTypeFromStr(const std::string& str); - static cldnn::pooling_mode PoolingModeFromIEPooling(InferenceEngine::PoolingLayer::PoolType pt, bool excludePadding = false); - static cldnn::eltwise_mode EltwiseModeFromIEEltwise(InferenceEngine::EltwiseLayer::eOperation op); - static cldnn::concatenation::concatenation_axis ConcatAxisFromIEAxis(unsigned axis); - static cldnn::prior_box_code_type PriorBoxCodeFromString(const std::string& str); - static cldnn::softmax::dimension_t SoftmaxDimensionFromIEAxis(const InferenceEngine::SoftMaxLayer* softmaxLayer, bool isPrevFC = false); - void CreatePrimitiveFromBlob(cldnn::primitive_id primID, - const InferenceEngine::Blob::Ptr pBlob, - const cldnn::layout& blobLayout, - size_t blobByteOffset = 0, - WeightRearrangeType rearrange = NO_REARRANGE); - void CreateWeightAndBiasPrimitives(const InferenceEngine::CNNLayerPtr& layer, - std::vector& weightsPrimID, - std::vector& biasesPrimID); - void CreateScaleWeightsAndBiasesFromBN(const InferenceEngine::BatchNormalizationLayer* bnLayer, - cldnn::primitive_id weightsPrimID, - cldnn::primitive_id biasesPrimID); - void AddPreProcessPrimitive(InferenceEngine::InputInfo::Ptr inputInfo); - void AddInputPrimitive(InferenceEngine::InputInfo::Ptr inputInfo, InferenceEngine::Precision inputPrecision); - void AddOutputPrimitive(std::string outputName, const InferenceEngine::DataPtr outputData, - InferenceEngine::Precision outputPrecision = InferenceEngine::Precision::UNSPECIFIED); - void CreateSingleLayerPrimitive(InferenceEngine::CNNLayerPtr& layer); - bool IsValidSplitConvMerge(const InferenceEngine::SplitLayer* splitLayer) const; - bool CanProcessDynBatch(InferenceEngine::ICNNNetwork &network) const; - static std::vector GetNextLayers(const InferenceEngine::DataPtr data); - static std::vector GetNextLayers(const InferenceEngine::CNNLayerPtr layer); - static InferenceEngine::CNNLayerPtr GetNextSingleLayer(const InferenceEngine::DataPtr data); - static InferenceEngine::CNNLayerPtr GetNextSingleLayer(const InferenceEngine::CNNLayerPtr layer); - std::vector 
GetPrevLayersPrimitives(const InferenceEngine::CNNLayerPtr layer) const; - void AddSingleValuePrimitive(cldnn::primitive_id valPrimID, cldnn::data_types dataType, float value); + std::shared_ptr m_engine; + std::vector> m_networks; + std::map primitiveIDs; + std::map> primitivesToIRLayersMap; + std::map> prevPrimitiveIDs; - void CreateGenericLayerBlobPrimitives(const InferenceEngine::GenericLayer* layer); - static void ValidateGenericLayerBlobs(const InferenceEngine::GenericLayer* layer, const std::vector& blobNames); - static cldnn::tensor CldnnTensorFromIEDims(const InferenceEngine::SizeVector& dims); - static bool HasParam(const std::map& layerParams, std::string paramName) { - auto p = layerParams.find(paramName); - return p != layerParams.end(); - } + std::map> perfMap; + std::map implementationsMap; + std::vector profilingIDs; - void InitProfileInfo(const std::string& layerName, - const std::string& layerType, - bool isCPU = false, - InferenceEngine::InferenceEngineProfileInfo::LayerStatus status - = InferenceEngine::InferenceEngineProfileInfo::EXECUTED); - void changeInputBatch(size_t batch); - void CompileNetwork(); + std::map outputDims; - // Layer Primitive Creators - void CreatePReLUPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateBatchNormalizationPrimitive(InferenceEngine::CNNLayerPtr & layer); - void CreateFlattenPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreatePermutePrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateReshapePrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateNormalizePrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateDetectionOutputPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreatePriorBoxPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateDeconvolutionPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateCropPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateROIPoolingPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateSimplerNMSPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateEltwisePrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateConcatenatePrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateSplitPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateFusedSplitConvMergePrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreatePowerPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateSoftMaxPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateFullyConnectedPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreatePoolingPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateLRNPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateActivationPrimitive(InferenceEngine::CNNLayerPtr &layer, const LayerType type); - void CreateConvolutionPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateScaleShiftPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateProposalPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreatePSROIPoolingPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateCopyPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateUpsamplingPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateResamplePrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateYOLO2RegionPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateYOLO2ReorgPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateArgMaxPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateMaxUnpoolingPrimitive(InferenceEngine::CNNLayerPtr &layer); - void 
CreateMVNPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateTilePrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreatePadPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateRNNPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateLSTMCellPrimitive(InferenceEngine::CNNLayerPtr &layer); - void AddConstantBlobInput(InferenceEngine::CNNLayerPtr &layer); - void CreateCustomLayerPrimitive(InferenceEngine::CNNLayerPtr &layer, CLDNNCustomLayerPtr customLayer); - void CreateGatherPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateDepthToSpacePrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateShuffleChannelsPrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateStridedSlicePrimitive(InferenceEngine::CNNLayerPtr &layer); - void CreateReverseSequencePrimitive(InferenceEngine::CNNLayerPtr &layer); + std::shared_ptr m_program; + Config m_config; + uint16_t m_stream_id; + + std::shared_ptr BuildNetwork(std::shared_ptr program); + void Build(); + void UpdateLayersMaps(); + void UpdateImplementationsMap(); + InferenceEngine::ICNNNetwork::Ptr GetExecGraphInfoByPrimitivesInfo(std::vector& pi, + bool filter_const_primitives = true); }; -}; // namespace CLDNNPlugin +} // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp b/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp index 6b0ea700c79763..f2b7e10c8ef0b2 100644 --- a/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp @@ -9,60 +9,69 @@ #include // todo: find a way to remove this #include #include "cldnn_infer_request.h" +#include "cldnn_streams_task_executor.h" using namespace InferenceEngine; namespace CLDNNPlugin { +std::atomic CLDNNInferRequest::runningCounter(0u); + const char CLDNNInferRequest::fp32_suffix[] = "_fp32"; Blob::Ptr CLDNNInferRequest::createInputBlob(const TensorDesc& desc, uint8_t* mem_ptr) { - const Layout l = desc.getLayout(); const Precision p = desc.getPrecision(); - const SizeVector sz = SizeVector(desc.getDims().rbegin(), desc.getDims().rend()); switch (p) { case Precision::FP32: if (mem_ptr != nullptr) - return make_shared_blob(p, l, sz, reinterpret_cast(mem_ptr)); + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); else - return make_shared_blob(p, l, sz); + return make_shared_blob(desc); case Precision::FP16: if (mem_ptr != nullptr) - return make_shared_blob(p, l, sz, reinterpret_cast(mem_ptr)); + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); else - return make_shared_blob(p, l, sz); + return make_shared_blob(desc); case Precision::I16: if (mem_ptr != nullptr) - return make_shared_blob(p, l, sz, reinterpret_cast(mem_ptr)); + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); + case Precision::I32: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); else - return make_shared_blob(p, l, sz); + return make_shared_blob(desc); case Precision::U8: if (mem_ptr != nullptr) - return make_shared_blob(p, l, sz, reinterpret_cast(mem_ptr)); + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); else - return make_shared_blob(Precision::U8, l, sz); + return make_shared_blob(desc); default: THROW_IE_EXCEPTION << "The plugin does not support input " << p.name() << " precision"; } } Blob::Ptr CLDNNInferRequest::createOutputBlob(const TensorDesc& desc, uint8_t* mem_ptr) { - const Layout l = desc.getLayout(); const Precision p = desc.getPrecision(); - const SizeVector 
sz = SizeVector(desc.getDims().rbegin(), desc.getDims().rend()); switch (p) { case Precision::FP32: if (mem_ptr != nullptr) - return make_shared_blob(p, l, sz, reinterpret_cast(mem_ptr)); + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); else - return make_shared_blob(p, l, sz); + return make_shared_blob(desc); case Precision::FP16: if (mem_ptr != nullptr) - return make_shared_blob(p, l, sz, reinterpret_cast(mem_ptr)); + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); else - return make_shared_blob(p, l, sz); + return make_shared_blob(desc); + case Precision::I32: + if (mem_ptr != nullptr) + return make_shared_blob(desc, reinterpret_cast(mem_ptr)); + else + return make_shared_blob(desc); default: THROW_IE_EXCEPTION << "The plugin does not support output " << p.name() << " precision"; } @@ -83,7 +92,7 @@ void CLDNNInferRequest::copyOutputData(const cldnn::memory& outputMemory, auto v_padding_l = (h_padding + size.spatial[0]) * u_padd.spatial[1]; auto v_padding_u = (h_padding + size.spatial[0]) * l_padd.spatial[1]; - switch (bptr->precision()) { + switch (bptr->getTensorDesc().getPrecision()) { case Precision::FP32: { TBlob::Ptr out_f = std::dynamic_pointer_cast>(bptr); if (out_f == nullptr) { @@ -115,12 +124,42 @@ void CLDNNInferRequest::copyOutputData(const cldnn::memory& outputMemory, } break; case Precision::FP16: { - TBlob::Ptr out_f = std::dynamic_pointer_cast>(bptr); + auto* out_f = bptr->buffer().as(); if (out_f == nullptr) { THROW_IE_EXCEPTION << "Invalid output blob"; } auto resPtr = outputMemory.pointer(); - uint16_t *resVec = out_f->data() + offset; + uint16_t* resVec = out_f + offset; + + if (h_padding || v_padding_l || v_padding_u) { + size_t i = 0; + for (size_t b = 0; b < size.batch[0]; b++) { + for (size_t f = 0; f < size.feature[0]; f++) { + i += v_padding_l; + for (size_t y = 0; y < size.spatial[1]; y++) { + i += l_padd.spatial[0]; + for (size_t x = 0; x < size.spatial[0]; x++, i++) { + *resVec++ = resPtr[i]; + } + i += u_padd.spatial[0]; + } + i += v_padding_u; + } + } + } else { + for (size_t i = 0; i < n; i++) { + resVec[i] = resPtr[i]; + } + } + } + break; + case Precision::I32: { + TBlob::Ptr out_f = std::dynamic_pointer_cast>(bptr); + if (out_f == nullptr) { + THROW_IE_EXCEPTION << "Invalid output blob"; + } + auto resPtr = outputMemory.pointer(); + int32_t* resVec = out_f->data() + offset; if (h_padding || v_padding_l || v_padding_u) { size_t i = 0; @@ -145,7 +184,7 @@ void CLDNNInferRequest::copyOutputData(const cldnn::memory& outputMemory, } break; default: - THROW_IE_EXCEPTION << "The plugin does not support output " << bptr->precision() << " precision"; + THROW_IE_EXCEPTION << "The plugin does not support output " << bptr->getTensorDesc().getPrecision() << " precision"; } } @@ -156,13 +195,18 @@ void CLDNNInferRequest::copyInputData(std::shared_ptr network, size_t n = (bi == nullptr) ? inputBlob.size() : bi->buf_size; size_t offset = (bi == nullptr) ? 
0 : bi->buf_offset; - cldnn::primitive_id internalName = "Input:" + inputName; - switch (inputBlob.precision()) { + cldnn::primitive_id internalName = "input:" + inputName; + switch (inputBlob.getTensorDesc().getPrecision()) { case Precision::FP32: { float* blob_ptr = const_cast(inputBlob.cbuffer().as()) + offset; network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n)); break; } + case Precision::I32: { + int32_t* blob_ptr = const_cast(inputBlob.cbuffer().as()) + offset; + network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n)); + break; + } case Precision::FP16: { uint16_t* blob_ptr = const_cast(inputBlob.cbuffer().as()) + offset; network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n)); @@ -174,20 +218,20 @@ void CLDNNInferRequest::copyInputData(std::shared_ptr network, break; } default: - THROW_IE_EXCEPTION << "The plugin does not support input " << inputBlob.precision() << " precision"; + THROW_IE_EXCEPTION << "The plugin does not support input " << inputBlob.getTensorDesc().getPrecision() << " precision"; } } void CLDNNInferRequest::AllocateInputs() { // allocate inputs - for (auto &input : m_env.inputLayouts) { + for (auto &input : m_graph->GetInputLayouts()) { std::string name = input.first; cldnn::layout layout = input.second; InputInfo::Ptr ni = _networkInputs.at(input.first); const TensorDesc& desc = ni->getTensorDesc(); - cldnn::memory inputMem = cldnn::memory::allocate(*(m_env.engine), layout); + cldnn::memory inputMem = cldnn::memory::allocate(*(m_graph->GetEngine()), layout); cldnn::pointer mem_ptr = inputMem.pointer(); inputsMemory.insert({ name, inputMem }); @@ -196,7 +240,7 @@ void CLDNNInferRequest::AllocateInputs() { if (desc.getPrecision() == Precision::I16) { cldnn::layout layout_fp32 = layout; layout_fp32.data_type = cldnn::data_types::f32; - cldnn::memory inputMem_fp32 = cldnn::memory::allocate(*(m_env.engine), layout_fp32); + cldnn::memory inputMem_fp32 = cldnn::memory::allocate(*(m_graph->GetEngine()), layout_fp32); inputsMemory.insert({ input.first + fp32_suffix, inputMem_fp32 }); } } @@ -204,22 +248,21 @@ void CLDNNInferRequest::AllocateInputs() { void CLDNNInferRequest::AllocateInputsDyn() { // allocate inputs - for (auto &input : m_env.inputLayouts) { + for (auto &input : m_graph->GetInputLayouts()) { InputInfo::Ptr ni = _networkInputs.at(input.first); TensorDesc desc = ni->getTensorDesc(); SizeVector& dims = desc.getDims(); if (!dims.empty()) { - *dims.begin() = static_cast(m_env.m_max_batch); + *dims.begin() = static_cast(m_graph->GetMaxDynamicBatchSize()); } else { THROW_IE_EXCEPTION << "Empty dimensions for input blob " << input.first; } Blob::Ptr inputBlob = createInputBlob(desc); if (desc.getPrecision() == Precision::I16) { - auto fp32inputBlob = InferenceEngine::make_shared_blob(Precision::FP32, - desc.getLayout(), - desc.getDims()); + desc.setPrecision(Precision::FP32); + auto fp32inputBlob = InferenceEngine::make_shared_blob(desc); fp32inputBlob->allocate(); _inputs[input.first + fp32_suffix] = fp32inputBlob; } @@ -229,27 +272,11 @@ void CLDNNInferRequest::AllocateInputsDyn() { } void CLDNNInferRequest::AllocateOutputs() { - auto networkOutputsIDs = m_env.network->get_output_ids(); - auto allPrimitiveIds = m_env.network->get_all_primitives(); - // allocate outputs + bool can_reuse_internal_mem = !m_useStreams; for (auto& no : _networkOutputs) { - // Find correct output ID. Start with name stored in IR. 
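An editorial aside: the block removed below is the walk that the new MapOutputName call encapsulates. An ID that is not among the network's outputs may only be skipped when clDNN fused it away (its primitive kind is "_optimized_") and it has exactly one recorded parent. A hedged sketch of that rule, with illustrative names (findProducedId and its parameters are not from this diff):

#include <map>
#include <set>
#include <stdexcept>
#include <string>
#include <vector>

// Step backwards through optimized-out primitives until reaching an ID
// that the compiled network actually produces.
static std::string findProducedId(std::string id,
                                  const std::set<std::string>& producedIds,
                                  const std::map<std::string, std::string>& primitiveKinds,
                                  const std::map<std::string, std::vector<std::string>>& parents) {
    while (producedIds.count(id) == 0) {
        auto kind = primitiveKinds.find(id);
        if (kind == primitiveKinds.end())
            throw std::runtime_error("Unknown primitive id " + id);
        // Only a fused primitive with exactly one parent can be stepped over.
        const auto& p = parents.at(id);
        if (p.size() != 1 || kind->second != "_optimized_")
            throw std::runtime_error("Unable to find parent for output primitive " + id);
        id = p.front();
    }
    return id;
}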
- std::string outputID = m_env.primitiveIDs.at(no.first); - while (std::find(networkOutputsIDs.begin(), networkOutputsIDs.end(), outputID) == networkOutputsIDs.end()) { - // If current ID isn't found in cldnn network outputs, get previous primitive id and try again. - auto prim = allPrimitiveIds.find(outputID); - if (prim == allPrimitiveIds.end()) { - THROW_IE_EXCEPTION << "Unknown primitive id " << outputID; - } - - if (m_env.prevPrimitiveIDs.at(outputID).size() != 1 || prim->second != "_optimized_") { - THROW_IE_EXCEPTION << "Unable to find parent for output primitive " << outputID; - } - outputID = m_env.prevPrimitiveIDs.at(outputID)[0]; - } - - cldnn::memory output_mem = m_env.network->get_output_memory(outputID); + std::string outputID = m_graph->MapOutputName(no.first); + cldnn::memory output_mem = m_graph->GetNetwork()->get_output_memory(outputID); cldnn::pointer output_mem_ptr = output_mem.pointer(); if (output_mem_ptr.data() == nullptr) { THROW_IE_EXCEPTION << "Empty output memory for primitive " << outputID; @@ -258,7 +285,13 @@ void CLDNNInferRequest::AllocateOutputs() { DataPtr oi = no.second; const TensorDesc& desc = oi->getTensorDesc(); - _outputs[no.first] = createOutputBlob(desc, output_mem_ptr.data()); + if (can_reuse_internal_mem) { + _outputs[no.first] = createOutputBlob(desc, output_mem_ptr.data()); + } else { + Blob::Ptr outputBlob = createOutputBlob(desc); + outputBlob->allocate(); + _outputs[no.first] = outputBlob; + } outputsMap[no.first] = outputID; } } @@ -271,7 +304,7 @@ void CLDNNInferRequest::AllocateOutputsDyn() { SizeVector& dims = desc.getDims(); if (!dims.empty()) { - *dims.begin() = static_cast(m_env.m_max_batch); + *dims.begin() = static_cast(m_graph->GetMaxDynamicBatchSize()); } else { THROW_IE_EXCEPTION << "Empty dimensions for output blob " << no.first; } @@ -282,11 +315,28 @@ void CLDNNInferRequest::AllocateOutputsDyn() { } } +void CLDNNInferRequest::SetGraph(std::shared_ptr graph) { + m_graph = graph; + + if (m_graph == nullptr) { + THROW_IE_EXCEPTION << NETWORK_NOT_LOADED_str; + } + + if (m_graph->GetMaxDynamicBatchSize() > 1) { + SetBatch(m_graph->GetMaxDynamicBatchSize()); + AllocateInputsDyn(); + AllocateOutputsDyn(); + } else { + AllocateInputs(); + AllocateOutputs(); + } +} + void CLDNNInferRequest::SetBatch(int new_batch) { - if (m_env.m_max_batch < 0) + if (m_graph->GetMaxDynamicBatchSize() < 0) THROW_IE_EXCEPTION << "Dynamic batch is not enabled."; - if (new_batch < 1 || new_batch > m_env.m_max_batch) { + if (new_batch < 1 || new_batch > m_graph->GetMaxDynamicBatchSize()) { THROW_IE_EXCEPTION << "Invalid dynamic batch size " << new_batch << " for this request."; } @@ -298,9 +348,9 @@ void CLDNNInferRequest::SetBatch(int new_batch) { batchOutputs.clear(); // tune expected inputs - for (auto &input : m_env.inputLayouts) { + for (auto &input : m_graph->GetInputLayouts()) { cldnn::tensor dims = input.second.size; - const SizeVector sz = { size_t(dims.spatial[0]), size_t(dims.spatial[1]), size_t(dims.feature[0]), 1 }; + const SizeVector sz = { 1, size_t(dims.feature[0]), size_t(dims.spatial[1]), size_t(dims.spatial[0]) }; size_t single_batch = std::accumulate(std::begin(sz), std::end(sz), (size_t)1, std::multiplies()); std::vector in_buf; @@ -309,7 +359,7 @@ void CLDNNInferRequest::SetBatch(int new_batch) { int b = 0; // calculate metadata for input buffers - for (unsigned nb = 0; nb < m_env.m_bv_sz; nb++) { + for (unsigned nb = 0; nb < m_graph->GetNetworksCount(); nb++) { unsigned int mask = 1 << nb; buf_info ib = { offset, bsz }; @@ -325,24 
+375,16 @@ void CLDNNInferRequest::SetBatch(int new_batch) { // tune expected outputs for (auto& no : _networkOutputs) { - auto res_output = m_env.outputDims.find(no.first); - - InferenceEngine::SizeVector sz; - if (res_output != m_env.outputDims.end()) - sz = res_output->second; - else - sz = m_env.outputDims.at(m_env.primitiveIDs.at(no.first)); - - sz.back() = 1; + auto sz = m_graph->GetOutputSize(no.first); + sz.front() = 1; size_t single_batch = std::accumulate(std::begin(sz), std::end(sz), (size_t)1, std::multiplies()); std::vector out_buf; size_t offset = 0; size_t bsz = single_batch; - int b = 0; // calculate metadata for output buffers - for (unsigned nb = 0; nb < m_env.m_bv_sz; nb++) { - unsigned int mask = 1 << nb; + for (uint32_t nb = 0; nb < m_graph->GetNetworksCount(); nb++) { + uint32_t mask = 1 << nb; buf_info ob = { offset, bsz }; out_buf.push_back(ob); @@ -359,64 +401,15 @@ void CLDNNInferRequest::SetBatch(int new_batch) { m_curBatch = new_batch; } -CLDNNInferRequest::CLDNNInferRequest(const InferenceEnv& env, bool useProfiling, - InputsDataMap networkInputs, OutputsDataMap networkOutputs) - : InferRequestInternal(networkInputs, networkOutputs), - m_env(env), - m_useProfiling(useProfiling) { - if (m_env.m_max_batch > 1) { - SetBatch(m_env.m_max_batch); - AllocateInputsDyn(); - AllocateOutputsDyn(); - } else { - AllocateInputs(); - AllocateOutputs(); - } - - // Fill implementations map - if (m_useProfiling) { - auto extractImplementationFromInfo = [](const std::string& info) -> std::string { - std::string def_implementation = "undef"; - std::string impl_section = "implementation :"; - std::string::size_type pos = info.find(impl_section); - if (pos == std::string::npos) { - return def_implementation; - } - - std::string::size_type end_pos = info.find(',', pos); - if (end_pos == std::string::npos) { - return def_implementation; - } - - std::string::size_type length = end_pos - pos - impl_section.size(); - - auto trim = [](const std::string& str) { - size_t first = str.find_first_not_of(' '); - if (std::string::npos == first) { - return str; - } - size_t last = str.find_last_not_of(' '); - return str.substr(first, (last - first + 1)); - }; - std::string tmp = trim(info.substr(pos + impl_section.size(), length)); - - return tmp.length() > 1 ? tmp : def_implementation; - }; - - // Parse primitive info and extract implementation name. 
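For reference, a compilable restatement of the parsing rule implemented by the lambda above, with a usage check; the sample info strings are illustrative rather than real clDNN primitive dumps:

#include <cassert>
#include <string>

// Extract the value between "implementation :" and the next comma,
// trim spaces, and fall back to "undef" when no usable name is found.
static std::string extractImpl(const std::string& info) {
    const std::string def = "undef";
    const std::string marker = "implementation :";
    std::string::size_type pos = info.find(marker);
    if (pos == std::string::npos) return def;
    std::string::size_type end = info.find(',', pos);
    if (end == std::string::npos) return def;
    std::string tmp = info.substr(pos + marker.size(), end - pos - marker.size());
    std::string::size_type first = tmp.find_first_not_of(' ');
    if (first != std::string::npos) {
        std::string::size_type last = tmp.find_last_not_of(' ');
        tmp = tmp.substr(first, last - first + 1);
    }
    return tmp.length() > 1 ? tmp : def;
}

int main() {
    assert(extractImpl("id: conv1, implementation : ocl_gemm, type: convolution") == "ocl_gemm");
    assert(extractImpl("no implementation field") == "undef");
    return 0;
}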
-        for (auto& id : m_env.profilingIDs) {
-            std::string prim_info = "";
-            try {
-                prim_info = m_env.network->get_primitive_info(id);
-            } catch (std::exception& e) { }
-
-            implementationsMap.insert({id, extractImplementationFromInfo(prim_info)});
-        }
-    }
+CLDNNInferRequest::CLDNNInferRequest(InputsDataMap networkInputs, OutputsDataMap networkOutputs)
+    : InferRequestInternal(networkInputs, networkOutputs)
+    , m_useProfiling(false)
+    , m_useStreams(false) {
 }

 void CLDNNInferRequest::execAndParse() {
-    auto networkOutputs = m_env.network->execute();
+    runningCounter++;
+    auto networkOutputs = m_graph->GetNetwork()->execute();

     // Collect outputs as requested by the model
     for (auto& no : _networkOutputs) {
@@ -433,79 +426,34 @@
             copyOutputData(outputMemory, bptr);
         }
     }
+    runningCounter--;

     // finally collect profiling info
     if (m_useProfiling) {
-        std::map<cldnn::primitive_id, cldnn::event> executedPrimitives = m_env.network->get_executed_primitives();
-        auto allPrimitives = m_env.network->get_all_primitives();
-
-        // Get profiling info for all layers
-        for (auto &profiledID : m_env.profilingIDs) {
-            auto& perfCount = m_env.perfMap[profiledID].second;
-            // Change status if layer wasn't executed by cldnn engine
-            if (perfCount.num == 0 &&
-                executedPrimitives.find(profiledID) == executedPrimitives.end()) {
-                if (allPrimitives.find(profiledID) != allPrimitives.end() &&
-                    allPrimitives.at(profiledID) == "_optimized_") {
-                    // Layer was marked as optimized by cldnn
-                    perfCount.status = InferenceEngineProfileInfo::OPTIMIZED_OUT;
-                } else {
-                    // Layer wasn't run for some reason
-                    perfCount.status = InferenceEngineProfileInfo::NOT_RUN;
-                }
-                continue;
-            }
-
-            auto event = executedPrimitives.at(profiledID);
-            executedPrimitives.erase(profiledID);
-
-            cldnn::instrumentation::profiling_info cldnnInfo{profiledID, event.get_profiling_info()};
-
-            // Collect timings
-            for (auto &interval : cldnnInfo.intervals) {
-                using duration_t = std::chrono::duration<long long, std::chrono::microseconds::period>;
-                auto count = std::chrono::duration_cast<duration_t>(interval.value->value()).count();
-
-                if (interval.name == "submission") {
-                    perfCount.cpu_uSec += count;
-                } else if (interval.name == "executing") {
-                    perfCount.realTime_uSec += count;
-                } else if (interval.name == "duration") {  // "duration" is used for CPU layers
-                    perfCount.cpu_uSec += count;
-
-                    if (perfCount.num == 0)
-                        perfCount.isCPU = true;
-                }
-            }
-            perfCount.num++;
-        }
+        m_graph->UpdatePerfStatistics();
     }
 }

 void CLDNNInferRequest::execAndParseDyn() {
-    std::vector<std::map<cldnn::primitive_id, cldnn::network_output>> networkOutputs(m_env.m_bv_sz);
+    runningCounter++;
+    std::vector<std::map<cldnn::primitive_id, cldnn::network_output>> networkOutputs(m_graph->GetNetworksCount());

     // set up execution and put all graphs into driver queue
-    for (unsigned nb = 0; nb < m_env.m_bv_sz; nb++) {
+    for (unsigned nb = 0; nb < m_graph->GetNetworksCount(); nb++) {
         unsigned int mask = 1 << nb;
         if (m_curBatch & mask) {
-            networkOutputs[nb] = m_env.batchNetworks[nb]->execute();
+            networkOutputs[nb] = m_graph->GetNetwork(nb)->execute();
         }
     }

     // now try to get execution results
-    for (unsigned nb = 0; nb < m_env.m_bv_sz; nb++) {
+    for (unsigned nb = 0; nb < m_graph->GetNetworksCount(); nb++) {
         unsigned int mask = 1 << nb;
         if (m_curBatch & mask) {
             for (auto& no : _networkOutputs) {
-                std::string outputID = no.first;
-                while ((m_env.primitiveIDs.find(outputID) != m_env.primitiveIDs.end()) &&
-                       (m_env.primitiveIDs.at(outputID) != outputID)) {
-                    outputID = m_env.primitiveIDs.at(outputID);
-                }
-
+                std::string outputID = m_graph->MapOutputName(no.first);
                 auto outputMemory = networkOutputs[nb].at(outputID).get_memory();
                 Blob::Ptr bptr =
_outputs[no.first]; @@ -513,16 +461,21 @@ void CLDNNInferRequest::execAndParseDyn() { } } } + runningCounter--; } void CLDNNInferRequest::InferImpl() { IE_PROFILING_AUTO_SCOPE(CLDNN_INFER) + if (CLDNNPlugin::MultiWorkerTaskExecutor::ptrContext.ptrGraph != nullptr) { + m_graph = CLDNNPlugin::MultiWorkerTaskExecutor::ptrContext.ptrGraph; + } + // execute input pre-processing. execDataPreprocessing(_inputs, true); // "true" stands for serial preprocessing in case of OpenMP for (auto &item : _inputs) { - if (m_env.m_max_batch > 1) { + if (m_graph->GetMaxDynamicBatchSize() > 1) { PrepareInputDyn(item.first, *item.second); } else { PrepareInput(item.first, *item.second); @@ -530,7 +483,7 @@ void CLDNNInferRequest::InferImpl() { } // The actual inference - if (m_env.m_max_batch > 1) { + if (m_graph->GetMaxDynamicBatchSize() > 1) { execAndParseDyn(); } else { execAndParse(); @@ -542,41 +495,16 @@ void CLDNNInferRequest::GetPerformanceCounts( if (!m_useProfiling) { THROW_IE_EXCEPTION << "Performance counters were not enabled"; } else { - unsigned i = 0; - for (auto& profiledID : m_env.profilingIDs) { - const auto& layerName = m_env.perfMap.at(profiledID).first; - if (layerName.length() == 0) // no layer directly associated - continue; - - const auto& perfCounter = m_env.perfMap.at(profiledID).second; - auto& extPerfEntry = perfMap[layerName]; - - // copy layer implementation - if (perfCounter.isCPU) { - static const std::string cpuExecType("CPU"); - memset(extPerfEntry.exec_type, 0, sizeof(extPerfEntry.exec_type)); - cpuExecType.copy(extPerfEntry.exec_type, cpuExecType.length()); // Override execType as CPU - } else { - std::string impl = implementationsMap.at(profiledID); - impl.copy(extPerfEntry.exec_type, impl.length()); - } - - extPerfEntry.execution_index = i++; - extPerfEntry.status = perfCounter.status; - extPerfEntry.cpu_uSec = perfCounter.cpu_avg(); - extPerfEntry.realTime_uSec = perfCounter.realTime_avg(); - - perfCounter.layerType.copy(extPerfEntry.layer_type, perfCounter.layerType.length()); - } + m_graph->GetPerformanceCounts(perfMap); } } void CLDNNInferRequest::PrepareInput(const cldnn::primitive_id &inputName, const Blob &inputBlob) { // Get input layout - if (m_env.inputLayouts.find(inputName) == m_env.inputLayouts.end()) { + if (m_graph->GetInputLayouts().find(inputName) == m_graph->GetInputLayouts().end()) { THROW_IE_EXCEPTION << "Input name mismatch."; } - auto inputLayout = m_env.inputLayouts.at(inputName); + auto inputLayout = m_graph->GetInputLayouts().at(inputName); auto is_same_buffer = [](const Blob& blob, const cldnn::memory& memory) -> bool { const std::string str_not_allocated("Input data was not allocated."); cldnn::pointer ptr = memory.pointer(); @@ -588,41 +516,42 @@ void CLDNNInferRequest::PrepareInput(const cldnn::primitive_id &inputName, const return (blob_ptr == mem_ptr) && (blob.byteSize() == memory.size()); }; - cldnn::primitive_id internalName = "Input:" + inputName; + cldnn::primitive_id internalName = "input:" + inputName; const cldnn::memory& memory = inputsMemory.at(inputName); - if (inputBlob.precision() == Precision::I16) { + if (inputBlob.getTensorDesc().getPrecision() == Precision::I16) { // clDNN doesn't support I16 input precision, so we always have to convert input data to fp32 precision const cldnn::memory& fp32_mem = inputsMemory.at(inputName+fp32_suffix); cldnn::pointer ptr = fp32_mem.pointer(); InferenceEngine::copyToFloat(ptr.data(), &inputBlob); - m_env.network->set_input_data(internalName, fp32_mem); + 
m_graph->GetNetwork()->set_input_data(internalName, fp32_mem);
     } else if (is_same_buffer(inputBlob, memory)) {
         // If input memory was allocated by the cldnn engine and wasn't overwritten by the user, set_input_data won't copy the input data.
-        switch (inputBlob.precision()) {
+        switch (inputBlob.getTensorDesc().getPrecision()) {
         case Precision::FP32:
         case Precision::FP16:
-        case Precision::U8: {
-            m_env.network->set_input_data(internalName, memory);
+        case Precision::U8:
+        case Precision::I32: {
+            m_graph->GetNetwork()->set_input_data(internalName, memory);
             break;
         }
         default:
-            THROW_IE_EXCEPTION << "Unsupported input precision " << inputBlob.precision();
+            THROW_IE_EXCEPTION << "Unsupported input precision " << inputBlob.getTensorDesc().getPrecision();
         }
     } else {
         // Otherwise, we have to attach to user memory and then copy the data.
-        copyInputData(m_env.network, inputName, inputLayout, inputBlob);
+        copyInputData(m_graph->GetNetwork(), inputName, inputLayout, inputBlob);
     }
 }

 void CLDNNInferRequest::PrepareInputDyn(const cldnn::primitive_id &inputName, const Blob &inputBlob) {
     // copy input data into every sub-network enabled by the current batch mask
-    for (unsigned nb = 0; nb < m_env.m_bv_sz; nb++) {
+    for (unsigned nb = 0; nb < m_graph->GetNetworksCount(); nb++) {
         unsigned int mask = 1 << nb;
         if (m_curBatch & mask) {
-            auto inputLayout = m_env.inputLayouts.at(inputName);
+            auto inputLayout = m_graph->GetInputLayouts().at(inputName);
             inputLayout.size.batch[0] = mask;
-            copyInputData(m_env.batchNetworks[nb], inputName, inputLayout, inputBlob, &batchInputs[inputName][nb]);
+            copyInputData(m_graph->GetNetwork(nb), inputName, inputLayout, inputBlob, &batchInputs[inputName][nb]);
         }
     }
 }
diff --git a/inference-engine/src/cldnn_engine/cldnn_infer_request.h b/inference-engine/src/cldnn_engine/cldnn_infer_request.h
index 4040c08b6c6996..e587e915fbe7da 100644
--- a/inference-engine/src/cldnn_engine/cldnn_infer_request.h
+++ b/inference-engine/src/cldnn_engine/cldnn_infer_request.h
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include <atomic>
 #include
 #include
 #include
@@ -21,27 +22,32 @@ struct buf_info {
 };

 class CLDNNInferRequest : public InferenceEngine::InferRequestInternal {
+    static std::atomic<unsigned int> runningCounter;
+
 public:
     void InferImpl() override;

-    void
-    GetPerformanceCounts(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap) const override;
+    void GetPerformanceCounts(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap) const override;

-    CLDNNInferRequest(const InferenceEnv& env, bool useProfiling,
-                      InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs);
+    CLDNNInferRequest(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs);

     CLDNNInferRequest(const CLDNNInferRequest &) = delete;

     virtual ~CLDNNInferRequest() = default;

     void SetBatch(int batch = -1) override;
+    void SetGraph(std::shared_ptr<CLDNNGraph> graph);
+    void EnableProfiling() { m_useProfiling = true; }
+    void EnableStreams() { m_useStreams = true; }
+    static unsigned int GetRunningCounter() { return runningCounter.load(); }

 protected:
     std::map<std::string, cldnn::memory> inputsMemory;
     std::map<std::string, cldnn::primitive_id> outputsMap;
-    std::map<cldnn::primitive_id, std::string> implementationsMap;
+
     bool m_useProfiling;
-    InferenceEnv m_env;
+    bool m_useStreams;
+    std::shared_ptr<CLDNNGraph> m_graph;

     // dynamic batch stuff
     std::map<std::string, std::vector<buf_info>> batchInputs;
@@ -51,8 +57,8 @@ class CLDNNInferRequest : public InferenceEngine::InferRequestInternal {
     InferenceEngine::Blob::Ptr createOutputBlob(const InferenceEngine::TensorDesc& desc, uint8_t* mem_ptr = nullptr);
     void copyOutputData(const cldnn::memory& outputMemory, InferenceEngine::Blob::Ptr bptr, buf_info* bi = nullptr);
     void
copyInputData(std::shared_ptr network, const cldnn::primitive_id &inputName, - const cldnn::layout& inputLayout, const InferenceEngine::Blob &inputBlob, - buf_info* bi = nullptr); + const cldnn::layout& inputLayout, const InferenceEngine::Blob &inputBlob, + buf_info* bi = nullptr); void AllocateInputs(); void AllocateOutputs(); diff --git a/inference-engine/src/cldnn_engine/cldnn_lstm.cpp b/inference-engine/src/cldnn_engine/cldnn_lstm.cpp new file mode 100644 index 00000000000000..6ca467fdb89a78 --- /dev/null +++ b/inference-engine/src/cldnn_engine/cldnn_lstm.cpp @@ -0,0 +1,571 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cldnn_program.h" + +using namespace InferenceEngine; +using namespace InferenceEngine::details; + +namespace CLDNNPlugin { + +std::string get_string_id(size_t i) { + std::stringstream ss; + ss << std::setw(5) << std::setfill('0') << i; + return ss.str(); +} + +void Program::CreateLSTMCellPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + int lstm_batch_size, lstm_input_size, lstm_hidden_size; + bool hasBias = false; + auto inputPrimitives = GetPrevLayersPrimitives(layer); + + std::string layerName = layer_type_name_ID(layer); + cldnn::primitive_id weightID = layerName + m_weightsTag; + cldnn::primitive_id biasID = layerName + m_biasesTag; + + /* check incoming CNN layer and setup required variables */ + { + auto in_data0 = layer->insData[0].lock(); + if (!in_data0) + THROW_IE_EXCEPTION << "Missing first input for LSTMCell layer " << layer->name; + + const auto in_dims0 = in_data0->getTensorDesc().getDims(); + const auto out_dims0 = layer->outData[0]->getTensorDesc().getDims(); + + lstm_input_size = in_dims0.back(); + lstm_batch_size = in_dims0.at(in_dims0.size()-2); + lstm_hidden_size = out_dims0.back(); + + auto in_data1 = layer->insData[1].lock(); + if (!in_data1) + THROW_IE_EXCEPTION << "Missing second input for LSTMCell layer " << layer->name; + + auto in_data2 = layer->insData[2].lock(); + if (!in_data2) + THROW_IE_EXCEPTION << "Missing third input for LSTMCell layer " << layer->name; + + if (in_dims0.size() != 2 || + in_data1->getTensorDesc().getDims().size() != 2 || + in_data2->getTensorDesc().getDims().size() != 2) + THROW_IE_EXCEPTION << "Wrong input shapes for LSTMCell Layer " << layer->name; + } + + /* Prepare weight/bias memory primitives */ + { + auto wLayer = as(layer); + auto pWeightsBlob = wLayer->_weights; + cldnn::tensor wTensor = cldnn::tensor(cldnn::batch(4 * lstm_hidden_size), cldnn::feature(1), cldnn::spatial(lstm_input_size + lstm_hidden_size, 1)); + cldnn::layout WLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, wTensor); + weightID = CreatePrimitiveFromBlob(topology, weightID, pWeightsBlob, WLayout); + + /* create bias memory primitive */ + auto pBiasBlob = wLayer->_biases; + if (pBiasBlob != nullptr) { + cldnn::tensor bTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(4 * lstm_hidden_size, 1)); + cldnn::layout BLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, bTensor); + + biasID = CreatePrimitiveFromBlob(topology, biasID, pBiasBlob, BLayout); + hasBias = true; + } + } + + cldnn::primitive_id inReshapeID = layerName + "_inReshape"; + cldnn::primitive_id permuteID = layerName + "_inputReorder"; + 
cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape"; + cldnn::primitive_id inHiddenReorderID = layerName + "_inHiddenReorder"; + cldnn::primitive_id gemmReshapeID = layerName + "_gemmReshape"; + cldnn::primitive_id gemmReorderID = layerName + "_gemmReorder"; + cldnn::primitive_id concatID = layerName + "_inputConcat"; + + cldnn::tensor inputShape = { lstm_batch_size, 1, lstm_input_size, 1 }; + cldnn::tensor hiddenStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 }; + cldnn::layout inputLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), cldnn::format::bfyx, inputShape); + cldnn::layout hiddenLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), cldnn::format::bfyx, hiddenStateShape); + topology.add(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape)); + topology.add(cldnn::reorder(permuteID, inReshapeID, inputLayout)); + + primitivesToIRLayersMap[inReshapeID] = { layer->name }; + primitivesToIRLayersMap[permuteID] = { layer->name }; + + std::string hiddenInResh = inHiddenReshapeID + "_1"; + std::string hiddenInStr = inHiddenReorderID + "_1"; + std::string cellInResh = inHiddenReshapeID + "_2"; + std::string cellInStr = inHiddenReorderID + "_2"; + topology.add(cldnn::reshape(hiddenInResh, inputPrimitives[1], hiddenStateShape)); + topology.add(cldnn::reorder(hiddenInStr, hiddenInResh, hiddenLayout)); + topology.add(cldnn::reshape(cellInResh, inputPrimitives[2], hiddenStateShape)); + topology.add(cldnn::reorder(cellInStr, cellInResh, hiddenLayout)); + topology.add(cldnn::concatenation(concatID, { permuteID, hiddenInStr }, cldnn::concatenation::concatenation_axis::along_x)); + + primitivesToIRLayersMap[hiddenInStr] = { layer->name }; + primitivesToIRLayersMap[cellInStr] = { layer->name }; + + cldnn::tensor gemmSz = cldnn::tensor{ lstm_batch_size, 1, 4 * lstm_hidden_size, 1 }; + cldnn::layout gemmLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), cldnn::format::bfyx, gemmSz); + cldnn::tensor hiddenSz = cldnn::tensor{ lstm_batch_size, 1, lstm_hidden_size, 1 }; + cldnn::tensor cellCropSz = cldnn::tensor{0, 1, 0, 0}; + + std::string lstm_fc_id = layerName + "_fully_connected"; + std::string lstm_elt_id = layerName + "_lstm_elt"; + std::string crop_id = layerName + "_crop"; + + topology.add(cldnn::fully_connected(lstm_fc_id, concatID, weightID, hasBias ? 
biasID : "")); + topology.add(cldnn::reshape(gemmReshapeID, lstm_fc_id, gemmSz)); + topology.add(cldnn::reorder(gemmReorderID, gemmReshapeID, gemmLayout)); + topology.add(cldnn::lstm_elt(lstm_elt_id, gemmReorderID, cellInStr, + 0, 0, {}, {}, cldnn_lstm_offset_order_fizo)); + + primitivesToIRLayersMap[lstm_fc_id] = { layer->name }; + primitivesToIRLayersMap[lstm_elt_id] = { layer->name }; + + cldnn::primitive_id outputHiddenID = layerName; + topology.add(cldnn::crop(outputHiddenID, lstm_elt_id, hiddenSz, cldnn::tensor{0, 0, 0, 0})); + cldnn::primitive_id outputCellID = layer_type_lower(layer) + ":" + layer->outData[1]->getName(); + topology.add(cldnn::crop(outputCellID, lstm_elt_id, hiddenSz, cellCropSz)); + + primitivesToIRLayersMap[outputHiddenID] = { layer->name }; + primitivesToIRLayersMap[outputCellID] = { layer->name }; + + // output primitive IDs + primitiveIDs[outputHiddenID] = outputHiddenID; // LSTMCell:LSTMCell - "concat hidden" + primitiveIDs[layer_type_lower(layer) + ":" + layer->outData[0]->getName()] = outputHiddenID; // LSTMCell:LSTMCell:0 - hidden state + primitiveIDs[outputCellID] = outputCellID; // LSTMCell:LSTMCell:1 - cell state + + profilingIDs.push_back(layerName); +} + +void Program::CreateRegularLSTM(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + int lstm_batch_size, lstm_sequence_len, lstm_input_size, lstm_hidden_size; + bool hasInitialHidden = false, hasInitialCell = false, hasBias = false, isForward = true; + auto inputPrimitives = GetPrevLayersPrimitives(layer); + + std::string layerName = layer_type_name_ID(layer); + cldnn::primitive_id weightID = layerName + m_weightsTag; + cldnn::primitive_id biasID = layerName + m_biasesTag; + auto rnnLayer = as (layer); + bool permute_input = (1 != rnnLayer->axis); + + /* check incoming CNN layer and setup required variables */ + { + if (rnnLayer->cellType != RNNSequenceLayer::LSTM) + THROW_IE_EXCEPTION << "RNN layer supports only LSTM like cell"; + + auto in_data0 = layer->insData[0].lock(); + if (!in_data0) + THROW_IE_EXCEPTION << "Missing first input for RNN layer " << layer->name; + + const auto in_dims0 = in_data0->getTensorDesc().getDims(); + const auto out_dims0 = layer->outData[0]->getTensorDesc().getDims(); + + /* do we have initial hidden and cell? 
+ if blobs are not null, direct the data from them + into corresponding LSTM inputs */ + auto in_data1 = layer->insData[1].lock(); + if (in_data1) { + hasInitialHidden = true; + } + + auto in_data2 = layer->insData[2].lock(); + if (in_data2) { + hasInitialCell = true; + } + + if (in_dims0.size() != 3 || + in_data1->getTensorDesc().getDims().size() != 2 || + in_data2->getTensorDesc().getDims().size() != 2) + THROW_IE_EXCEPTION << "Wrong input shapes for RNN Layer " << layer->name; + + if (!permute_input) { + lstm_batch_size = in_dims0.front(); + lstm_sequence_len = in_dims0[1]; + } else { + lstm_batch_size = in_dims0[1]; + lstm_sequence_len = in_dims0.front(); + } + + lstm_input_size = in_dims0.back(); + lstm_hidden_size = out_dims0.back(); + + if (rnnLayer->direction != RNNSequenceLayer::FWD && rnnLayer->direction != RNNSequenceLayer::BWD) + THROW_IE_EXCEPTION << "Support only forward and backward direction for RNN Layer " << layer->name; + isForward = rnnLayer->direction == RNNSequenceLayer::FWD; + } + + /* Prepare weight/bias memory primitives */ + { + auto wLayer = as(layer); + auto pWeightsBlob = wLayer->_weights; + cldnn::tensor wTensor = cldnn::tensor(cldnn::batch(4 * lstm_hidden_size), cldnn::feature(1), cldnn::spatial(lstm_input_size + lstm_hidden_size, 1)); + cldnn::layout WLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, wTensor); + weightID = CreatePrimitiveFromBlob(topology, weightID, pWeightsBlob, WLayout); + + /* create bias memory primitive */ + auto pBiasBlob = wLayer->_biases; + if (pBiasBlob != nullptr) { + cldnn::tensor bTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(4 * lstm_hidden_size, 1)); + cldnn::layout BLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, bTensor); + + biasID = CreatePrimitiveFromBlob(topology, biasID, pBiasBlob, BLayout); + hasBias = true; + } + } + + std::vector> input_ids_offsets; + std::vector output_ids_offsets; + + cldnn::primitive_id inReshapeID = layerName + "_inReshape"; + cldnn::primitive_id permuteID = layerName + "_inputReorder"; + cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape"; + + cldnn::tensor inputShape; + + if (permute_input) { + inputShape = { lstm_sequence_len, lstm_batch_size, lstm_input_size, 1 }; + } else { + inputShape = { lstm_batch_size, lstm_sequence_len, lstm_input_size, 1 }; + } + cldnn::tensor hiddenStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 }; + cldnn::layout inputLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), cldnn::format::bfyx, inputShape); + topology.add(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape)); + topology.add(cldnn::reorder(permuteID, inReshapeID, inputLayout)); + + topology.add(cldnn::reshape(inHiddenReshapeID+"_1", inputPrimitives[1], hiddenStateShape)); + topology.add(cldnn::reshape(inHiddenReshapeID+"_2", inputPrimitives[2], hiddenStateShape)); + + primitivesToIRLayersMap[inReshapeID] = { layer->name }; + primitivesToIRLayersMap[permuteID] = { layer->name }; + primitivesToIRLayersMap[inHiddenReshapeID+"_1"] = { layer->name }; + primitivesToIRLayersMap[inHiddenReshapeID+"_2"] = { layer->name }; + + for (int i = 0; i < lstm_sequence_len; ++i) + input_ids_offsets.push_back({ get_string_id(i), {0, i, 0, 0} }); + + cldnn::primitive_id inputSplitID = layerName + "_inputSplit"; + + if (permute_input) { + topology.add(cldnn::permute(layerName + "_inputSwap", permuteID, { 1, 0, 2, 3 })); + topology.add(cldnn::split(inputSplitID, layerName + "_inputSwap", 
input_ids_offsets)); + + primitivesToIRLayersMap[layerName + "_inputSwap"] = { layer->name }; + primitivesToIRLayersMap[inputSplitID] = { layer->name }; + } else { + topology.add(cldnn::split(inputSplitID, permuteID, input_ids_offsets)); + primitivesToIRLayersMap[inputSplitID] = { layer->name }; + } + + cldnn::tensor gemmSz = cldnn::tensor{ lstm_batch_size, 1, 4 * lstm_hidden_size, 1 }; + cldnn::layout gemmLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), cldnn::format::bfyx, gemmSz); + cldnn::tensor hiddenSz = cldnn::tensor{ lstm_batch_size, 1, lstm_hidden_size, 1 }; + cldnn::tensor cellCropSz = cldnn::tensor{0, 1, 0, 0}; + std::string hiddenStr = hasInitialHidden ? inHiddenReshapeID+"_1" : ""; + std::string cellStr = hasInitialCell ? inHiddenReshapeID+"_2" : ""; + + for (int i = 0; i < lstm_sequence_len; ++i) { + std::string concatID = layerName + "_inputConcat" + get_string_id(i); + std::string lstm_fc_id = layerName + "_fully_connected" + get_string_id(i); + std::string lstm_fc_resh_id = layerName + "_gemmReshape" + get_string_id(i); + std::string lstm_fc_reor_id = layerName + "_gemmReorder" + get_string_id(i); + std::string lstm_elt_id = layerName + "_lstm_elt" + get_string_id(i); + std::string crop_id = layerName + "_crop" + get_string_id(i); + + int seqIdx = isForward ? i : lstm_sequence_len - 1 - i; + if (hiddenStr != "") { + topology.add(cldnn::concatenation(concatID, { inputSplitID + ":" + get_string_id(seqIdx), hiddenStr }, + cldnn::concatenation::concatenation_axis::along_x)); + topology.add(cldnn::fully_connected(lstm_fc_id, concatID, weightID, hasBias ? biasID : "")); + } else { + topology.add(cldnn::fully_connected(lstm_fc_id, inputSplitID + ":" + get_string_id(seqIdx), weightID, hasBias ? biasID : "")); + } + + topology.add(cldnn::reshape(lstm_fc_resh_id, lstm_fc_id, gemmSz)); + topology.add(cldnn::reorder(lstm_fc_reor_id, lstm_fc_resh_id, gemmLayout)); + topology.add(cldnn::lstm_elt(lstm_elt_id, lstm_fc_reor_id, + cellStr, 0, 0, {}, {}, + cldnn_lstm_offset_order_fizo)); + + hiddenStr = crop_id + ":hidden"; + cellStr = crop_id + ":cell"; + topology.add(cldnn::crop(hiddenStr, lstm_elt_id, hiddenSz, cldnn::tensor{ 0, 0, 0, 0 })); + output_ids_offsets.push_back(hiddenStr); + + primitivesToIRLayersMap[lstm_fc_id] = { layer->name }; + primitivesToIRLayersMap[lstm_elt_id] = { layer->name }; + primitivesToIRLayersMap[hiddenStr] = { layer->name }; + + if (i < lstm_sequence_len - 1) { + topology.add(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz)); + primitivesToIRLayersMap[cellStr] = { layer->name }; + } else { + // last hidden state crop (output 2) + if (layer->outData.size() > 1) { + cldnn::primitive_id outputHiddenID = layer_type_lower(layer) + ":" + layer->outData[1]->getName(); + primitiveIDs[hiddenStr] = hiddenStr; + primitiveIDs[outputHiddenID] = hiddenStr; + } + + // last cell state crop (output 3) + if (layer->outData.size() > 2) { + topology.add(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz)); + cldnn::primitive_id outputCellID = layer_type_lower(layer) + ":" + layer->outData[2]->getName(); + primitivesToIRLayersMap[cellStr] = { layer->name }; + primitiveIDs[cellStr] = cellStr; + primitiveIDs[outputCellID] = cellStr; + } + } + } + + if (!isForward) std::reverse(output_ids_offsets.begin(), output_ids_offsets.end()); + + if (permute_input) { + topology.add(cldnn::concatenation(layerName + "_outputConcat", output_ids_offsets, cldnn::concatenation::along_f)); + topology.add(cldnn::permute(layerName, layerName + "_outputConcat", { 1, 0, 2, 3 })); 
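A note on shapes in the branch above, assuming the usual bfyx ordering {batch, feature, spatial_y, spatial_x}: each per-step hidden crop is { B, 1, H, 1 }, concatenating along f stacks the S steps into { B, S, H, 1 }, and the final permute { 1, 0, 2, 3 } swaps b and f to restore the sequence-major { S, B, H, 1 } expected when the RNN axis is not 1. A small hypothetical helper (applyPermute is not part of this diff, and it assumes the out[i] = in[order[i]] convention for cldnn::permute):

#include <array>
#include <cstddef>

// out[i] = in[order[i]]: dimension i of the output comes from
// dimension order[i] of the input.
static std::array<int, 4> applyPermute(const std::array<int, 4>& in,
                                       const std::array<std::size_t, 4>& order) {
    std::array<int, 4> out{};
    for (std::size_t i = 0; i < 4; ++i)
        out[i] = in[order[i]];
    return out;
}
// e.g. applyPermute({B, S, H, 1}, {1, 0, 2, 3}) yields {S, B, H, 1}.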
+ primitivesToIRLayersMap[layerName + "_outputConcat"] = { layer->name }; + } else { + topology.add(cldnn::concatenation(layerName, output_ids_offsets, cldnn::concatenation::along_f)); + } + + primitivesToIRLayersMap[layerName] = { layer->name }; + primitiveIDs[layerName] = layerName; + primitiveIDs[layer_type_lower(layer) + ":" + layer->outData[0]->getName()] = layerName; + profilingIDs.push_back(layerName); +} + +void Program::CreateDynamicLSTM(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + int lstm_batch_size, lstm_sequence_len, lstm_input_size, lstm_hidden_size; + bool hasBias = false, reverseSeq = false; + auto inputPrimitives = GetPrevLayersPrimitives(layer); + + auto elementSize = cldnn::data_type_traits::size_of(DataTypeFromPrecision(layer->precision)); + std::string layerName = layer_type_name_ID(layer); + cldnn::primitive_id weightID = layerName + m_weightsTag; + cldnn::primitive_id recurrentID = weightID + "_recurrent"; + cldnn::primitive_id biasID = layerName + m_biasesTag; + auto rnnLayer = as(layer); + bool permute_input = (1 != rnnLayer->axis); + int32_t directions = 1; + + /* check incoming CNN layer and setup required variables */ + { + if (rnnLayer->cellType != RNNSequenceLayer::LSTM) + THROW_IE_EXCEPTION << "RNN layer supports only LSTM like cell"; + + auto in_data0 = layer->insData[0].lock(); + if (!in_data0) + THROW_IE_EXCEPTION << "Missing first input for RNN layer " << layer->name; + + const auto in_dims0 = in_data0->getTensorDesc().getDims(); + const auto out_dims0 = layer->outData[0]->getTensorDesc().getDims(); + + auto in_data1 = layer->insData[1].lock(); + auto in_data2 = layer->insData[2].lock(); + auto in_data3 = layer->insData[3].lock(); + + if (in_dims0.size() != 3 || + in_data1->getTensorDesc().getDims().size() != 2 || + in_data2->getTensorDesc().getDims().size() != 2 || + in_data3->getTensorDesc().getDims().size() != 1) + THROW_IE_EXCEPTION << "Wrong input shapes for dynamic RNN Layer " << layer->name; + + if (!permute_input) { + lstm_batch_size = in_dims0.front(); + lstm_sequence_len = in_dims0[1]; + } else { + lstm_batch_size = in_dims0[1]; + lstm_sequence_len = in_dims0.front(); + } + + lstm_input_size = in_dims0.back(); + lstm_hidden_size = out_dims0.back(); + + if (rnnLayer->direction == RNNSequenceLayer::BDR) { + directions = 2; + } else { + reverseSeq = rnnLayer->direction == RNNSequenceLayer::BWD; + } + } + + /* Prepare weight/bias memory primitives - split weight blob into W and R */ + { + const size_t WchunkSz = lstm_input_size * elementSize; + const size_t RchunkSz = lstm_hidden_size * elementSize; + + cldnn::tensor wTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(directions), cldnn::spatial(lstm_input_size, 4 * lstm_hidden_size)); + cldnn::tensor rTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(directions), cldnn::spatial(lstm_hidden_size, 4 * lstm_hidden_size)); + cldnn::layout WLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, wTensor); + cldnn::layout RLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, rTensor); + + auto wLayer = as(layer); + + { + auto pWeightsBlob = wLayer->_weights; + auto blobBytes = static_cast(pWeightsBlob->buffer()); + + auto wmem = cldnn::memory::allocate(*m_engine, WLayout); + auto wtmpPointer = wmem.pointer(); // implicitly maps buffer - unmap in destructor + + auto rmem = cldnn::memory::allocate(*m_engine, RLayout); + auto rtmpPointer = rmem.pointer(); + + auto wBytes = wtmpPointer.data(); + auto rBytes = 
rtmpPointer.data(); + + for (int h = 0; h < 4 * lstm_hidden_size; h++) { + // copy "input size" elements to W + for (size_t b = 0; b < WchunkSz; b++) + *wBytes++ = *blobBytes++; + + // copy "lstm_hidden_size" elements to R + for (size_t b = 0; b < RchunkSz; b++) + *rBytes++ = *blobBytes++; + } + + topology.add(cldnn::data(weightID, wmem)); + topology.add(cldnn::data(recurrentID, rmem)); + } + + /* create bias memory primitive */ + auto pBiasBlob = wLayer->_biases; + if (pBiasBlob != nullptr) { + cldnn::tensor bTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(directions), cldnn::spatial(4 * lstm_hidden_size, 1)); + cldnn::layout BLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, bTensor); + + auto bmem = cldnn::memory::allocate(*m_engine, BLayout); + auto btmpPointer = bmem.pointer(); + + auto blobBytes = static_cast(pBiasBlob->buffer()); + const size_t BchunkSz = lstm_hidden_size * elementSize; + auto bBytes = btmpPointer.data(); + + for (size_t b = 0; b < 4 * BchunkSz; b++) + *bBytes++ = *blobBytes++; + + topology.add(cldnn::data(biasID, bmem)); + hasBias = true; + } + } + + cldnn::primitive_id inReshapeID = layerName + "_inReshape"; + cldnn::primitive_id permuteID = layerName + "_inputReorder"; + cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape"; + + cldnn::tensor inputShape; + + if (permute_input) { + inputShape = { lstm_sequence_len, lstm_batch_size, lstm_input_size, directions }; + } else { + inputShape = { lstm_batch_size, lstm_sequence_len, lstm_input_size, directions }; + } + cldnn::tensor hiddenStateShape = { lstm_batch_size, 1, lstm_hidden_size, directions }; + cldnn::layout inputLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), cldnn::format::bfyx, inputShape); + topology.add(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape)); + topology.add(cldnn::reorder(permuteID, inReshapeID, inputLayout)); + + topology.add(cldnn::reshape(inHiddenReshapeID + "_1", inputPrimitives[1], hiddenStateShape)); + topology.add(cldnn::reshape(inHiddenReshapeID + "_2", inputPrimitives[2], hiddenStateShape)); + + cldnn::primitive_id dynID = layerName + "_dynLength"; + cldnn::primitive_id dynReshapeID = layerName + "_dynReshape"; + cldnn::tensor dynShape = { 1, 1, lstm_batch_size, 1 }; + cldnn::layout dynLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), cldnn::format::bfyx, dynShape); + topology.add(cldnn::reshape(dynReshapeID, inputPrimitives[3], dynShape)); + topology.add(cldnn::reorder(dynID, dynReshapeID, dynLayout)); + + primitivesToIRLayersMap[inReshapeID] = { layer->name }; + primitivesToIRLayersMap[permuteID] = { layer->name }; + primitivesToIRLayersMap[inHiddenReshapeID + "_1"] = { layer->name }; + primitivesToIRLayersMap[inHiddenReshapeID + "_2"] = { layer->name }; + + cldnn::primitive_id inputID = permuteID; + cldnn::primitive_id prevInputID = permuteID; + + if (permute_input) { + inputID = layerName + "_inputSwap"; + topology.add(cldnn::permute(inputID, prevInputID, { 1, 0, 2, 3 })); + prevInputID = inputID; + } + primitivesToIRLayersMap[inputID] = { layer->name }; + + cldnn::primitive_id seq_len_id = layer->name + "seq_lengths"; + if (reverseSeq) { + inputID = layerName + "_inputReverse"; + topology.add(cldnn::reverse_sequence(inputID, prevInputID, dynID, 1, 0)); + primitivesToIRLayersMap[inputID] = { layer->name }; + prevInputID = inputID; + } + + // last hidden state crop (output 2) + cldnn::primitive_id outputHiddenID = "", outputCellID = ""; + if (layer->outData.size() > 1) { + outputHiddenID 
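/* note: outputs 2 and 3 (last hidden / last cell state) get no producing
   primitive of their own; a mutable_data buffer is allocated up front here
   and handed to lstm_dynamic below, which writes the final states straight
   into it */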
= layer_type_lower(layer) + ":" + layer->outData[1]->getName(); + auto last_hidden_mem = cldnn::memory::allocate(*m_engine, + { DataTypeFromPrecision(layer->precision), + cldnn::format::bfyx, { lstm_batch_size, 1, lstm_hidden_size, directions } }); + topology.add(cldnn::mutable_data(outputHiddenID, last_hidden_mem)); + primitiveIDs[outputHiddenID] = outputHiddenID; + } + + // last cell state crop (output 3) + if (layer->outData.size() > 2) { + outputCellID = layer_type_lower(layer) + ":" + layer->outData[2]->getName(); + auto last_cell_mem = cldnn::memory::allocate(*m_engine, + { DataTypeFromPrecision(layer->precision), + cldnn::format::bfyx, { lstm_batch_size, 1, lstm_hidden_size, directions } }); + topology.add(cldnn::mutable_data(outputCellID, last_cell_mem)); + primitiveIDs[outputCellID] = outputCellID; + } + + // main part - dLSTM primitive intself + cldnn::primitive_id dlstmID = layerName + "_dlstm"; + topology.add(cldnn::lstm_dynamic(dlstmID, inputID, dynID, + weightID, recurrentID, outputHiddenID, outputCellID, biasID, + inHiddenReshapeID + "_1", inHiddenReshapeID + "_2")); + prevInputID = inputID = dlstmID; + primitivesToIRLayersMap[dlstmID] = { layer->name }; + + if (reverseSeq) { + inputID = layerName + "_outputReverse"; + topology.add(cldnn::reverse_sequence(inputID, prevInputID, dynID, 1, 0)); + primitivesToIRLayersMap[inputID] = { layer->name }; + prevInputID = inputID; + } + + if (permute_input) { + inputID = layerName + "_outputSwap"; + topology.add(cldnn::permute(inputID, prevInputID, { 1, 0, 2, 3 })); + primitivesToIRLayersMap[inputID] = { layer->name }; + prevInputID = inputID; + } + + primitiveIDs[layerName] = inputID; + primitiveIDs[inputID] = inputID; + primitiveIDs[layer_type_lower(layer) + ":" + layer->outData[0]->getName()] = inputID; + profilingIDs.push_back(layerName); +} + +void Program::CreateRNNPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + if (layer->insData.size() > 3) { + CreateDynamicLSTM(topology, layer); + } else { + CreateRegularLSTM(topology, layer); + } +} + +}; // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/cldnn_program.cpp b/inference-engine/src/cldnn_engine/cldnn_program.cpp new file mode 100644 index 00000000000000..cd8cf98fc24bca --- /dev/null +++ b/inference-engine/src/cldnn_engine/cldnn_program.cpp @@ -0,0 +1,4579 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cldnn_program.h" +#include "simple_math.h" +#include +#include +#include +#include +#include +#include "cldnn_infer_request.h" +#include +#include "details/caseless.hpp" +#include +#include +#include +#include +#include +#include "cnn_network_int8_normalizer.hpp" + +using namespace InferenceEngine; +using namespace InferenceEngine::details; + +namespace CLDNNPlugin { + +const cldnn::primitive_id Program::m_preProcessTag("_cldnn_input_preprocess"); +const cldnn::primitive_id Program::m_weightsTag("_cldnn_weights"); +const cldnn::primitive_id 
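/* these tag constants are suffixes appended to layer names to derive unique
   primitive ids for the auxiliary primitives (weights, biases, mean values,
   scales, custom pre-/post-processing) created alongside each IR layer */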
Program::m_biasesTag("_cldnn_biases"); +const cldnn::primitive_id Program::m_meanValuesTag("_cldnn_mean_values"); +const cldnn::primitive_id Program::m_postProcessTag("_cldnn_output_postprocess"); +const cldnn::primitive_id Program::m_scalesTag("_cldnn_scales"); +const cldnn::primitive_id Program::m_preCustomLayerTag("_cldnn_custom_preprocess"); +const cldnn::primitive_id Program::m_postCustomLayerTag("_cldnn_custom_postprocess"); + +static void ValidateLayer(const InferenceEngine::CNNLayerPtr& layer, unsigned inputs) { // todo: add more checks + if (inputs && layer->insData.size() != inputs) { + THROW_CLDNN_EXCEPTION("Invalid number of inputs for layer: " << layer->name); + } + if (layer->_fusedWith) { + THROW_CLDNN_EXCEPTION("Unsupported fuse in layer: " << layer->name << " with: " << layer->_fusedWith->name); + } +} + +static void ValidateEltwiseLayer(const InferenceEngine::CNNLayerPtr& layer) { + if (layer->_fusedWith) { + THROW_CLDNN_EXCEPTION("Unsupported fuse in layer: " << layer->name << " with: " << layer->_fusedWith->name); + } +} + +static InferenceEngine::Blob::Ptr getBlobOrNull(const InferenceEngine::CNNLayerPtr& layer, std::string name) { + auto result = layer->blobs.find(name); + if (result != layer->blobs.end()) { + return result->second; + } else { + return nullptr; + } +} + +static InferenceEngine::Blob::Ptr getBlob(const InferenceEngine::CNNLayerPtr& layer, std::string name) { + auto result = getBlobOrNull(layer, name); + if (result == nullptr) { + THROW_CLDNN_EXCEPTION("Missing blob " << name << " in layer " << layer->name); + } + + return result; +} + +static cldnn::format defaultFormatForDims(size_t dimensions) { + switch (dimensions) { + case 1: + case 2: + case 3: + case 4: + return cldnn::format::bfyx; + case 5: + return cldnn::format::bfzyx; + case 6: + return cldnn::format::bfwzyx; + default: + THROW_CLDNN_EXCEPTION("Unsupported number of dimensions: " << dimensions); + } + + return cldnn::format::bfyx; // Should not get here +} + + +#if defined(_WIN32) +#define mkdir(dir, mode) _mkdir(dir) +#endif + +void Program::changeInputBatch(int batch) { + m_curBatch = batch; +} + +bool Program::CanProcessDynBatch(InferenceEngine::ICNNNetwork &network) const { + InputsDataMap inputs; + network.getInputsInfo(inputs); + + CNNLayerSet inputLayers; + std::unordered_set allLayers; + + if (inputs.empty()) + return false; + + auto & secondLayers = inputs.begin()->second->getInputData()->getInputTo(); + if (secondLayers.empty()) + return false; + + bool check_result = true; + details::UnorderedDFS(allLayers, secondLayers.begin()->second, [&](CNNLayerPtr layer) { + auto type = LayerTypeFromStr(layer->type); + if (SimplerNMS == type || + ROIPooling == type || + PriorBox == type || + DetectionOutput == type || + Reshape == type || + Permute == type || + Flatten == type || + Proposal == type || + PSROIPooling == type ) { + check_result = false; + } + + // check for custom layer + auto customLayer = m_config.customLayers.find(layer->type); + if (customLayer != m_config.customLayers.end()) { + check_result = false; + } + }, false); + + return check_result; +} + +Program::Program(InferenceEngine::ICNNNetwork& network, std::shared_ptr engine, const Config& config) + : m_config(config) + , m_defaultFormat(cldnn::format::bfyx) + , m_engine(engine) + , m_curBatch(-1) + , p_currentOutputs({}) { + InitFormat(network); + + if (config.enableInt8) { + ICNNNetworkStats* pstats = nullptr; + StatusCode s = network.getStats(&pstats, nullptr); + + // Check for FP32 main precision as further 
quantization of FP16 seems pointless and is not supported by normalizer + if (s == StatusCode::OK && pstats && !pstats->isEmpty() && network.getPrecision() == Precision::FP32) { + CNNNetworkInt8Normalizer normalizer; + normalizer.NormalizeNetwork(network, *pstats); + } + } + + NetPass::CombineRNNSeq(network); + for (int i = 0; i < 2; i++) { + NetPass::UnrollTI(network); + NetPass::UnrollRNN_if(network, [](const RNNCellBase &rnn) -> bool { + if (rnn.clip != 0.0f) + return true; + if (rnn.type == "GRUCell" || + rnn.type == "GRUSequence" || + rnn.type == "RNNCell" || + rnn.type == "RNNSequence") + return true; + if (!(rnn.type == "LSTMCell" || rnn.type == "LSTMSequence") || + rnn.activations == std::vector{"sigmoid", "tanh", "tanh"}) + return false; + return true; + }); + } + + if (m_config.max_dynamic_batch > 1) { + // check topology for applicability + if (!CanProcessDynBatch(network)) { + THROW_CLDNN_EXCEPTION("Such topology cannot be compiled for dynamic batch!"); + } + } + + int m_bv_sz = GetMaxBatchSizeForSingleProgram(); + + m_max_batch = config.max_dynamic_batch; + + if (config.max_dynamic_batch > 1) { + for (int b = m_bv_sz - 1; b >= 0; b--) { + inputLayouts.clear(); + outputDims.clear(); + primitiveIDs.clear(); + blobMemCache.clear(); + + changeInputBatch(1 << b); + m_programs.insert(m_programs.begin(), BuildProgram(network)); + m_engine->release_pending_memory(0); + } + } else { + m_programs.emplace_back(BuildProgram(network)); + m_engine->release_pending_memory(0); + } +} + +int Program::GetMaxBatchSizeForSingleProgram() { + if (m_config.max_dynamic_batch > 1) { + // calculate number of networks necessary based on binary log + unsigned int tmp = m_config.max_dynamic_batch; + unsigned int mask = 1 << 31; + unsigned int ldigit = 31; + + while (!(tmp & mask)) { + mask >>= 1; + ldigit--; + } + + return ldigit + 1; + } + + return 0; +} + +std::shared_ptr Program::getCompiledProgram(int program_id) { + if (program_id >= m_programs.size()) + THROW_CLDNN_EXCEPTION("Invalid program ID"); + + return m_programs[program_id]; +} + +std::vector Program::GetNextLayers(const InferenceEngine::DataPtr data) { + std::vector nextLayers; + if (data == nullptr) { + return nextLayers; + } + for (auto nl : data->getInputTo()) { + nextLayers.push_back(nl.second); + } + return nextLayers; +} + +std::vector Program::GetNextLayers(const InferenceEngine::CNNLayerPtr layer) { + std::vector nextLayers; + if (layer == nullptr) { + return nextLayers; + } + for (auto od : layer->outData) { + auto nextLayersVec = GetNextLayers(od); + for (auto nl : nextLayersVec) { + nextLayers.push_back(nl); + } + } + return nextLayers; +} + +InferenceEngine::CNNLayerPtr Program::GetNextSingleLayer(const InferenceEngine::DataPtr data) { + if (data == nullptr) { + return nullptr; + } + auto nextLayers = GetNextLayers(data); + IE_ASSERT(nextLayers.size() == 1); + return nextLayers[0]; +} + +InferenceEngine::CNNLayerPtr Program::GetNextSingleLayer(const InferenceEngine::CNNLayerPtr layer) { + if (layer == nullptr) { + return nullptr; + } + auto nextLayers = GetNextLayers(layer); + IE_ASSERT(nextLayers.size() == 1); + return nextLayers[0]; +} + +void Program::InitFormat(InferenceEngine::ICNNNetwork &network) { + m_defaultFormat = FormatFromLayout(InferenceEngine::Layout::NCHW); +} + +std::shared_ptr Program::BuildProgram(InferenceEngine::ICNNNetwork &network) { + cldnn::build_options options; + if (!m_config.graph_dumps_dir.empty()) { + options.set_option(cldnn::build_option::graph_dumps_dir(m_config.graph_dumps_dir)); + } + 
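// optimize_data enables clDNN's graph-level optimizations (fusing and removal
// of redundant reorders); the output handling in step 3 below relies on this
// by always inserting a reorder and letting clDNN drop the unneeded ones.
// Also worth noting: with dynamic batching this whole function runs once per
// power-of-two batch, e.g. (illustrative) max_dynamic_batch = 9 gives
// m_bv_sz = 4 and separate programs built for batch sizes 8, 4, 2 and 1.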
options.set_option(cldnn::build_option::optimize_data(true)); + options.set_option(cldnn::build_option::tuning_config(m_config.tuningConfig)); + + cldnn::topology topology; + + // 1. create inputs + InferenceEngine::InputsDataMap networkInputs; + network.getInputsInfo(networkInputs); + + InferenceEngine::OutputsDataMap networkOutputs; + network.getOutputsInfo(networkOutputs); + p_currentOutputs = networkOutputs; + + if (networkInputs.empty()) { + THROW_CLDNN_EXCEPTION("No inputs detected."); + } + + using LayerVect = std::vector; + std::list layersToHandle; + + auto push_if = [&](const LayerVect& clist) { + for (auto& l : clist) { + if ( (std::find_if( layersToHandle.begin(), + layersToHandle.end(), + [&](const CNNLayerPtr& x) { return layer_type_name_ID(x) == layer_type_name_ID(l); } )) == layersToHandle.end() ) + layersToHandle.push_back(l); + } + }; + + auto allInputs = CNNNetGetAllInputLayers(network); + for (auto input : allInputs) { + if (LayerTypeFromStr(input->type) == ConstantBlob) { + AddConstantBlobInput(topology, input); + } else { + auto iter = networkInputs.find(input->name); // regular input + if (iter != networkInputs.end()) { + AddInputPrimitive(topology, iter->second, input->precision, layer_type_name_ID(input)); + } + } + // collect next layers to process + push_if(GetNextLayers(input)); + } + + // 2. traverse layers + unsigned infLoopProtection = 0; + while (!layersToHandle.empty()) { + if (infLoopProtection++ >= layersToHandle.size()) { + THROW_CLDNN_EXCEPTION("Infinite loop during network creation"); + break; + } + InferenceEngine::CNNLayerPtr currLayer = layersToHandle.front(); + layersToHandle.pop_front(); + auto layerName = layer_type_name_ID(currLayer); + + if (primitiveIDs.find(layerName) != primitiveIDs.end()) { + infLoopProtection = 0; + continue; // this layer was already added (had multiple inputs) + } + + bool missingInput = false; + try { + GetPrevLayersPrimitives(currLayer); + } catch (std::exception) { + missingInput = true; + } + + if (missingInput) { // some inputs aren't created yet + layersToHandle.push_back(currLayer); // push the current layer to the end of the line + continue; // move on to the next layer + } + + infLoopProtection = 0; // found a layer with all inputs already existing + CreateSingleLayerPrimitive(topology, currLayer); // currLayer will be advanced if layer was skipped or merged + prevPrimitiveIDs[layerName] = GetPrevLayersPrimitives(currLayer); + + push_if(GetNextLayers(currLayer)); + } + + // 3. Handle output reordering + for (auto output : networkOutputs) { + // always reorder and let clDNN remove unneeded reorders + AddOutputPrimitive(topology, output.first, output.second); + } + + // 4. ??? + // 5. 
profit + p_currentOutputs.clear(); + + return std::make_shared(*m_engine, topology, options); +} + +Program::LayerType Program::LayerTypeFromStr(const std::string &str) { + static const caseless_map LayerNameToType = { + { "Convolution" , Convolution }, + { "DeformableConvolution" , DeformableConvolution }, + { "ReLU" , ReLU }, + { "ReLU6" , ReLU6 }, + { "Sigmoid" , Sigmoid }, + { "Logistic" , Sigmoid }, + { "TanH" , TanH }, + { "ELU" , ELU }, + { "Activation" , Activation }, + { "Exp" , Exp }, + { "Not" , Not }, + { "Norm" , LRN }, + { "Pooling" , Pooling }, + { "FullyConnected" , FullyConnected }, + { "SoftMax" , SoftMax }, + { "Power" , Power }, + { "Split" , Split }, + { "Slice" , Split }, + { "Concat" , Concatenate }, + { "Eltwise" , Eltwise }, + { "SimplerNMS" , SimplerNMS }, + { "ROIPooling" , ROIPooling }, + { "Crop" , Crop }, + { "Deconvolution" , Deconvolution }, + { "PriorBox" , PriorBox }, + { "DetectionOutput" , DetectionOutput }, + { "Normalize" , Normalize }, + { "Reshape" , Reshape }, + { "Permute" , Permute }, + { "Flatten" , Flatten }, + { "BatchNormalization" , BatchNormalization }, + { "PReLU" , PReLU }, + { "ScaleShift" , ScaleShift }, + { "Proposal" , Proposal }, + { "PSROIPooling" , PSROIPooling }, + { "Clamp" , Clamp }, + { "Copy" , Copy }, + { "Upsampling" , Upsampling }, + { "Resample" , Resample }, + { "RegionYolo" , RegionYolo }, + { "ReorgYolo" , ReorgYolo }, + { "Const" , ConstantBlob }, + { "ArgMax" , ArgMax }, + { "ArgMin" , ArgMin }, + { "MVN" , MVN }, + { "Unpooling" , Unpooling }, + { "Tile" , Tile }, + { "Pad" , Pad }, + { "LSTMCell" , LSTMCell }, + { "LSTMSequence" , RNN }, + { "RNNSequence" , RNN }, + { "Gather" , Gather }, + { "DepthToSpace" , DepthToSpace }, + { "ShuffleChannels" , ShuffleChannels }, + { "StridedSlice" , StridedSlice }, + { "ReverseSequence" , ReverseSequence }, + { "BinaryConvolution" , BinaryConvolution }, + { "Quantize" , Quantize }, + { "Broadcast" , Broadcast }, + { "Squeeze" , Squeeze }, + { "Unsqueeze" , Unsqueeze }, + { "ReduceMax" , Reduce }, + { "ReduceMin" , Reduce }, + { "ReduceMean" , Reduce }, + { "ReduceProd" , Reduce }, + { "ReduceSum" , Reduce }, + { "ReduceAnd" , Reduce }, + { "ReduceOr" , Reduce }, + { "ReduceSumSquare" , Reduce }, + { "ReduceL1" , Reduce }, + { "ReduceL2" , Reduce }, + { "ReduceLogSum" , Reduce }, + { "ReduceLogSumExp" , Reduce }, + { "TopK" , TopK }, + { "Asin" , Asin }, + { "Atan" , Atan }, + { "Acos" , Acos }, + { "Abs" , Abs }, + { "Acosh" , Acosh }, + { "Atanh" , Atanh }, + { "Floor" , Floor }, + { "Ceil" , Ceil }, + { "Erf" , Erf }, + { "HardSigmoid" , HardSigmoid }, + { "Log" , Log }, + { "Neg" , Neg }, + { "Reciprocal" , Reciprocal }, + { "Selu" , Selu }, + { "Sign" , Sign }, + { "SoftPlus" , SoftPlus }, + { "SoftSign" , SoftSign }, + { "Tan" , Tan }, + { "GEMM", Gemm }, + { "OneHot", OneHot} + }; + auto it = LayerNameToType.find(str); + if (it != LayerNameToType.end()) + return it->second; + else + return NO_TYPE; +} + +cldnn::pooling_mode Program::PoolingModeFromIEPooling(InferenceEngine::PoolingLayer::PoolType pt, bool excludePadding) { + switch (pt) { + case InferenceEngine::PoolingLayer::PoolType::MAX: + return cldnn::pooling_mode::max; + case InferenceEngine::PoolingLayer::PoolType::AVG: + return excludePadding ? 
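/* the two average modes differ in the divisor: average_no_padding is meant to
   divide each window sum only by the count of real input elements, while
   plain average divides by the full window size including padding */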
cldnn::pooling_mode::average_no_padding : cldnn::pooling_mode::average; + default: + THROW_CLDNN_EXCEPTION("Unsupported pooling type: " << pt); + break; + } + + return cldnn::pooling_mode::max; // shouldn't get here +} + +cldnn::eltwise_mode Program::EltwiseModeFromIEEltwise(InferenceEngine::EltwiseLayer::eOperation op) { + switch (op) { + case InferenceEngine::EltwiseLayer::Sum: + return cldnn::eltwise_mode::sum; + case InferenceEngine::EltwiseLayer::Prod: + return cldnn::eltwise_mode::prod; + case InferenceEngine::EltwiseLayer::Max: + return cldnn::eltwise_mode::max; + case InferenceEngine::EltwiseLayer::Sub: + return cldnn::eltwise_mode::sub; + case InferenceEngine::EltwiseLayer::Min: + return cldnn::eltwise_mode::min; + case InferenceEngine::EltwiseLayer::Div: + return cldnn::eltwise_mode::div; + case InferenceEngine::EltwiseLayer::Squared_diff: + return cldnn::eltwise_mode::squared_diff; + case InferenceEngine::EltwiseLayer::Equal: + return cldnn::eltwise_mode::eq; + case InferenceEngine::EltwiseLayer::Not_equal: + return cldnn::eltwise_mode::ne; + case InferenceEngine::EltwiseLayer::Less: + return cldnn::eltwise_mode::lt; + case InferenceEngine::EltwiseLayer::Less_equal: + return cldnn::eltwise_mode::le; + case InferenceEngine::EltwiseLayer::Greater: + return cldnn::eltwise_mode::gt; + case InferenceEngine::EltwiseLayer::Greater_equal: + return cldnn::eltwise_mode::ge; + case InferenceEngine::EltwiseLayer::Logical_AND: + return cldnn::eltwise_mode::logic_and; + case InferenceEngine::EltwiseLayer::Logical_OR: + return cldnn::eltwise_mode::logic_or; + case InferenceEngine::EltwiseLayer::Logical_XOR: + return cldnn::eltwise_mode::logic_xor; + case InferenceEngine::EltwiseLayer::Pow: + return cldnn::eltwise_mode::pow; + case InferenceEngine::EltwiseLayer::Floor_mod: + return cldnn::eltwise_mode::floor_mod; + default: THROW_CLDNN_EXCEPTION("Unsupported eltwise operation: " << op); + break; + } + + return cldnn::eltwise_mode::max; // shouldn't get here +} + +auto CldnnTensorFromIEDims = [](const InferenceEngine::SizeVector& dims, int def = 1) { + switch (dims.size()) { + case 1: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(def), cldnn::spatial(def, def)); + case 2: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(def, def)); + case 3: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(def, dims[2])); + case 4: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[3], dims[2])); + case 5: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[4], dims[3], dims[2])); + case 6: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[5], dims[4], dims[3], dims[2])); + default: THROW_CLDNN_EXCEPTION("Invalid dimensions size(" << dims.size() << ") for clDNN tensor"); + } +}; + +cldnn::primitive_id Program::CreatePrimitiveFromBlob(cldnn::topology& topology, + cldnn::primitive_id primID, + const InferenceEngine::Blob::Ptr pBlob, + const cldnn::layout& blobLayout, + size_t blobByteOffset, + WeightRearrangeType rearrange) { +// The condition below is not valid once we use groups - todo: think of some other size check here +// if ((pBlob != nullptr) && +// (pBlob->size() * (broadcastFeatures ? 
blobLayout.size.feature[0] : 1)) != blobLayout.count()) { +// THROW_CLDNN_EXCEPTION("Unexpected blob size"); +// } + if (pBlob == nullptr) { + THROW_CLDNN_EXCEPTION("Missing blob data: " << primID); + } + + auto data = static_cast(pBlob->buffer()) + blobByteOffset; + + auto bufIter = blobMemCache.find(data); + + if (bufIter != blobMemCache.end()) { + return bufIter->second; + } + + auto mem = cldnn::memory::allocate(*m_engine, blobLayout); + auto tmpPointer = mem.pointer(); // implicitly maps buffer - unmap in destructor + auto buf = tmpPointer.data(); + auto bufSize = blobLayout.bytes_count(); + + const auto descLayout = pBlob->getTensorDesc().getLayout(); + if ((descLayout != InferenceEngine::OIHW) && + (descLayout != InferenceEngine::NCDHW) && + (descLayout != InferenceEngine::NCHW) && + (descLayout != InferenceEngine::CHW) && + (descLayout != InferenceEngine::NC) && + (descLayout != InferenceEngine::C)) { + // TODO: support more layouts + THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(descLayout) << ") in blob: " << primID); + } else if (rearrange == BroadcastFeatures) { + size_t features = static_cast(blobLayout.size.feature[0]); + if (pBlob->size() != features) { + THROW_CLDNN_EXCEPTION("Invalid blob dimensions to broadcast: " << primID); + } + auto elementSize = cldnn::data_type_traits::size_of(blobLayout.data_type); + size_t featureElements = blobLayout.count() / static_cast(blobLayout.size.feature[0]); + IE_ASSERT(blobLayout.format == cldnn::format::bfyx); + for (size_t f = 0; f < features; f++) { + for (size_t e = 0; e < featureElements; e++) { + for (size_t b = 0; b < elementSize; b++) { + buf[(f*featureElements + e)*elementSize + b] = data[f*elementSize + b]; + } + } + } + } else if (rearrange == FlipDeconvDims) { + auto elementSize = cldnn::data_type_traits::size_of(blobLayout.data_type); + + size_t inputFeatureElements = static_cast(blobLayout.size.feature[0]); + size_t outputFeatureElements = static_cast(blobLayout.size.batch[0]); + + size_t featureSize = elementSize * blobLayout.size.spatial[0] * blobLayout.size.spatial[1]; + if (blobLayout.format == cldnn::format::bfzyx) + featureSize *= static_cast(blobLayout.size.spatial[2]); + + for (size_t i = 0; i < inputFeatureElements; i++) { + for (size_t o = 0; o < outputFeatureElements; o++) { + size_t outputShift = (o*inputFeatureElements + i)*featureSize; + size_t inputShift = (i*outputFeatureElements + o)*featureSize; + + for (size_t b = 0; b < featureSize; b++) { + buf[outputShift + b] = data[inputShift + b]; + } + } + } + } else { + for (size_t i = 0; i < bufSize; i++) { + buf[i] = data[i]; + } + } + topology.add(cldnn::data(primID, mem)); + blobMemCache[data] = primID; + return primID; +} + +void Program::CreateWeightAndBiasPrimitives(cldnn::topology& topology, + const InferenceEngine::CNNLayerPtr& layer, + std::vector& weightsPrimID, + std::vector& biasesPrimID) { + cldnn::tensor::value_type inFeatures = 1; // todo: workaround for xyf input, handle general case (xf, xyzf etc...) 
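// Group handling sketch (numbers are illustrative): for a grouped
// (de)convolution the single IE weight blob is sliced into one clDNN data
// primitive per group via a byte offset (g * bytesPerGroup below); with
// _out_depth = 32, inFeatures = 32 and _group = 4, each slice has dims
// { 8, 8 } plus the kernel dims, and four weight primitives are created.
// Groups of 16 or more fall back to groupSize = 1, since clDNN has an
// optimized path for large group counts.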
+ std::shared_ptr insData0 = layer->insData[0].lock(); + IE_ASSERT(insData0 != nullptr); + const auto in0dims = insData0->getTensorDesc().getDims(); + if (in0dims.size() > 1) { + inFeatures = TensorValue(in0dims[1]); + } + cldnn::tensor::value_type outFeatures(0); + std::vector weightDimsVec; + InferenceEngine::Blob::Ptr pWeightsBlob, pBiasBlob; + unsigned groupSize = 1; + WeightRearrangeType rearrange = NO_REARRANGE; + + switch (LayerTypeFromStr(layer->type)) { + case Convolution: { + auto convLayer = as (layer); + groupSize = convLayer->_group; + if ((inFeatures % groupSize) || (convLayer->_out_depth % groupSize)) { + THROW_CLDNN_EXCEPTION("Invalid group size in layer " << convLayer->name); + } + if (groupSize >= 16) // cldnn optimization for 16 and more groups + groupSize = 1; + + weightDimsVec = { TensorValue(convLayer->_out_depth / groupSize), TensorValue(inFeatures / convLayer->_group) }; + for (size_t i = 0; i < convLayer->_kernel.size(); i++) { + weightDimsVec.push_back(TensorValue(convLayer->_kernel[i])); + } + outFeatures = convLayer->_out_depth; + pWeightsBlob = getBlob(layer, "weights"); + pBiasBlob = getBlobOrNull(layer, "biases"); + break; + } + case Deconvolution: { + auto deconvLayer = as (layer); + groupSize = deconvLayer->_group; + if ((inFeatures % groupSize) || (deconvLayer->_out_depth % groupSize)) { + THROW_CLDNN_EXCEPTION("Invalid group size in layer " << deconvLayer->name); + } + if (groupSize >= 16) // cldnn optimization for 16 and more groups + groupSize = 1; + + weightDimsVec = { TensorValue(deconvLayer->_out_depth / groupSize), TensorValue(inFeatures / deconvLayer->_group) }; + for (size_t i = 0; i < deconvLayer->_kernel.size(); i++) { + weightDimsVec.push_back(TensorValue(deconvLayer->_kernel[i])); + } + outFeatures = deconvLayer->_out_depth; + pWeightsBlob = getBlob(layer, "weights"); + pBiasBlob = getBlobOrNull(layer, "biases"); + + if ((groupSize < outFeatures) || (groupSize < inFeatures)) + rearrange = FlipDeconvDims; + break; + } + case DeformableConvolution: { + auto defConvLayer = as (layer); + groupSize = 1; + /*if (groupSize >= 16) // cldnn optimization for 16 and more groups + groupSize = 1;*/ + weightDimsVec = { TensorValue(defConvLayer->_out_depth), TensorValue(inFeatures / defConvLayer->_group) }; + for (size_t i = 0; i < defConvLayer->_kernel.size(); i++) { + weightDimsVec.push_back(TensorValue(defConvLayer->_kernel[i])); + } + outFeatures = defConvLayer->_out_depth; + pWeightsBlob = defConvLayer->_weights; + pBiasBlob = defConvLayer->_biases; + break; + } + default: + THROW_IE_EXCEPTION << "Wrong weightable layer type"; + break; + } + + // create weights primitive + cldnn::format wFmt = m_defaultFormat; + if (weightDimsVec.size() > 4) + wFmt = cldnn::format::bfzyx; + + cldnn::layout weightsLayout = cldnn::layout( + DataTypeFromPrecision(pWeightsBlob->getTensorDesc().getPrecision()), + wFmt, + cldnn::tensor(weightDimsVec)); + size_t bytesPerGroup = weightsLayout.bytes_count(); + + for (unsigned g = 0; g < groupSize; g++) { + cldnn::primitive_id weightID = layer_type_name_ID(layer) + m_weightsTag + std::to_string(g); + weightID = CreatePrimitiveFromBlob(topology, + weightID, + pWeightsBlob, + weightsLayout, + g * bytesPerGroup, + rearrange); + weightsPrimID.push_back(weightID); + } + + // create bias primitive + if (pBiasBlob != nullptr) { + cldnn::layout biasesLayout = cldnn::layout( + DataTypeFromPrecision(pBiasBlob->getTensorDesc().getPrecision()), + FormatFromLayout(pBiasBlob->getTensorDesc().getLayout()), + (cldnn::tensor) 
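/* biases are laid out as a flat spatial tensor of outFeatures / groupSize
   elements and sliced per group with the same byte-offset scheme used for
   the weights above */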
cldnn::spatial(TensorValue(outFeatures / groupSize))); + size_t bytesPerGroup = biasesLayout.bytes_count(); + for (unsigned g = 0; g < groupSize; g++) { + cldnn::primitive_id biasID = layer_type_name_ID(layer) + m_biasesTag + std::to_string(g); + biasID = CreatePrimitiveFromBlob(topology, + biasID, + pBiasBlob, + biasesLayout, + g * bytesPerGroup); + biasesPrimID.push_back(biasID); + } + } +} + +void Program::CreateBinaryWeightAndBiasPrimitives(cldnn::topology& topology, + const InferenceEngine::CNNLayerPtr& layer, + std::vector& weightsPrimID, + std::vector& biasesPrimID) { + cldnn::tensor::value_type inFeatures = 1; // todo: workaround for xyf input, handle general case (xf, xyzf etc...) + std::shared_ptr insData0 = layer->insData[0].lock(); + IE_ASSERT(insData0 != nullptr); + const auto in0dims = insData0->getTensorDesc().getDims(); + if (in0dims.size() > 1) { + inFeatures = TensorValue(in0dims[1]); + } + std::vector weightDimsVec; + InferenceEngine::Blob::Ptr pWeightsBlob, pBiasBlob; + uint32_t groupSize = 1; + WeightRearrangeType rearrange = NO_REARRANGE; + + switch (LayerTypeFromStr(layer->type)) { + case BinaryConvolution: { + auto binaryConvLayer = as(layer); + groupSize = binaryConvLayer->_group; + if ((inFeatures % groupSize) || (binaryConvLayer->_out_depth % groupSize)) { + THROW_CLDNN_EXCEPTION("Invalid group size in layer " << binaryConvLayer->name); + } + weightDimsVec = { + TensorValue(binaryConvLayer->_out_depth), + TensorValue(inFeatures), + TensorValue(binaryConvLayer->_kernel[X_AXIS]), + TensorValue(binaryConvLayer->_kernel[Y_AXIS]) + }; + pWeightsBlob = binaryConvLayer->_weights; + pBiasBlob = binaryConvLayer->_biases; + break; + } + default: + THROW_CLDNN_EXCEPTION("Wrong binary weightable layer type"); + } + + // create weights primitive + cldnn::layout weightsLayout = cldnn::layout( + cldnn::data_types::bin, + cldnn::format::bfyx, + cldnn::tensor(weightDimsVec)); + + cldnn::primitive_id weightID = layer->name + m_weightsTag; + weightID = CreatePrimitiveFromBlob(topology, + weightID, + pWeightsBlob, + weightsLayout, + 0, + rearrange); + weightsPrimID.push_back(weightID); + + // create bias primitive + if (pBiasBlob != nullptr) { + THROW_CLDNN_EXCEPTION("Biases are not supported in BinaryConvolution primitive"); + } +} + +void Program::CreateScaleWeightsAndBiasesFromBN(cldnn::topology& topology, + const InferenceEngine::BatchNormalizationLayer* bnLayer, + cldnn::primitive_id& weightsPrimID, + cldnn::primitive_id& biasesPrimID) { + auto weightTD = bnLayer->_weights->getTensorDesc(); + auto biasTD = bnLayer->_biases->getTensorDesc(); + { + if (weightTD.getDims() != biasTD.getDims()) { + THROW_CLDNN_EXCEPTION("mean/variance dimensions mismatch in " << bnLayer->name); + } + if (weightTD.getPrecision() != biasTD.getPrecision()) { + THROW_CLDNN_EXCEPTION("mean/variance precision mismatch in " << bnLayer->name); + } + } + + cldnn::tensor blobTensor(0); + auto outDims = bnLayer->outData[0]->getTensorDesc().getDims(); + if (outDims.size() != 2 && outDims.size() != 4) { + THROW_CLDNN_EXCEPTION("Batch normalization input doesn't have 2 or 4 dimensions in " << bnLayer->name); + } + blobTensor = (cldnn::tensor) cldnn::feature(TensorValue(outDims[1])); + cldnn::layout blobLayout( + DataTypeFromPrecision(bnLayer->precision), + m_defaultFormat, + blobTensor); + + const auto wPecision = bnLayer->_weights->getTensorDesc().getPrecision(); + + switch (wPecision) { + case Precision::FP16: { + InferenceEngine::TBlob weightsBlob(bnLayer->_weights->getTensorDesc()); + weightsBlob.allocate(); 
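// The conversion below folds batch normalization into a single scale
// primitive:
//   scale = 1 / sqrt(variance + epsilon),   bias = -mean * scale,
// so that scale * x + bias == (x - mean) / sqrt(variance + epsilon).
// Worked example (illustrative): mean = 2, variance = 3, epsilon = 1 give
// scale = 1 / sqrt(4) = 0.5 and bias = -1, mapping x = 6 to 6 * 0.5 - 1 = 2,
// which matches (6 - 2) / 2. The FP16 path only adds half<->float conversion.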
+ InferenceEngine::TBlob biasesBlob(bnLayer->_biases->getTensorDesc()); + biasesBlob.allocate(); + + auto weightsData = weightsBlob.data(); + auto biasesData = biasesBlob.data(); + auto varianceData = static_cast(bnLayer->_weights->buffer()); + auto meanData = static_cast(bnLayer->_biases->buffer()); + + cldnn_status status = CLDNN_SUCCESS; + for (size_t i = 0; i < weightsBlob.size(); i++) { + auto variance = cldnn_half_to_float(varianceData[i], &status); + if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name); + auto mean = cldnn_half_to_float(meanData[i], &status); + if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name); + + float scale = 1.0f / sqrt(variance + bnLayer->epsilon); + weightsData[i] = cldnn_float_to_half(scale, &status); + if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name); + biasesData[i] = cldnn_float_to_half((-mean) * scale, &status); + if (status != CLDNN_SUCCESS) THROW_CLDNN_EXCEPTION("Error during fp16 conversion for layer " << bnLayer->name); + } + weightsPrimID = CreatePrimitiveFromBlob(topology, weightsPrimID, + std::make_shared>(weightsBlob), blobLayout); + biasesPrimID = CreatePrimitiveFromBlob(topology, biasesPrimID, + std::make_shared>(biasesBlob), blobLayout); + } + break; + case Precision::FP32: { + InferenceEngine::TBlob weightsBlob(bnLayer->_weights->getTensorDesc()); + weightsBlob.allocate(); + InferenceEngine::TBlob biasesBlob(bnLayer->_biases->getTensorDesc()); + biasesBlob.allocate(); + + auto weightsData = weightsBlob.data(); + auto biasesData = biasesBlob.data(); + auto varianceData = static_cast(bnLayer->_weights->buffer()); + auto meanData = static_cast(bnLayer->_biases->buffer()); + + for (size_t i = 0; i < weightsBlob.size(); i++) { + auto variance = varianceData[i]; + auto mean = meanData[i]; + weightsData[i] = 1.0f / sqrt(variance + bnLayer->epsilon); + biasesData[i] = (-mean) * weightsData[i]; + } + weightsPrimID = CreatePrimitiveFromBlob(topology, weightsPrimID, + std::make_shared>(weightsBlob), blobLayout); + biasesPrimID = CreatePrimitiveFromBlob(topology, biasesPrimID, + std::make_shared>(biasesBlob), blobLayout); + } + break; + default: + THROW_CLDNN_EXCEPTION("Unhandled mean/variance precision in " << bnLayer->name); + break; + } +} + +void Program::CreateQuantizationPrimitives(cldnn::topology& topology, + const InferenceEngine::CNNLayerPtr& layer, + std::vector& weightsQuantizationPrimID, + bool supportsDequantization, + size_t split) { + auto wScaleBlob = getBlobOrNull(layer, "w-scale"); + auto oiScaleBlob = getBlobOrNull(layer, "oi-scale"); + + auto layerName = layer_type_name_ID(layer); + + if (wScaleBlob != nullptr) { + auto wScaleDesc = wScaleBlob->getTensorDesc(); + auto wScaleDims = wScaleDesc.getDims(); + if (wScaleDims.size() != 1) + THROW_CLDNN_EXCEPTION("Incorrect weights scale dimensions number (" << wScaleDims.size() << ") - expected 1"); + + auto splitSize = wScaleDims[0] / split; + auto wScaleTensor = cldnn::tensor(cldnn::batch(splitSize)); + auto wScaleLayout = cldnn::layout( + DataTypeFromPrecision(wScaleDesc.getPrecision()), + m_defaultFormat, + wScaleTensor); + auto wScalePrimName = layerName + "_cldnn_wScale"; + + if (oiScaleBlob != nullptr) { + std::vector scaleVector; + float* wScaleData = wScaleBlob->buffer().as(); + float* oiScaleData = oiScaleBlob->buffer().as(); + + for (size_t si = 0; si < split; ++si) { + auto splitName = wScalePrimName + 
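/* when both blobs are present, the stored per-channel factor is
   wScale[c] / oiScale[c], i.e. the weight-domain scale is rebased into the
   output-domain scale so dequantization costs a single multiply at
   inference time */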
std::to_string(si); + + auto mem = cldnn::memory::allocate(*m_engine, wScaleLayout); + auto ptr = mem.pointer(); + + for (size_t c = 0; c < splitSize; ++c) { + ptr[c] = wScaleData[si * splitSize + c] / oiScaleData[si * splitSize + c]; + } + + topology.add(cldnn::data(splitName, mem)); + weightsQuantizationPrimID.push_back(splitName); + } + + } else if (supportsDequantization) { + auto wScaleBytes = wScaleLayout.bytes_count(); + + for (size_t si = 0; si < split; ++si) { + auto splitName = wScalePrimName + std::to_string(si); + CreatePrimitiveFromBlob(topology, splitName, wScaleBlob, wScaleLayout, si * wScaleBytes); + weightsQuantizationPrimID.push_back(splitName); + } + } + } +} + + +void Program::CreateSingleLayerPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + // Initialize a profiling entry + InitProfileInfo(layer->name, layer_type_lower(layer)); + + // First check for custom layer + auto customLayer = m_config.customLayers.find(layer->type); + if (customLayer != m_config.customLayers.end()) { + CreateCustomLayerPrimitive(topology, layer, customLayer->second); + return; + } + + // Otherwise move on to built-in layer types + switch (LayerTypeFromStr(layer->type)) { + case Convolution: + CreateConvolutionPrimitive(topology, layer); + break; + case DeformableConvolution: + CreateDeformableConvolutionPrimitive(topology, layer); + break; + case ReLU: + case ReLU6: + case Sigmoid: + case TanH: + case ELU: + case Clamp: + case Activation: + case Exp: + case Not: + case Asin: + case Atan: + case Acos: + case Abs: + case Asinh: + case Acosh: + case Tan: + case Atanh: + case Floor: + case Ceil: + case Erf: + case HardSigmoid: + case Log: + case Neg: + case Reciprocal: + case Selu: + case Sign: + case SoftPlus: + case SoftSign: + CreateActivationPrimitive(topology, layer, LayerTypeFromStr(layer->type)); + break; + case LRN: CreateLRNPrimitive(topology, layer); + break; + case Pooling: CreatePoolingPrimitive(topology, layer); + break; + case Unpooling: CreateMaxUnpoolingPrimitive(topology, layer); + break; + case FullyConnected: CreateFullyConnectedPrimitive(topology, layer); + break; + case SoftMax: CreateSoftMaxPrimitive(topology, layer); + break; + case Power: CreatePowerPrimitive(topology, layer); + break; + case Split: CreateSplitPrimitive(topology, layer); + break; + case Concatenate: CreateConcatenatePrimitive(topology, layer); + break; + case Eltwise: CreateEltwisePrimitive(topology, layer); + break; + case SimplerNMS: CreateSimplerNMSPrimitive(topology, layer); + break; + case ROIPooling: CreateROIPoolingPrimitive(topology, layer); + break; + case Crop: CreateCropPrimitive(topology, layer); + break; + case Deconvolution: CreateDeconvolutionPrimitive(topology, layer); + break; + case PriorBox: CreatePriorBoxPrimitive(topology, layer); + break; + case DetectionOutput: CreateDetectionOutputPrimitive(topology, layer); + break; + case Normalize: CreateNormalizePrimitive(topology, layer); + break; + case Reshape: CreateReshapePrimitive(topology, layer); + break; + case Permute: CreatePermutePrimitive(topology, layer); + break; + case Flatten: CreateFlattenPrimitive(topology, layer); + break; + case BatchNormalization: CreateBatchNormalizationPrimitive(topology, layer); + break; + case PReLU: CreatePReLUPrimitive(topology, layer); + break; + case ScaleShift: CreateScaleShiftPrimitive(topology, layer); + break; + case Proposal: CreateProposalPrimitive(topology, layer); + break; + case PSROIPooling: CreatePSROIPoolingPrimitive(topology, layer); + break; + case Copy: 
CreateCopyPrimitive(topology, layer); + break; + case Upsampling: CreateUpsamplingPrimitive(topology, layer); + break; + case Resample: CreateResamplePrimitive(topology, layer); + break; + case ArgMax: + case ArgMin: + CreateArgMaxMinPrimitive(topology, layer, LayerTypeFromStr(layer->type)); + break; + case MVN: CreateMVNPrimitive(topology, layer); + break; + case LSTMCell: CreateLSTMCellPrimitive(topology, layer); + break; + case RNN: CreateRNNPrimitive(topology, layer); + break; + case RegionYolo: CreateYOLO2RegionPrimitive(topology, layer); + break; + case ReorgYolo: CreateYOLO2ReorgPrimitive(topology, layer); + break; + case Tile: CreateTilePrimitive(topology, layer); + break; + case Pad: CreatePadPrimitive(topology, layer); + break; + case Gather: CreateGatherPrimitive(topology, layer); + break; + case DepthToSpace: CreateDepthToSpacePrimitive(topology, layer); + break; + case ShuffleChannels: CreateShuffleChannelsPrimitive(topology, layer); + break; + case StridedSlice: CreateStridedSlicePrimitive(topology, layer); + break; + case Broadcast: CreateBroadcastPrimitive(topology, layer); + break; + case ReverseSequence: CreateReverseSequencePrimitive(topology, layer); + break; + case BinaryConvolution: CreateBinaryConvolutionPrimitive(topology, layer); + break; + case Quantize: CreateQuantizePrimitive(topology, layer); + break; + case Squeeze: CreateReshapePrimitive(topology, layer); + break; + case Unsqueeze: CreateReshapePrimitive(topology, layer); + break; + case Reduce: CreateReducePrimitive(topology, layer); + break; + case TopK: CreateTopKPrimitive(topology, layer); + break; + case Gemm: CreateGemmPrimitive(topology, layer); + break; + case OneHot: CreateOneHotPrimitive(topology, layer); + break; + default: THROW_CLDNN_EXCEPTION("Unknown Layer Type: " << layer->type); + } +} + +void Program::CreateScaleShiftPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto scaleShiftLayer = as (layer); + + // create scales and biases + cldnn::primitive_id scalePrimID = scaleShiftLayer->name + m_scalesTag; + cldnn::primitive_id biasPrimID = scaleShiftLayer->name + m_biasesTag; + + const auto& wDims = scaleShiftLayer->_weights->getTensorDesc().getDims(); + cldnn::tensor weightTensor(1); + switch (wDims.size()) { + case 1: weightTensor = (cldnn::tensor) cldnn::feature(TensorValue(wDims[0])); // value per feature (or 1 global value) + break; + case 4: weightTensor = cldnn::tensor(TensorValue(wDims[0]), TensorValue(wDims[1]), TensorValue(wDims[3]), TensorValue(wDims[2])); // value per pixel + break; + case 5: weightTensor = cldnn::tensor(TensorValue(wDims[0]), TensorValue(wDims[1]), TensorValue(wDims[4]), TensorValue(wDims[3]), + TensorValue(wDims[2])); // value per pixel + break; + default: THROW_CLDNN_EXCEPTION("Invalid weights dimensions in layer " << layer->name); + break; + } + cldnn::layout blobLayout(DataTypeFromPrecision(layer->precision), m_defaultFormat, weightTensor); + scalePrimID = CreatePrimitiveFromBlob(topology, scalePrimID, scaleShiftLayer->_weights, blobLayout); + if (scaleShiftLayer->_biases != nullptr) { + const auto& bDims = scaleShiftLayer->_biases->getTensorDesc().getDims(); + if (bDims != wDims) { + THROW_CLDNN_EXCEPTION("Invalid bias blob dimensions in layer " << layer->name); + } + biasPrimID = CreatePrimitiveFromBlob(topology, biasPrimID, scaleShiftLayer->_biases, blobLayout); + } else { + biasPrimID = ""; // 0-bias + } + + auto inPrecision = 
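/* precision handling: if the input or output precision differs from the
   layer's operating precision, reorder primitives named "_cldnn_in_cast" /
   "_cldnn_out_cast" are inserted around the scale primitive as explicit
   casts, and primitiveIDs[scaleShiftLayerName] ends up pointing at the last
   primitive of that chain */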
layer->insData[0].lock()->getPrecision(); + auto outPrecision = layer->outData[0]->getPrecision(); + auto layerPrecision = layer->precision; + auto dimensionsNumber = layer->outData[0]->getTensorDesc().getDims().size(); + + std::string scaleShiftLayerName = layer_type_name_ID(layer); + + std::string prevLayerName = inputPrimitives[0]; + + // Cast input data if it doesn't match operating precision + if (inPrecision != layerPrecision) { + std::string inReorderName = scaleShiftLayerName + "_cldnn_in_cast"; + + auto inReorderPrim = cldnn::reorder( + inReorderName, + prevLayerName, + defaultFormatForDims(dimensionsNumber), + DataTypeFromPrecision(layerPrecision)); + + topology.add(inReorderPrim); + profilingIDs.push_back(inReorderName); + primitivesToIRLayersMap[inReorderName] = { layer->name }; + primitiveIDs[inReorderName] = inReorderName; + + prevLayerName = inReorderName; + } + + auto scaleShiftPrim = cldnn::scale( + scaleShiftLayerName, + prevLayerName, + scalePrimID, + biasPrimID); + + prevLayerName = scaleShiftLayerName; + + topology.add(scaleShiftPrim); + profilingIDs.push_back(scaleShiftLayerName); + primitivesToIRLayersMap[scaleShiftLayerName] = { layer->name }; + + // Cast output data if it doesn't match operating precision + if (outPrecision != layerPrecision) { + std::string outReorderName = scaleShiftLayerName + "_cldnn_out_cast"; + + auto outReorderPrim = cldnn::reorder( + outReorderName, + prevLayerName, + defaultFormatForDims(dimensionsNumber), + DataTypeFromPrecision(outPrecision)); + + topology.add(outReorderPrim); + profilingIDs.push_back(outReorderName); + primitivesToIRLayersMap[outReorderName] = { layer->name }; + primitiveIDs[outReorderName] = outReorderName; + + prevLayerName = outReorderName; + } + + primitiveIDs[scaleShiftLayerName] = prevLayerName; +} + +void Program::CreateProposalPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr & layer) { + ValidateLayer(layer, 3); + auto proposalLayer = as (layer); + + float nms_thresh = proposalLayer->GetParamAsFloat("nms_thresh", 0.7f); + int min_size = proposalLayer->GetParamAsInt("min_size", 16); + int feature_stride = proposalLayer->GetParamAsInt("feat_stride", 16); + int pre_nms_topn = proposalLayer->GetParamAsInt("pre_nms_topn", 6000); + int post_nms_topn = proposalLayer->GetParamAsInt("post_nms_topn", 300); + const std::vector ratio = proposalLayer->GetParamAsFloats("ratio"); + const std::vector scale = proposalLayer->GetParamAsFloats("scale"); + float box_coordinate_scale = proposalLayer->GetParamAsFloat("box_coordinate_scale", 1.0f); + float box_size_scale = proposalLayer->GetParamAsFloat("box_size_scale", 1.0f); + int base_size = proposalLayer->GetParamAsInt("base_size", 16); + std::string framework = proposalLayer->GetParamAsString("framework", ""); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + bool normalize = layer->GetParamAsBool("normalize", false); + bool clip_before_nms = layer->GetParamAsBool("clip_before_nms", true); + bool clip_after_nms = layer->GetParamAsBool("clip_after_nms", false); + + float coordinates_offset; + bool swap_xy; + bool initial_clip; + bool round_ratios; + bool shift_anchors; + + if (framework == "tensorflow") { + coordinates_offset = 0.0f; + initial_clip = true; + shift_anchors = true; + round_ratios = false; + swap_xy = true; + } else { + coordinates_offset = 1.0f; + initial_clip = false; + shift_anchors = false; + round_ratios = true; + swap_xy = false; + } + + const bool for_deformable = layer->GetParamAsBool("for_deformable", 0); + + if 
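/* a two-output Proposal is modelled with a pair of mutable_data primitives
   sharing one buffer: the "_md_write" primitive is appended as a fourth
   input so the proposal kernel can fill it, and a read-side mutable_data
   exposes the same memory under the second output's id */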
(layer->outData.size() == 2) { + cldnn::layout mutableLayout = cldnn::layout( + DataTypeFromPrecision(layer->outData[1]->getPrecision()), + m_defaultFormat, + CldnnTensorFromIEDims(layer->outData[1]->getDims())); + + auto shared_memory = cldnn::memory::allocate(*m_engine, mutableLayout); + + cldnn::primitive_id proposal_mutable_id_w = layer_type_name_ID(layer) + "_md_write"; + auto argmax_mutable_prim = cldnn::mutable_data(proposal_mutable_id_w, shared_memory); + primitivesToIRLayersMap[proposal_mutable_id_w] = { layer->name }; + primitiveIDs[proposal_mutable_id_w] = proposal_mutable_id_w; + topology.add(argmax_mutable_prim); + inputPrimitives.push_back(proposal_mutable_id_w); + + std::string proposalLayerName = layer_type_lower(layer) + ":" + layer->outData[0]->getName(); + + auto proposalPrim = cldnn::proposal( + proposalLayerName, + inputPrimitives[0], // cls_score + inputPrimitives[1], // bbox_pred + inputPrimitives[2], // im_info + inputPrimitives[3], // second_output + 0, // max_num_proposals is unused + nms_thresh, + base_size, + min_size, + feature_stride, + pre_nms_topn, + post_nms_topn, + ratio, + scale, + coordinates_offset, + box_coordinate_scale, + box_size_scale, + for_deformable, + swap_xy, + initial_clip, + clip_before_nms, + clip_after_nms, + round_ratios, + shift_anchors, + normalize); + + primitivesToIRLayersMap[proposalLayerName] = { layer->name }; + primitiveIDs[proposalLayerName] = proposalLayerName; + topology.add(proposalPrim); + + cldnn::primitive_id proposal_mutable_id_r = layer_type_lower(layer) + ":" + layer->outData[1]->getName(); + auto argmax_mutable_prim_r = cldnn::mutable_data(proposal_mutable_id_r, { proposalLayerName }, shared_memory); + primitivesToIRLayersMap[proposal_mutable_id_r] = { layer->name }; + primitiveIDs[proposal_mutable_id_r] = proposal_mutable_id_r; + topology.add(argmax_mutable_prim_r); + + profilingIDs.push_back(proposalLayerName); + return; + } + + std::string proposalLayerName = layer_type_name_ID(layer); + auto proposalPrim = cldnn::proposal( + proposalLayerName, + inputPrimitives[0], // cls_score + inputPrimitives[1], // bbox_pred + inputPrimitives[2], // im_info + 0, // max_num_proposals is unused + nms_thresh, + base_size, + min_size, + feature_stride, + pre_nms_topn, + post_nms_topn, + ratio, + scale, + coordinates_offset, + box_coordinate_scale, + box_size_scale, + for_deformable, + swap_xy, + initial_clip, + clip_before_nms, + clip_after_nms, + round_ratios, + shift_anchors, + normalize); + + primitivesToIRLayersMap[proposalLayerName] = { layer->name }; + primitiveIDs[proposalLayerName] = proposalLayerName; + topology.add(proposalPrim); + profilingIDs.push_back(proposalLayerName); +} + +void Program::CreatePReLUPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto preluLayer = as (layer); + + std::string preluLayerName = layer_type_name_ID(layer); + auto inDataPtr = preluLayer->insData[0].lock(); + if (!inDataPtr) { + THROW_CLDNN_EXCEPTION("Data inserted into PreLu " << preluLayer->name << " is nullptr"); + } + + static const std::string blobName("weights"); + ValidateGenericLayerBlobs(preluLayer, { blobName }); + + bool channel_shared = preluLayer->GetParamAsBool("channel_shared", false); + + auto slopeBlob = preluLayer->blobs.at(blobName); + const auto slopeBlobDesc = slopeBlob->getTensorDesc(); + const auto dim0 = slopeBlobDesc.getDims().back(); + if (channel_shared) { + if (dim0 != 1) { // slopeBlob->dims()[0] != 1 
+ THROW_CLDNN_EXCEPTION("PReLU slope blob with wrong dimensions in " << preluLayer->name); + } + float slope(0.0f); + switch (slopeBlobDesc.getPrecision()) { + case InferenceEngine::Precision::FP32: + slope = *static_cast(slopeBlob->buffer()); + break; + case InferenceEngine::Precision::FP16: + { + cldnn_status status = CLDNN_SUCCESS; + slope = cldnn_half_to_float(*static_cast(slopeBlob->buffer()), &status); + if (status != CLDNN_SUCCESS) { + THROW_CLDNN_EXCEPTION("Error converting fp16 value in " << preluLayer->name); + } + } + break; + default: THROW_CLDNN_EXCEPTION("Invalid PReLU slope blob precision in " << preluLayer->name); + } + topology.add(cldnn::activation(preluLayerName, inputPrimitives[0], activation_relu_negative_slope, { slope, 0.f })); + } else { + cldnn::primitive_id slopePrimID(preluLayerName + "_" + blobName + m_weightsTag); + auto map = CreateGenericLayerBlobPrimitives(topology, preluLayer); + topology.add(cldnn::activation(preluLayerName, inputPrimitives[0], map.at(slopePrimID), activation_relu_negative_slope)); + } + + primitivesToIRLayersMap[preluLayerName] = { layer->name }; + primitiveIDs[preluLayerName] = preluLayerName; + profilingIDs.push_back(preluLayerName); +} + +void Program::CreateBatchNormalizationPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr & layer) { + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + std::string bnLayerName = layer_type_name_ID(layer); + + auto bnLayer = as (layer); + cldnn::primitive_id weightID = bnLayerName + "_" + m_scalesTag; + cldnn::primitive_id biasID = bnLayerName + "_" + m_biasesTag; + +#define _SCALE_BN_OPT +#ifdef _SCALE_BN_OPT + // Using scale as an optimization (1 mad instead of mad+rsq) + // create new blobs for scale shift + CreateScaleWeightsAndBiasesFromBN(topology, bnLayer, weightID, biasID); + auto scalePrim = cldnn::scale(bnLayerName, inputPrimitives[0], weightID, biasID); + + primitivesToIRLayersMap[bnLayerName] = { layer->name }; + primitiveIDs[bnLayerName] = bnLayerName; + topology.add(scalePrim); + profilingIDs.push_back(bnLayerName); + return; +#else + cldnn::tensor blobTensor(0); + const auto bnDims = bnLayer->outData[0]->getTensorDesc().getDims(); + switch (bnDims.size()) { + case 2: + blobTensor = cldnn::feature(TensorValue(bnDims[1])); + break; + case 4: + blobTensor = cldnn::feature(TensorValue(bnDims[1])); + break; + default: + THROW_CLDNN_EXCEPTION("Batch normalization input doesn't have 2 or 4 dimensions in " << bnLayer->name); + } + cldnn::layout blobLayout( + DataTypeFromPrecision(layer->precision), + m_defaultFormat, + blobTensor); + + // Create variance primitive + cldnn::primitive_id varianceID = bnLayerName + "_" + m_weightsTag; + varianceID = CreatePrimitiveFromBlob(topology, varianceID, bnLayer->_weights, blobLayout); + + // Create mean primitive + cldnn::primitive_id meanID = bnLayerName + "_" + m_biasesTag; + meanID = CreatePrimitiveFromBlob(topology, meanID, bnLayer->_biases, blobLayout); + + auto bnPrim = cldnn::batch_norm( + bnLayerName, + inputPrimitives[0], + meanID, + varianceID, + bnLayer->epsilon); + + primitivesToIRLayersMap[bnLayerName] = { layer->name }; + primitiveIDs[bnLayerName] = bnLayerName; + topology.add(bnPrim); + profilingIDs.push_back(bnLayerName); +#endif // _SCALE_BN_OPT +} + +void Program::CreateFlattenPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto flattenLayer = as (layer); + std::string 
flattenLayerName = layer_type_name_ID(layer); + + auto flattenPrim = cldnn::reshape( + flattenLayerName, + inputPrimitives[0], + CldnnTensorFromIEDims(flattenLayer->outData[0]->getTensorDesc().getDims())); + + primitivesToIRLayersMap[flattenLayerName] = { layer->name }; + primitiveIDs[flattenLayerName] = flattenLayerName; + topology.add(flattenPrim); + profilingIDs.push_back(flattenLayerName); +} + +void Program::CreatePermutePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto permuteLayer = as(layer); + std::vector ie_order; + for (auto& a : permuteLayer->GetParamAsInts("order")) + ie_order.push_back(static_cast(a)); + + // if order size is less than 4 - fill the rest with just copy + for (auto o = ie_order.size(); o < 4; o++) + ie_order.push_back((uint16_t)o); + + /* + Because of the cldnn ordering: bfxy, and IE ordering: bfyx + we need to adjust the permute order. + */ + std::vector cldnn_permute_order; + // 1. Switch permute order values for spatial dims + for (auto const& o : ie_order) { + if (o >= 2) + cldnn_permute_order.push_back(1 + ie_order.size() - o); + else + cldnn_permute_order.push_back(o); + } + // 2. Swap spatial positions + for (int i = 0; i < (cldnn_permute_order.size() - 2) / 2; i++) { + std::swap(cldnn_permute_order[2 + i], cldnn_permute_order[1 + cldnn_permute_order.size() - (2 + i)]); + } + + std::string permuteLayerName = layer_type_name_ID(layer); + + auto permutePrim = cldnn::permute( + permuteLayerName, + inputPrimitives[0], + cldnn_permute_order); + + primitivesToIRLayersMap[permuteLayerName] = { layer->name }; + primitiveIDs[permuteLayerName] = permuteLayerName; + topology.add(permutePrim); + profilingIDs.push_back(permuteLayerName); +} + +void Program::CreateReshapePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + if (layer->insData.size() != 1 && layer->insData.size() != 2) + THROW_CLDNN_EXCEPTION("Invalid number of inputs for layer: " << layer->name); + + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto reshapeLayer = as(layer); + IE_ASSERT(reshapeLayer->outData.size()); + std::string reshapeLayerName = layer_type_name_ID(layer); + + auto outDesc = reshapeLayer->outData[0]->getTensorDesc(); + auto inDims = reshapeLayer->input()->getTensorDesc().getDims(); + auto outDims = outDesc.getDims(); + auto outTensor = CldnnTensorFromIEDims(outDims); + + // if we convert from or to 5D/6D, additional reorder also required to change format + cldnn::primitive_id reshapeInputId = inputPrimitives[0]; + if (inDims.size() != outDims.size()) { + cldnn::primitive_id reorderId = reshapeLayerName + "_reorder"; + cldnn::format outputFormat = cldnn::format::bfyx; + + switch (outDims.size()) { + case 5: outputFormat = cldnn::format::bfzyx; break; + case 6: outputFormat = cldnn::format::bfwzyx; break; + default: break; + } + + cldnn::layout outputLayout(DataTypeFromPrecision(outDesc.getPrecision()), outputFormat, outTensor); + topology.add(cldnn::reorder(reorderId, reshapeInputId, outputLayout)); + reshapeInputId = reorderId; + primitivesToIRLayersMap[reorderId] = { layer->name }; + } + + auto reshapePrim = cldnn::reshape( + reshapeLayerName, + reshapeInputId, + outTensor); + + primitivesToIRLayersMap[reshapeLayerName] = { layer->name }; + primitiveIDs[reshapeLayerName] = reshapeLayerName; + topology.add(reshapePrim); + profilingIDs.push_back(reshapeLayerName); +} + +void Program::CreateNormalizePrimitive(cldnn::topology& 
topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto normLayer = as (layer); + ValidateGenericLayerBlobs(normLayer, { "weights" }); + auto map = CreateGenericLayerBlobPrimitives(topology, normLayer); + + // params + bool across_spatial = normLayer->GetParamAsBool("across_spatial", true); + float eps = normLayer->GetParamAsFloat("eps", 0.0f); + + // WA for MO outputting %.6f + if (eps == 0.0f) { + eps = 1e-10f; + } + + std::string normLayerName = layer_type_name_ID(layer); + auto normPrim = cldnn::normalize( + normLayerName, + inputPrimitives[0], + map.at(normLayerName + "_weights" + m_weightsTag), + across_spatial, + eps); + + primitivesToIRLayersMap[normLayerName] = { layer->name }; + primitiveIDs[normLayerName] = normLayerName; + topology.add(normPrim); + profilingIDs.push_back(normLayerName); +} + +void Program::CreateDetectionOutputPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 3); + auto detectionLayer = as (layer); + + uint32_t num_classes = detectionLayer->GetParamAsUInt("num_classes", 1); + bool share_location = detectionLayer->GetParamAsBool("share_location", true); + int background_label_id = detectionLayer->GetParamAsInt("background_label_id", 0); + float nms_threshold = detectionLayer->GetParamAsFloat("nms_threshold", 0.3f); + int top_k = detectionLayer->GetParamAsInt("top_k", -1); + float confidence_threshold = detectionLayer->GetParamAsFloat("confidence_threshold", -FLT_MAX); + float eta = detectionLayer->GetParamAsFloat("eta", 1.0f); + int keep_top_k = detectionLayer->GetParamAsInt("keep_top_k", -1); + bool variance_encoded_in_target = detectionLayer->GetParamAsBool("variance_encoded_in_target", false); + int input_width = detectionLayer->GetParamAsInt("input_width", -1); + int input_height = detectionLayer->GetParamAsInt("input_height", -1); + bool normalized = detectionLayer->GetParamAsBool("normalized", true); + std::string code_type = detectionLayer->GetParamAsString("code_type", "caffe.PriorBoxParameter.CORNER"); + bool clip_before_nms = detectionLayer->GetParamAsBool("clip_before_nms", false) || + detectionLayer->GetParamAsBool("clip", false); // For backward compatibility + bool clip_after_nms = detectionLayer->GetParamAsBool("clip_after_nms", false); + bool decrease_label_id = detectionLayer->GetParamAsBool("decrease_label_id", false); + + cldnn::prior_box_code_type cldnnCodeType = PriorBoxCodeFromString(code_type); + int32_t prior_info_size = normalized != 0 ? 4 : 5; + int32_t prior_coordinates_offset = normalized != 0 ? 
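// ---------------------------------------------------------------------------
// PriorBoxCodeFromString, used above, is defined elsewhere in this file; a
// plausible sketch of the mapping it performs, assuming clDNN exposes the
// three Caffe-style code types (illustrative only, not the plugin's code):
static cldnn::prior_box_code_type PriorBoxCodeFromStringSketch(const std::string& str) {
    if (str == "caffe.PriorBoxParameter.CORNER")
        return cldnn::prior_box_code_type::corner;
    if (str == "caffe.PriorBoxParameter.CENTER_SIZE")
        return cldnn::prior_box_code_type::center_size;
    if (str == "caffe.PriorBoxParameter.CORNER_SIZE")
        return cldnn::prior_box_code_type::corner_size;
    THROW_CLDNN_EXCEPTION("Unknown prior box code type: " << str);
}
// ---------------------------------------------------------------------------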
0 : 1; + + auto inputPrimitives = GetPrevLayersPrimitives(layer); + std::string detectionLayerName = layer_type_name_ID(layer); + auto detectionPrim = cldnn::detection_output(detectionLayerName, + inputPrimitives[0], + inputPrimitives[1], + inputPrimitives[2], + num_classes, + keep_top_k, + share_location, + background_label_id, + nms_threshold, + top_k, + eta, + cldnnCodeType, + variance_encoded_in_target, + confidence_threshold, + prior_info_size, + prior_coordinates_offset, + normalized, + input_width, + input_height, + decrease_label_id, + clip_before_nms, + clip_after_nms); + + primitivesToIRLayersMap[detectionLayerName] = { layer->name }; + primitiveIDs[detectionLayerName] = detectionLayerName; + topology.add(detectionPrim); + profilingIDs.push_back(detectionLayerName); +} + +void Program::CreatePriorBoxPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 2); + auto priorBoxLayer = as (layer); + + // params + std::vector min_size = priorBoxLayer->GetParamAsFloats("min_size"); + std::vector max_size = priorBoxLayer->GetParamAsFloats("max_size", {}); + std::vector aspect_ratio = priorBoxLayer->GetParamAsFloats("aspect_ratio", {}); + std::vector variance = priorBoxLayer->GetParamAsFloats("variance"); + bool flip = priorBoxLayer->GetParamAsBool("flip", true); + bool clip = priorBoxLayer->GetParamAsBool("clip", false); + bool scale_all_sizes = priorBoxLayer->GetParamAsBool("scale_all_sizes", true); + float offset = priorBoxLayer->GetParamAsFloat("offset", 0.5f); + + auto step_w = priorBoxLayer->GetParamAsFloat("step_w", 0.0f); + auto step_h = priorBoxLayer->GetParamAsFloat("step_h", 0.0f); + auto step = priorBoxLayer->GetParamAsFloat("step", 0.0f); + + float _step_w = 0.0f; + float _step_h = 0.0f; + if (HasParam(priorBoxLayer->params, "step_w") && step_w != 0.0f && + HasParam(priorBoxLayer->params, "step_h") && step_h != 0.0f) { + _step_w = step_w; + _step_h = step_h; + } else if (HasParam(priorBoxLayer->params, "step") && step != 0.0f) { + _step_w = step; + _step_h = step; + } + + int img = priorBoxLayer->GetParamAsInt("img_size", 0); + int img_w = priorBoxLayer->GetParamAsInt("img_w", 0); + int img_h = priorBoxLayer->GetParamAsInt("img_h", 0); + if ((img != 0) || (img_w != 0) || (img_h != 0)) { + // unsupported mode + THROW_CLDNN_EXCEPTION("Unsupported image sizes in prior box " + layer->name + " (use an image blob instead of dimensions)"); + } + + IE_ASSERT(layer->insData[1].lock()); + auto img_dims = layer->insData[1].lock()->getTensorDesc().getDims(); + + auto wdim = img_dims.back(); + auto hdim = img_dims.at(img_dims.size()-2); + + cldnn::tensor img_size = (cldnn::tensor) cldnn::spatial(TensorValue(wdim), TensorValue(hdim)); + std::vector inputPrimitives = GetPrevLayersPrimitives(layer); + // second input isn't used by value - only dimensions taken from the layer input + + if (_step_w == 0.0f || _step_h == 0.0f) { + _step_w = static_cast(img_w) / static_cast(wdim); + _step_h = static_cast(img_h) / static_cast(hdim); + } + + std::string priorBoxLayerName = layer_type_name_ID(layer); + auto priorBoxPrim = cldnn::prior_box( + priorBoxLayerName, + inputPrimitives[0], + img_size, + min_size, + max_size, + aspect_ratio, + flip, + clip, + variance, + _step_w, + _step_h, + offset, + scale_all_sizes); + + primitivesToIRLayersMap[priorBoxLayerName] = { layer->name }; + primitiveIDs[priorBoxLayerName] = priorBoxLayerName; + topology.add(priorBoxPrim); + profilingIDs.push_back(priorBoxLayerName); +} + +void 
Program::CreateDeconvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto deconvLayer = as (layer); + + if (deconvLayer->_dilation[X_AXIS] != 1 || deconvLayer->_dilation[Y_AXIS] != 1) { + THROW_CLDNN_EXCEPTION("Unsupported dilation in deconvolution " << layer->name); + } + + std::vector weightPrimID; + std::vector biasPrimID; + CreateWeightAndBiasPrimitives(topology, layer, weightPrimID, biasPrimID); + + auto allPad = getPaddings(*deconvLayer); + int x_pad = allPad.begin[X_AXIS], y_pad = allPad.begin[Y_AXIS]; + cldnn::tensor stride, padding, dilation; + if (deconvLayer->input()->getTensorDesc().getDims().size() > 4) { + stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), + cldnn::spatial(deconvLayer->_stride[X_AXIS], + deconvLayer->_stride[Y_AXIS], + deconvLayer->_stride[Z_AXIS])); + int z_pad = allPad.begin[Z_AXIS]; + padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), + cldnn::spatial(-x_pad, -y_pad, -z_pad)); + dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), + cldnn::spatial(deconvLayer->_dilation[X_AXIS], + deconvLayer->_dilation[Y_AXIS], + deconvLayer->_dilation[Z_AXIS])); + } else { + stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), + cldnn::spatial(deconvLayer->_stride[X_AXIS], deconvLayer->_stride[Y_AXIS])); + padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), + cldnn::spatial(-x_pad, -y_pad, 0)); + dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), + cldnn::spatial(deconvLayer->_dilation[X_AXIS], deconvLayer->_dilation[Y_AXIS])); + } + + std::string deconvLayerName = layer_type_name_ID(layer); + + if (deconvLayer->_group >= 16) { + auto deconvPrim = cldnn::deconvolution(deconvLayerName, + inputPrimitives[0], + weightPrimID, + biasPrimID, + deconvLayer->_group, + stride, + padding, + false, + 0.0f, + CldnnTensorFromIEDims(deconvLayer->outData[0]->getTensorDesc().getDims())); + topology.add(deconvPrim); + } else { + auto deconvPrim = cldnn::deconvolution(deconvLayerName, + inputPrimitives[0], + weightPrimID, + biasPrimID, + stride, + padding, + false, + 0.0f, + CldnnTensorFromIEDims(deconvLayer->outData[0]->getTensorDesc().getDims())); + topology.add(deconvPrim); + } + primitivesToIRLayersMap[deconvLayerName] = { layer->name }; + primitiveIDs[deconvLayerName] = deconvLayerName; + profilingIDs.push_back(deconvLayerName); +} + +void Program::CreateCropPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + if (layer->insData.size() != 1 && layer->insData.size() != 2) { + THROW_CLDNN_EXCEPTION("Invalid number of inputs for layer: " << layer->name); + } + if (layer->_fusedWith) { + THROW_CLDNN_EXCEPTION("Unsupported fuse in layer: " << layer->name << " with: " << layer->_fusedWith->name); + } + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto cropLayer = as (layer); + IE_ASSERT(cropLayer->axis.size() == cropLayer->offset.size()); + // IE_ASSERT(cropLayer->outData[0] && cropLayer->outData[0]->dims.size() == 4); + + std::vector offset{ 0, 0, 0, 0 }; + for (size_t i = 0; i < cropLayer->axis.size(); i++) { + if (cropLayer->axis[i] < 0 || cropLayer->axis[i] > 3) { + THROW_CLDNN_EXCEPTION("Invalid crop axis: " + std::to_string(cropLayer->axis[i]) + " in layer " + cropLayer->name); + } + offset[cropLayer->axis[i]] = cropLayer->offset[i]; + } + auto outputDims = cropLayer->outData[0]->getTensorDesc().getDims(); + const size_t ods = outputDims.size(); + cldnn::tensor refSize( + 
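// ---------------------------------------------------------------------------
// For reference when reading the deconvolution above: the output shape that
// CldnnTensorFromIEDims() forwards is the one IE already derived, and since
// the handler rejects any dilation != 1, each 2D spatial dimension satisfies
// the standard relation out = stride * (in - 1) + kernel - 2 * pad.
// Minimal sketch (illustrative, not from the patch):
static size_t DeconvOutputDim(size_t in, size_t kernel, size_t stride, size_t pad) {
    return stride * (in - 1) + kernel - 2 * pad;
}
// e.g. DeconvOutputDim(16, 4, 2, 1) == 2 * 15 + 4 - 2 == 32
// ---------------------------------------------------------------------------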
TensorValue(ods > 0 ? outputDims[0] : 1), + TensorValue(ods > 1 ? outputDims[1] : 1), + TensorValue(ods > 3 ? outputDims[3] : 1), + TensorValue(ods > 2 ? outputDims[2] : 1)); + + cldnn::tensor offSize( + TensorValue(offset[0]), + TensorValue(offset[1]), + TensorValue(offset[3]), + TensorValue(offset[2])); + + std::string cropLayerName = layer_type_name_ID(layer); + auto cropPrim = cldnn::crop( + cropLayerName, + inputPrimitives[0], + refSize, + offSize); + + primitivesToIRLayersMap[cropLayerName] = { layer->name }; + primitiveIDs[cropLayerName] = cropLayerName; + topology.add(cropPrim); + profilingIDs.push_back(cropLayerName); +} + +void Program::CreateROIPoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 2); + auto roiPoolingLayer = as (layer); + + // params + int pooled_width = roiPoolingLayer->GetParamAsInt("pooled_w", 0); + int pooled_height = roiPoolingLayer->GetParamAsInt("pooled_h", 0); + float spatial_scale = roiPoolingLayer->GetParamAsFloat("spatial_scale", 1.0f); + std::string method = roiPoolingLayer->GetParamAsString("method", "max"); + bool position_sensitive = false; + + cldnn::pooling_mode mode = cldnn::pooling_mode::max; + if (method == "bilinear") { + mode = cldnn::pooling_mode::bilinear; + } + auto inputPrimitives = GetPrevLayersPrimitives(layer); + + std::string roiPoolingLayerName = layer_type_name_ID(layer); + auto roiPoolingPrim = cldnn::roi_pooling(roiPoolingLayerName, + inputPrimitives[0], // input data + inputPrimitives[1], // input rois + mode, + position_sensitive, + pooled_width, + pooled_height, + spatial_scale); + + primitivesToIRLayersMap[roiPoolingLayerName] = { layer->name }; + primitiveIDs[roiPoolingLayerName] = roiPoolingLayerName; + topology.add(roiPoolingPrim); + profilingIDs.push_back(roiPoolingLayerName); +} + +void Program::CreatePSROIPoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + auto psROIPoolingLayer = as (layer); + + // params + std::string mode_str = psROIPoolingLayer->GetParamAsString("mode", "average"); + cldnn::pooling_mode mode = mode_str == "average" ? cldnn::pooling_mode::average : + mode_str == "bilinear" ? 
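// ---------------------------------------------------------------------------
// The refSize/offSize constructors above take (batch, feature, x, y), while
// IE dims are NCHW, which is why indices 2 and 3 are deliberately swapped.
// The same mapping as a standalone sketch (illustrative only):
static cldnn::tensor TensorFromNCHWSketch(const InferenceEngine::SizeVector& dims) {
    const size_t n = dims.size();
    return cldnn::tensor(TensorValue(n > 0 ? dims[0] : 1),    // batch   <- N
                         TensorValue(n > 1 ? dims[1] : 1),    // feature <- C
                         TensorValue(n > 3 ? dims[3] : 1),    // x       <- W
                         TensorValue(n > 2 ? dims[2] : 1));   // y       <- H
}
// ---------------------------------------------------------------------------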
cldnn::pooling_mode::bilinear : cldnn::pooling_mode::deformable_bilinear; + bool no_trans = psROIPoolingLayer->GetParamAsBool("no_trans", true); + if (mode != cldnn::pooling_mode::deformable_bilinear || no_trans) + ValidateLayer(layer, 2); + else + ValidateLayer(layer, 3); + int group_size = psROIPoolingLayer->GetParamAsInt("group_size"); + int output_dim = psROIPoolingLayer->GetParamAsInt("output_dim"); + float spatial_scale = psROIPoolingLayer->GetParamAsFloat("spatial_scale"); + int spatial_bins_x = psROIPoolingLayer->GetParamAsInt("spatial_bins_x", 1); + int spatial_bins_y = psROIPoolingLayer->GetParamAsInt("spatial_bins_y", 1); + bool position_sensitive = true; + + auto inputPrimitives = GetPrevLayersPrimitives(layer); + std::string psROIPoolingLayerName = layer_type_name_ID(layer); + + if (mode != cldnn::pooling_mode::deformable_bilinear) { + auto psROIPoolingPrim = cldnn::roi_pooling(psROIPoolingLayerName, + inputPrimitives[0], // input data + inputPrimitives[1], // input rois + mode, + position_sensitive, + group_size, + group_size, + spatial_scale, + output_dim, + spatial_bins_x, + spatial_bins_y); + topology.add(psROIPoolingPrim); + } else { + float trans_std = psROIPoolingLayer->GetParamAsFloat("trans_std", 1); + int part_size = psROIPoolingLayer->GetParamAsInt("part_size", 1); + int pooled_width = psROIPoolingLayer->GetParamAsInt("pooled_width", 1); + int pooled_height = psROIPoolingLayer->GetParamAsInt("pooled_height", 1); + + auto psROIPoolingPrim = cldnn::roi_pooling(psROIPoolingLayerName, + inputPrimitives, + mode, + position_sensitive, + pooled_width, + pooled_height, + spatial_scale, + trans_std, + no_trans, + part_size, + group_size, + output_dim, + spatial_bins_x, + spatial_bins_y); + topology.add(psROIPoolingPrim); + } + primitivesToIRLayersMap[psROIPoolingLayerName] = {layer->name}; + primitiveIDs[psROIPoolingLayerName] = psROIPoolingLayerName; + profilingIDs.push_back(psROIPoolingLayerName); +} + +void Program::CreateCustomLayerPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr & layer, CLDNNCustomLayerPtr customLayer) { + ValidateLayer(layer, 0); + // todo: handling fusing + auto genericLayer = as (layer); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + + // Handle defines + std::string layerDefines; + for (const auto& def : customLayer->Defines()) { + std::string singleDefine("#define " + def.name + " " + def.prefix); + if (genericLayer->params.find(def.param) != genericLayer->params.end()) { + singleDefine += genericLayer->params.at(def.param); + } else { + singleDefine += def.default_value; + } + singleDefine += def.postfix + "\n"; + layerDefines.append(singleDefine); + } + + // reserve + std::vector reorderedInputs; + reorderedInputs.resize(inputPrimitives.size()); + + // Handle Blobs + std::map blobIndex; + for (auto& blob : genericLayer->blobs) { + const auto blobDims = blob.second->getTensorDesc().getDims(); + // create primitive from blob (always 1d) + cldnn::primitive_id blobId = genericLayer->name + "_" + blob.first; + if (blobDims.size() != 1) { + THROW_CLDNN_EXCEPTION("Invalid dimensions for blob " << blob.first << " in layer " << genericLayer->name); + } + cldnn::layout genericBlobLayout(DataTypeFromPrecision(blob.second->getTensorDesc().getPrecision()), + m_defaultFormat, + cldnn::tensor(1, 1, TensorValue(blobDims.back()), 1)); + blobId = CreatePrimitiveFromBlob(topology, blobId, blob.second, genericBlobLayout); + // save index in blobIndex + blobIndex[blob.first] = reorderedInputs.size(); + // add to reorderedInputs + 
reorderedInputs.push_back(blobId); + } + + // Handle kernel parameters + std::vector kernelParameters; + cldnn::format outputFormat(cldnn::format::any); + for (const auto& param : customLayer->KernelParams()) { + switch (param.type) { + case CLDNNCustomLayer::ParamType::Input: { + kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? kernelParameters.size() : size_t(param.paramIndex + 1)); + kernelParameters[param.paramIndex].arg_type = cldnn_arg_type::arg_input; + kernelParameters[param.paramIndex].index = static_cast((param.portIndex >= inputPrimitives.size()) ? -1 : param.portIndex); + + // Handle input reorder + if (param.portIndex < inputPrimitives.size() && reorderedInputs[param.portIndex].empty()) { + // todo: add support for multiple reorders of the same input? (read as bfyx for one arg and yxfb for another) + if (param.format != cldnn::format::any) { + auto reorderPrimName = inputPrimitives[param.portIndex] + "_" + layer->name + m_preCustomLayerTag; + auto preprocessPrim = cldnn::reorder( + reorderPrimName, + inputPrimitives[param.portIndex], + param.format, + DataTypeFromPrecision(layer->precision)); + + primitivesToIRLayersMap[reorderPrimName] = { layer->name }; + topology.add(preprocessPrim); + profilingIDs.push_back(reorderPrimName); + InitProfileInfo(reorderPrimName, "Reorder"); + reorderedInputs[param.portIndex] = (reorderPrimName); + } else { + reorderedInputs[param.portIndex] = inputPrimitives[param.portIndex]; + } + } + } + break; + case CLDNNCustomLayer::ParamType::Output: { + kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? kernelParameters.size() : size_t(param.paramIndex + 1)); + kernelParameters[param.paramIndex].arg_type = cldnn_arg_type::arg_output; + kernelParameters[param.paramIndex].index = + static_cast((param.portIndex >= inputPrimitives.size()) ? -1 : param.portIndex); + outputFormat = param.format; + } + break; + case CLDNNCustomLayer::ParamType::Data: { + kernelParameters.resize(kernelParameters.size() > size_t(param.paramIndex + 1) ? kernelParameters.size() : size_t(param.paramIndex + 1)); + kernelParameters[param.paramIndex].arg_type = cldnn_arg_type::arg_input; + kernelParameters[param.paramIndex].index = + static_cast((blobIndex.find(param.blobName) == blobIndex.end()) ? -1 : blobIndex.at(param.blobName)); + } + break; + default: + THROW_CLDNN_EXCEPTION("Invalid custom layer param type: " << param.type << " in layer: " << genericLayer->name); + } + } + const std::string layerTitle("\n// Layer " + layer->name + " using Custom Layer " + customLayer->Name() + "\n"); + const std::string defineTitle("// Custom Layer User Defines\n"); + + auto dims = genericLayer->outData[0]->getTensorDesc().getDims(); + size_t N = (dims.size() > 0) ? dims[0] : 1; + size_t C = (dims.size() > 1) ? dims[1] : 1; + size_t H = (dims.size() > 2) ? dims[2] : 1; + size_t W = (dims.size() > 3) ? 
dims[3] : 1; + cldnn::tensor outputTensor = cldnn::tensor(cldnn::batch(N), cldnn::feature(C), cldnn::spatial(W, H)); + + cldnn::layout outputLayout = cldnn::layout(DataTypeFromPrecision(genericLayer->precision), outputFormat, outputTensor); + + // evaluate work sizes rules + std::vector gws, lws; + + // assume output tensor is dimension source by default + int batchDim = outputTensor.batch[0]; + int featureDim = outputTensor.feature[0]; + int yDim = outputTensor.spatial[1]; + int xDim = outputTensor.spatial[0]; + int iidx = customLayer->InputDimSourceIndex(); + + std::string genericLayerName = layer_type_name_ID(layer); + // if input index is greater than -1, take dimension from input + if (iidx >= 0) { + if (iidx >= genericLayer->insData.size()) + THROW_CLDNN_EXCEPTION("Invalid input tensor for index: " << iidx); + // get dimensions from one of the input tensors + auto inDataPtr = genericLayer->insData[iidx].lock(); + if (!inDataPtr) { + THROW_CLDNN_EXCEPTION("Data inserted into generic layer " << genericLayer->name << " is nullptr"); + } + SizeVector inputDims = inDataPtr->getTensorDesc().getDims(); + + xDim = inputDims[inputDims.size() - 1]; + yDim = dims.size() > 1 ? inputDims[inputDims.size() - 2] : 0; + featureDim = dims.size() > 2 ? inputDims[inputDims.size() - 3] : 0; + batchDim = dims.size() > 3 ? inputDims[inputDims.size() - 4]: 0; + } + const std::map vars = { + { 'b', batchDim } , { 'B', batchDim }, + { 'f', featureDim }, { 'F', featureDim }, + { 'y', yDim }, { 'Y', yDim }, + { 'x', xDim }, { 'X', xDim }, + }; + for (auto rule : customLayer->GlobalSizeRules()) { + SimpleMathExpression expr; + expr.SetVariables(vars); + expr.SetExpression(rule); + gws.push_back(expr.Evaluate()); + } + for (auto rule : customLayer->LocalSizeRules()) { + SimpleMathExpression expr; + expr.SetVariables(vars); + expr.SetExpression(rule); + lws.push_back(expr.Evaluate()); + } + + auto customPrim = cldnn::custom_gpu_primitive( + genericLayerName, + reorderedInputs, + { layerTitle, defineTitle, layerDefines, customLayer->KernelSource() }, + customLayer->KernelEntry(), + kernelParameters, + customLayer->CompilerOptions(), + outputLayout, + gws, + lws); + + if (outputLayout.format != cldnn::format::any && + p_currentOutputs.find(genericLayerName) == p_currentOutputs.end()) { + // Handle output reorder + auto reorderPrimName = genericLayerName + m_postCustomLayerTag; + topology.add( + cldnn::reorder( + reorderPrimName, + genericLayerName, + m_defaultFormat, + customPrim.output_layout.data_type)); + primitivesToIRLayersMap[reorderPrimName] = { layer->name }; + primitiveIDs[genericLayerName] = reorderPrimName; + primitiveIDs[reorderPrimName] = reorderPrimName; + profilingIDs.push_back(reorderPrimName); + InitProfileInfo(reorderPrimName, "Reorder"); + } else { + primitiveIDs[genericLayerName] = genericLayerName; + } + primitivesToIRLayersMap[genericLayerName] = { layer->name }; + topology.add(customPrim); + profilingIDs.push_back(genericLayerName); +} + +void Program::CreateSimplerNMSPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 3); + IE_ASSERT(layer->insData[0].lock()->getTensorDesc().getDims().front() == 1); // only handling input batch size 1 + IE_ASSERT(layer->insData[1].lock()->getTensorDesc().getDims().front() == 1); // only handling input batch size 1 + auto simpleNMSLayer = as (layer); + + int max_num_proposals = simpleNMSLayer->GetParamAsInt("max_num_proposals"); + float iou_threshold = simpleNMSLayer->GetParamAsFloat("iou_threshold", 0.7f); + int 
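// ---------------------------------------------------------------------------
// Worked example for the gws/lws evaluation above (the rule strings are
// hypothetical; the real ones come from the custom layer's XML description):
//   with b = 1, f = 64, y = 32, x = 32
//   GlobalSizeRules() = { "B*F", "Y", "X" }  ->  gws = { 64, 32, 32 }
//   LocalSizeRules()  = { "1", "1", "16" }   ->  lws = { 1, 1, 16 }
// i.e. each rule is evaluated by SimpleMathExpression against the
// single-letter variables filled in from the chosen dimension source.
// ---------------------------------------------------------------------------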
min_bbox_size = simpleNMSLayer->GetParamAsInt("min_bbox_size", 16); + int feature_stride = simpleNMSLayer->GetParamAsInt("feat_stride", 16); + int pre_nms_topn = simpleNMSLayer->GetParamAsInt("pre_nms_topn"); + int post_nms_topn = simpleNMSLayer->GetParamAsInt("post_nms_topn"); + std::vector scale = simpleNMSLayer->GetParamAsFloats("scale"); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + + std::string simpleNMSLayerName = layer_type_name_ID(layer); + auto simpleNMSPrim = cldnn::proposal( + simpleNMSLayerName, + inputPrimitives[0], // cls_score + inputPrimitives[1], // bbox_pred + inputPrimitives[2], // im_info + max_num_proposals, + iou_threshold, + min_bbox_size, + feature_stride, + pre_nms_topn, + post_nms_topn, + { 0.5f, 1.0f, 2.0f }, // ratios for the SimplerNMS variant + scale); + + primitivesToIRLayersMap[simpleNMSLayerName] = { layer->name }; + primitiveIDs[simpleNMSLayerName] = simpleNMSLayerName; + topology.add(simpleNMSPrim); + profilingIDs.push_back(simpleNMSLayerName); +} + +void Program::CreateEltwisePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateEltwiseLayer(layer); + + auto eltwiseLayer = as (layer); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + + std::vector coefficients = eltwiseLayer->coeff; + if (eltwiseLayer->_operation != InferenceEngine::EltwiseLayer::Sum && !coefficients.empty()) { + THROW_IE_EXCEPTION << "Only sum operation supports operands coefficients"; + } + + if (!coefficients.empty() && coefficients.size() != inputPrimitives.size()) { + THROW_IE_EXCEPTION << "Number of provided coefficients is not equal to number of operands"; + } + + std::string eltwiseLayerName = layer_type_name_ID(layer); + auto eltwisePrim = cldnn::eltwise( + eltwiseLayerName, + inputPrimitives, + EltwiseModeFromIEEltwise(eltwiseLayer->_operation), + coefficients); + + auto sumScaleBlob = getBlobOrNull(layer, "eltwise-sum-scale"); + + if (sumScaleBlob != nullptr) { + // Quantization for eltwise is currently supported only for two inputs, where one of them is convolution + if (layer->insData.size() != 2) { + THROW_CLDNN_EXCEPTION("Too many inputs (" << layer->insData.size() << ") for quantized Eltwise layer " << layer->name); + } + + auto input0Type = LayerTypeFromStr(layer->insData[0].lock()->getCreatorLayer().lock()->type); + auto input1Type = LayerTypeFromStr(layer->insData[1].lock()->getCreatorLayer().lock()->type); + + size_t otherInputIdx; + if (input0Type == LayerType::Convolution) { + otherInputIdx = 1; + } else if (input1Type == LayerType::Convolution) { + otherInputIdx = 0; + } else { + THROW_CLDNN_EXCEPTION("Could not find Convolution to fuse " << layer->name << " with - required for quantized Eltwise"); + } + + eltwisePrim.input_quantization_factors.resize(2, 1.f); + // For now only per tensor quantization is supported + eltwisePrim.input_quantization_factors[otherInputIdx] = sumScaleBlob->buffer().as()[0]; + } + + topology.add(eltwisePrim); + primitivesToIRLayersMap[eltwiseLayerName] = { layer->name }; + profilingIDs.push_back(eltwiseLayerName); + + // Cast output data type if it differs from operation precision + auto operationPrecision = layer->precision; + auto outputPrecision = layer->outData[0]->getPrecision(); + + auto lastLayerName = eltwiseLayerName; + + if (operationPrecision != outputPrecision) { + auto reorderLayerName = eltwiseLayerName + "_cldnn_out_cast"; + auto reorderPrim = cldnn::reorder( + reorderLayerName, + eltwiseLayerName, + 
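// ---------------------------------------------------------------------------
// For reference: with coefficients, the Sum mode above computes the
// element-wise combination y = c_0 * x_0 + c_1 * x_1 + ..., which is why the
// checks reject coefficients for other modes and require exactly one
// coefficient per operand. Scalar sketch (illustrative only):
#include <vector>

static float EltwiseSumRef(const std::vector<float>& xs, const std::vector<float>& cs) {
    float y = 0.0f;
    for (size_t i = 0; i < xs.size(); i++)
        y += (cs.empty() ? 1.0f : cs[i]) * xs[i];   // empty coefficients mean a plain sum
    return y;
}
// ---------------------------------------------------------------------------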
defaultFormatForDims(layer->outData[0]->getTensorDesc().getDims().size()),
+            DataTypeFromPrecision(outputPrecision));
+
+        topology.add(reorderPrim);
+        primitivesToIRLayersMap[reorderLayerName] = { layer->name };
+        profilingIDs.push_back(reorderLayerName);
+        primitiveIDs[reorderLayerName] = reorderLayerName;
+
+        lastLayerName = reorderLayerName;
+    }
+
+    primitiveIDs[eltwiseLayerName] = lastLayerName;
+}
+
+inline cldnn::concatenation::concatenation_axis ConcatAxisFromIEAxis(unsigned axis, unsigned sz) {
+    if (axis >= sz)
+        THROW_CLDNN_EXCEPTION("Concatenation axis exceeds number of dimensions");
+
+    // Difference in dimension ordering between IE and clDNN:
+    // reverse spatial dimensions after batch and feature.
+    unsigned cldnn_axis = axis;
+    if (axis >= 2) {
+        auto spatial_axis = axis - 2;
+        // Default and minimum number of dimensions is 4
+        auto spatial_size = std::max(sz, 4u) - 2;
+        cldnn_axis = spatial_size - spatial_axis - 1 + 2;
+    }
+
+    switch (cldnn_axis) {
+        case 0:
+            return cldnn::concatenation::concatenation_axis::along_b;
+        case 1:
+            return cldnn::concatenation::concatenation_axis::along_f;
+        case 2:
+            return cldnn::concatenation::concatenation_axis::along_x;
+        case 3:
+            return cldnn::concatenation::concatenation_axis::along_y;
+        case 4:
+            return cldnn::concatenation::concatenation_axis::along_z;
+        case 5:
+            return cldnn::concatenation::concatenation_axis::along_w;
+        default: THROW_CLDNN_EXCEPTION("Unsupported concatenation axis: " << axis);
+            break;
+    }
+
+    return cldnn::concatenation::concatenation_axis::along_f;  // shouldn't get here
+}
+
+void Program::CreateConcatenatePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
+    ValidateLayer(layer, 0);
+    auto concatLayer = as<InferenceEngine::ConcatLayer*> (layer);
+    auto inputPrimitives = GetPrevLayersPrimitives(layer);
+    std::string concatLayerName = layer_type_name_ID(layer);
+    auto concatPrim = cldnn::concatenation(
+        concatLayerName,
+        inputPrimitives,
+        ConcatAxisFromIEAxis(concatLayer->_axis,
+                             concatLayer->input().get()->getTensorDesc().getDims().size()));
+
+    primitivesToIRLayersMap[concatLayerName] = { layer->name };
+    primitiveIDs[concatLayerName] = concatLayerName;
+    topology.add(concatPrim);
+    profilingIDs.push_back(concatLayerName);
+}
+
+void Program::CreateSplitPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
+    ValidateLayer(layer, 1);
+    auto splitLayer = as<InferenceEngine::SplitLayer*> (layer);
+    if (IsValidSplitConvMerge(splitLayer)) {
+        // AlexNet-style split->conv*2->merge
+        CreateFusedSplitConvMergePrimitive(topology, layer);
+    } else {
+#ifdef _USE_SPLIT_PRIMITIVE
+        auto inputPrimitives = GetPrevLayersPrimitives(layer);
+        auto inputDims = splitLayer->insData[0].lock()->getTensorDesc().getDims();
+        InferenceEngine::SizeVector startOffset(inputDims.size());
+        std::vector<std::pair<cldnn::primitive_id, cldnn::tensor>> outputOffsets;
+
+        std::string splitLayerName = layer_type_name_ID(layer);
+        for (auto& outLayer : splitLayer->outData) {
+            if (outLayer->dims.size() != startOffset.size()) {
+                THROW_CLDNN_EXCEPTION("Invalid dimensions in split layer: " << splitLayer->name << " output: " << outLayer->name);
+            }
+            for (size_t i = 0; i < inputDims.size(); i++) {
+                if ((outLayer->dims[i] + startOffset[i]) > inputDims[i]) {
+                    THROW_CLDNN_EXCEPTION("Invalid dimensions in split layer: " << splitLayer->name << " output: " << outLayer->name);
+                }
+            }
+            auto outTensor = CldnnTensorFromIEDims(outLayer->getTensorDesc().getDims());
+            std::string outLayerName = splitLayer->type + ":" + outLayer->name;
+
+            auto cropPrim = cldnn::crop(outLayerName, inputPrimitives[0], outTensor, CldnnTensorFromIEDims(startOffset));
+            topology.add(cropPrim);
+
+            primitivesToIRLayersMap[outLayerName] = { layer->name };
+            primitiveIDs[outLayerName] = outLayerName;
+            profilingIDs.push_back(outLayerName);
+            outputOffsets.emplace_back(outLayerName, CldnnTensorFromIEDims(startOffset));
+            for (size_t i = 0; i < inputDims.size(); i++) {
+                if (outLayer->dims[i] != inputDims[i]) {
+                    startOffset[i] += outLayer->dims[i];
+                }
+            }
+        }
+
+        auto splitPrim = cldnn::split(
+            splitLayerName,
+            inputPrimitives[0],
+            outputOffsets);
+        topology.add(splitPrim);
+
+        // set split as not_run
+        InitProfileInfo(splitLayerName, layer->type, "None", InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);  // Mark this layer as optimized out
+
+#else  // _USE_SPLIT_PRIMITIVE
+        // TODO: replace with clDNN split when it's implemented
+        auto inputPrimitives = GetPrevLayersPrimitives(layer);
+        auto inDataPtr = splitLayer->insData[0].lock();
+        if (!inDataPtr) {
+            THROW_CLDNN_EXCEPTION("Data inserted into split layer " << splitLayer->name << " is nullptr");
+        }
+        auto inputDims = inDataPtr->getTensorDesc().getDims();
+        InferenceEngine::SizeVector startOffset(inputDims.size());
+
+        for (auto& outLayer : splitLayer->outData) {
+            std::string outLayerName = layer_type_lower(splitLayer) + ":" + outLayer->getName();
+            const auto outLayerDims = outLayer->getTensorDesc().getDims();
+            if (outLayerDims.size() != startOffset.size()) {
+                THROW_CLDNN_EXCEPTION("Invalid dimensions in split layer: " << splitLayer->name << " output: " << outLayer->getName());
+            }
+            for (size_t i = 0; i < inputDims.size(); i++) {
+                if ((outLayerDims[i] + startOffset[i]) > inputDims[i]) {
+                    THROW_CLDNN_EXCEPTION("Invalid dimensions in split layer: " << splitLayer->name << " output: " << outLayer->getName());
+                }
+            }
+
+            auto outTensor = CldnnTensorFromIEDims(outLayerDims, 1);
+            auto offsetTensor = CldnnTensorFromIEDims(startOffset, 0);
+
+            auto cropPrim = cldnn::crop(outLayerName, inputPrimitives[0], outTensor, offsetTensor);
+            primitivesToIRLayersMap[outLayerName] = { layer->name };
+            primitiveIDs[outLayerName] = outLayerName;
+            topology.add(cropPrim);
+            profilingIDs.push_back(outLayerName);
+            InitProfileInfo(outLayerName, "Crop");
+
+            for (size_t i = 0; i < inputDims.size(); i++) {
+                if (outLayerDims[i] != inputDims[i]) {
+                    startOffset[i] += outLayerDims[i];
+                }
+            }
+        }
+
+        // set split as not_run
+        InitProfileInfo(layer->name, layer->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);  // Mark this layer as optimized out
+#endif  // _USE_SPLIT_PRIMITIVE
+    }
+}
+
+void Program::CreateFusedSplitConvMergePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
+    auto inputPrimitives = GetPrevLayersPrimitives(layer);
+    // only handle the split->conv->merge topology for now
+    auto splitLayer = as<InferenceEngine::SplitLayer*> (layer);
+    IE_ASSERT(IsValidSplitConvMerge(splitLayer));
+
+    auto convLayer1 =
+        as<InferenceEngine::ConvolutionLayer*> (GetNextSingleLayer(splitLayer->outData[0]));
+    auto convLayer2 =
+        as<InferenceEngine::ConvolutionLayer*> (GetNextSingleLayer(splitLayer->outData[1]));
+    auto concatLayer =
+        as<InferenceEngine::ConcatLayer*> (GetNextSingleLayer(GetNextSingleLayer(splitLayer->outData[0])));
+
+    // Mark these layers as optimized out
+    InitProfileInfo(convLayer1->name, convLayer1->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
+    InitProfileInfo(convLayer2->name, convLayer2->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
+    InitProfileInfo(concatLayer->name, concatLayer->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
+
+    // build the split conv
primitive + std::vector weightPrimID; + std::vector biasPrimID; + CreateWeightAndBiasPrimitives(topology, GetNextSingleLayer(splitLayer->outData[0]), weightPrimID, biasPrimID); + CreateWeightAndBiasPrimitives(topology, GetNextSingleLayer(splitLayer->outData[1]), weightPrimID, biasPrimID); + + auto concatLayerPtr = std::make_shared(*concatLayer); + + cldnn::tensor stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), + cldnn::spatial(convLayer1->_stride[X_AXIS], convLayer1->_stride[Y_AXIS])); + auto allPad = getPaddings(*convLayer1); + int x_pad = allPad.begin[X_AXIS], y_pad = allPad.begin[Y_AXIS]; + cldnn::tensor padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), + cldnn::spatial(-x_pad, -y_pad, 0)); + + cldnn::tensor dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), + cldnn::spatial(convLayer1->_dilation[X_AXIS], convLayer1->_dilation[Y_AXIS])); + + std::string splitLayerName = layer_type_name_ID(layer); + auto splitPrim = cldnn::convolution(splitLayerName, + inputPrimitives[0], + weightPrimID, + biasPrimID, + stride, + padding, + dilation, + false, + 0.0f, + CldnnTensorFromIEDims(concatLayer->outData[0]->getTensorDesc().getDims())); + + layer = concatLayerPtr; + + primitivesToIRLayersMap[splitLayerName] = {convLayer1->name, convLayer2->name, concatLayer->name}; + primitiveIDs[splitLayerName] = splitLayerName; + primitiveIDs[layer_type_name_ID(convLayer1)] = splitLayerName; + primitiveIDs[layer_type_name_ID(convLayer2)] = splitLayerName; + primitiveIDs[layer_type_name_ID(concatLayer)] = splitLayerName; // pair the last merged layer (concat or relu) with + // this primitive name to be used as + // input prim for subsequent layers + topology.add(splitPrim); + profilingIDs.push_back(splitLayerName); +} + +void Program::CreatePowerPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto powerLayer = as (layer); + if (powerLayer->power != 1.0f && powerLayer->power != 0.5f) { + auto power = powerLayer->power; + auto scale = powerLayer->scale; + auto shift = powerLayer->offset; + + std::string powerLayerName = layer_type_name_ID(layer); + std::string linearLayerName = powerLayerName + "_linear_activation"; + auto linearActivationPrim = cldnn::activation(linearLayerName, inputPrimitives[0], activation_linear, { scale, shift }); + topology.add(linearActivationPrim); + profilingIDs.push_back(linearLayerName); + primitiveIDs[linearLayerName] = linearLayerName; + + auto powActivationPrim = cldnn::activation(powerLayerName, linearLayerName, activation_pow, { power, 0.f }); + topology.add(powActivationPrim); + profilingIDs.push_back(powerLayerName); + primitiveIDs[powerLayerName] = powerLayerName; + } else { + std::string powerLayerName = layer_type_name_ID(layer); + if ((powerLayer->scale == 1.0f) && (powerLayer->offset == 0.0f)) { + if (powerLayer->power == 0.5f) { + auto activationPrim = cldnn::activation(powerLayerName, inputPrimitives[0], activation_sqrt); + topology.add(activationPrim); + profilingIDs.push_back(powerLayerName); + primitiveIDs[powerLayerName] = powerLayerName; + } else { + // skip this layer + primitiveIDs[powerLayerName] = inputPrimitives[0]; // register the previous primID for this layer too + InitProfileInfo(layer->name, layer->type, false, InferenceEngine::InferenceEngineProfileInfo::NOT_RUN); // Mark this layer as not run + } + } else { + // create scale primitive + auto scaleValuePrimName = powerLayerName + m_scalesTag; + 
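// ---------------------------------------------------------------------------
// The general-power branch above decomposes IE's Power layer,
//   y = (scale * x + shift) ^ power,
// into activation_linear (a*x + b with a = scale, b = shift) followed by
// activation_pow. Scalar reference sketch (illustrative only):
#include <cmath>

static float PowerLayerRef(float x, float scale, float shift, float power) {
    const float linear = scale * x + shift;   // activation_linear step
    return std::pow(linear, power);           // activation_pow step
}
// ---------------------------------------------------------------------------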
AddSingleValuePrimitive(topology, scaleValuePrimName, + DataTypeFromPrecision(powerLayer->precision), + powerLayer->scale); + + cldnn::primitive_id biasValuePrimName = ""; + if (powerLayer->offset != 0.0f) { + biasValuePrimName = powerLayerName + m_biasesTag; + AddSingleValuePrimitive(topology, biasValuePrimName, + DataTypeFromPrecision(powerLayer->precision), + powerLayer->offset); + } + auto scalePrim = cldnn::scale( + powerLayerName, + inputPrimitives[0], + scaleValuePrimName, + biasValuePrimName); + + primitiveIDs[powerLayerName] = powerLayerName; + topology.add(scalePrim); + profilingIDs.push_back(powerLayerName); + + if (powerLayer->power == 0.5f) { + auto activationPrim = cldnn::activation(powerLayerName + "_sqrt", powerLayerName, activation_sqrt); + topology.add(activationPrim); + profilingIDs.push_back(powerLayerName + "_sqrt"); + } + } + } +} + +void Program::CreateSoftMaxPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto softmaxLayer = as (layer); + + std::string softmaxLayerName = layer_type_name_ID(layer); + auto softmaxPrim = cldnn::softmax(softmaxLayerName, + inputPrimitives[0], + SoftmaxDimensionFromIEAxis(softmaxLayer)); + primitivesToIRLayersMap[softmaxLayerName] = { layer->name }; + primitiveIDs[softmaxLayerName] = softmaxLayerName; + topology.add(softmaxPrim); + profilingIDs.push_back(softmaxLayerName); +} + +void Program::CreateFullyConnectedPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto fcLayer = as (layer); + + std::string fcLayerName = layer_type_name_ID(layer); + // create bias primitive + cldnn::primitive_id biasesPrimID = ""; + if (fcLayer->blobs.count("biases") != 0) { + biasesPrimID = fcLayerName + m_biasesTag; + auto biasesBlob = getBlob(layer, "biases"); + cldnn::layout fcbLayout( + DataTypeFromPrecision(biasesBlob->getTensorDesc().getPrecision()), + m_defaultFormat, + (cldnn::tensor) cldnn::spatial(TensorValue(fcLayer->_out_num))); + biasesPrimID = CreatePrimitiveFromBlob(topology, biasesPrimID, biasesBlob, fcbLayout); + } + + // create weights primitive + // gcc bug to resolve auto, at least for 5.4 version + std::shared_ptr insData0 = fcLayer->insData[0].lock(); + IE_ASSERT(insData0 != nullptr); + cldnn::primitive_id weightsPrimID = fcLayerName + m_weightsTag; + cldnn::tensor weightsDims; + InferenceEngine::SizeVector insData0dims(insData0->getTensorDesc().getDims()); + switch (insData0dims.size()) { + case 4: + weightsDims = { TensorValue(fcLayer->outData[0]->getTensorDesc().getDims().back()), + TensorValue(insData0dims[1]), + TensorValue(insData0dims[3]), + TensorValue(insData0dims[2]) }; + break; + case 3: + weightsDims = { TensorValue(fcLayer->outData[0]->getTensorDesc().getDims().back()), + TensorValue(insData0dims[1]), + 1, + TensorValue(insData0dims[2])}; + break; + case 2: + weightsDims = { TensorValue(fcLayer->outData[0]->getTensorDesc().getDims().back()), TensorValue(insData0dims[1]), 1, 1 }; + break; + default: THROW_CLDNN_EXCEPTION("Invalid data dimensions"); + } + auto weightsBlob = getBlob(layer, "weights"); + cldnn::layout fcwLayout( + DataTypeFromPrecision(weightsBlob->getTensorDesc().getPrecision()), + m_defaultFormat, + weightsDims); + weightsPrimID = CreatePrimitiveFromBlob(topology, weightsPrimID, weightsBlob, fcwLayout); + + auto inputPrecision = layer->insData[0].lock()->getPrecision(); + auto 
inputQuantized =
+        inputPrecision == InferenceEngine::Precision::I8 ||
+        inputPrecision == InferenceEngine::Precision::U8;
+
+    auto outputPrecision = layer->outData[0]->getPrecision();
+    auto outputQuantized =
+        outputPrecision == InferenceEngine::Precision::I8 ||
+        outputPrecision == InferenceEngine::Precision::U8;
+
+    std::vector<cldnn::primitive_id> wQuantizationPrimID;
+
+    CreateQuantizationPrimitives(topology, layer, wQuantizationPrimID, true);
+
+    if (!inputQuantized || outputQuantized) {
+        // Either no quantization, or the output is also quantized
+        auto fcPrim = cldnn::fully_connected(fcLayerName,
+                                             inputPrimitives[0],
+                                             weightsPrimID,
+                                             biasesPrimID,
+                                             false,
+                                             0.0f);
+
+        // Add quantization
+        if (!wQuantizationPrimID.empty()) {
+            fcPrim.weights_quantization_factors = wQuantizationPrimID[0];
+        }
+
+        topology.add(fcPrim);
+    } else {
+        // Output is supposed to be float.
+        // Currently fully_connected does not support forcing the output data type,
+        // so a 1x1 convolution is used instead.
+
+        if (wQuantizationPrimID.empty())
+            THROW_CLDNN_EXCEPTION("Could not find dequantization scales for " << fcLayerName);
+
+        // We need to flatten the weights for the 1x1 kernel
+        if (insData0dims.size() != 2) {
+            auto newWeightsPrimID = weightsPrimID + "_cldnn_w_flatten";
+            auto newShape = cldnn::tensor(
+                cldnn::batch(weightsDims.batch[0]),
+                cldnn::feature(weightsDims.feature[0] * weightsDims.spatial[0] * weightsDims.spatial[1]));
+            auto reshapePrim = cldnn::reshape(newWeightsPrimID, weightsPrimID, newShape);
+
+            topology.add(reshapePrim);
+            weightsPrimID = newWeightsPrimID;
+        }
+
+        auto convPrim = cldnn::convolution(fcLayerName,
+                                           inputPrimitives[0],
+                                           { weightsPrimID },
+                                           { biasesPrimID },
+                                           cldnn::tensor(1),
+                                           cldnn::tensor(0),
+                                           cldnn::tensor(1),
+                                           false,
+                                           0.f);
+
+        convPrim.output_data_type = DataTypeFromPrecision(outputPrecision);
+
+        // TODO: fix in clDNN - there is no reason this field should be immutable; most other fields are mutable
+        auto& wq = const_cast<std::vector<cldnn::primitive_id>&>(convPrim.weights_quantization_factors.ref());
+        wq.insert(wq.end(), wQuantizationPrimID.begin(), wQuantizationPrimID.end());
+
+        topology.add(convPrim);
+    }
+
+    primitivesToIRLayersMap[fcLayerName] = { layer->name };
+    primitiveIDs[fcLayerName] = fcLayerName;
+    profilingIDs.push_back(fcLayerName);
+}
+
+void Program::CreatePoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
+    ValidateLayer(layer, 1);
+    auto inputPrimitives = GetPrevLayersPrimitives(layer);
+    auto poolLayer = as<InferenceEngine::PoolingLayer*>(layer);
+
+    std::string poolLayerName = layer_type_name_ID(layer);
+    auto allPads = getPaddings(*poolLayer);
+    if (poolLayer->outData.size() > 1) {
+        // max pooling with argmax
+        SizeVector argmaxDims;
+
+        std::string realOutputID, argmaxOutputID;
+        int outputOrder = 0;
+
+        for (auto out : poolLayer->outData) {
+            auto layersMap = out->getInputTo();
+
+            for (auto item : layersMap) {
+                bool isUnpooling = (LayerTypeFromStr(item.second->type) == Unpooling);
+                if (outputOrder == 1 && isUnpooling) {
+                    argmaxDims = InferenceEngine::SizeVector(out->getTensorDesc().getDims());
+                    argmaxOutputID = out->getName();
+                } else {
+                    realOutputID = out->getName();
+                }
+                outputOrder++;
+            }
+        }
+
+        // create a mutable_data primitive for storing the argmax data
+        cldnn::tensor mutableTensor;
+        switch (argmaxDims.size()) {
+        case 4: mutableTensor = cldnn::tensor(TensorValue(argmaxDims[0]), TensorValue(argmaxDims[1]),
+                                              TensorValue(argmaxDims[3]), TensorValue(argmaxDims[2]));
+            break;
+        case 3: mutableTensor = cldnn::tensor(TensorValue(argmaxDims[0]), TensorValue(argmaxDims[1]),
+                                              1,
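// ---------------------------------------------------------------------------
// Worked example for the weight flattening in the quantized FC path above
// (shapes are illustrative): a 4D FC weight of
//   weightsDims = { out = 1000, f = 512, x = 7, y = 7 }
// is reshaped for the 1x1 convolution to
//   newShape    = { batch = 1000, feature = 512 * 7 * 7 = 25088 },
// so each output neuron sees one flattened input row, which is exactly the
// fully-connected product.
// ---------------------------------------------------------------------------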
TensorValue(argmaxDims[2])); + break; + case 2: mutableTensor = cldnn::tensor(TensorValue(argmaxDims[0]), TensorValue(argmaxDims[1]), 1, 1); + break; + case 1: // not implemented yet. + default: THROW_CLDNN_EXCEPTION("Invalid constant blob dimensions"); + } + + cldnn::layout mutableLayout = cldnn::layout( + cldnn::data_types::f32, + m_defaultFormat, + mutableTensor); + + cldnn::primitive_id argmaxPrimID = layer->name + "_argmax_mutable"; + + auto mem = cldnn::memory::allocate(*m_engine, mutableLayout); + auto argmax_mutable_prim = cldnn::mutable_data(argmaxPrimID, mem); + topology.add(argmax_mutable_prim); + primitivesToIRLayersMap[argmaxPrimID] = { layer->name }; + primitivesToIRLayersMap[argmaxOutputID] = { layer->name }; + primitiveIDs[argmaxPrimID] = argmaxPrimID; + primitiveIDs[argmaxOutputID] = argmaxPrimID; + + // create pooling primitive itself + auto poolPrim = cldnn::pooling(poolLayerName, + inputPrimitives[0], + argmaxPrimID, + cldnn::pooling_mode::max_with_argmax, + (cldnn::tensor) cldnn::spatial(TensorValue(poolLayer->_kernel[X_AXIS]), TensorValue(poolLayer->_kernel[Y_AXIS])), // size + (cldnn::tensor) cldnn::spatial(TensorValue(poolLayer->_stride[X_AXIS]), TensorValue(poolLayer->_stride[Y_AXIS])), // stride + // input offset (padding) - explicit tensor for 0 bf + cldnn::tensor { 0, 0, -TensorValue(allPads.begin[X_AXIS]), -TensorValue(allPads.begin[Y_AXIS]), 0 }, + CldnnTensorFromIEDims(poolLayer->outData[0]->getTensorDesc().getDims())); + + topology.add(poolPrim); + primitiveIDs[realOutputID] = poolLayerName; + } else { + // regular pooling + cldnn::tensor size, stride, input_offset; + + if (poolLayer->input()->getTensorDesc().getDims().size() > 4) { + size = (cldnn::tensor) cldnn::spatial(TensorValue(poolLayer->_kernel[X_AXIS]), + TensorValue(poolLayer->_kernel[Y_AXIS]), + TensorValue(poolLayer->_kernel[Z_AXIS])); + stride = (cldnn::tensor) cldnn::spatial(TensorValue(poolLayer->_stride[X_AXIS]), + TensorValue(poolLayer->_stride[Y_AXIS]), + TensorValue(poolLayer->_stride[Z_AXIS])); + input_offset = { 0, 0, -TensorValue(allPads.begin[X_AXIS]), + -TensorValue(allPads.begin[Y_AXIS]), + -TensorValue(allPads.begin[Z_AXIS]) }; + } else { + size = (cldnn::tensor) cldnn::spatial(TensorValue(poolLayer->_kernel[X_AXIS]), TensorValue(poolLayer->_kernel[Y_AXIS])); + stride = (cldnn::tensor) cldnn::spatial(TensorValue(poolLayer->_stride[X_AXIS]), TensorValue(poolLayer->_stride[Y_AXIS])); + input_offset = { 0, 0, -TensorValue(allPads.begin[X_AXIS]), -TensorValue(allPads.begin[Y_AXIS]) }; + } + + auto poolPrim = cldnn::pooling(poolLayerName, + inputPrimitives[0], + PoolingModeFromIEPooling(poolLayer->_type, poolLayer->_exclude_pad), + size, + stride, + input_offset, + CldnnTensorFromIEDims(poolLayer->outData[0]->getTensorDesc().getDims())); + topology.add(poolPrim); + primitiveIDs[poolLayerName] = poolLayerName; + } + + primitivesToIRLayersMap[poolLayerName] = { layer->name }; + profilingIDs.push_back(poolLayerName); +} + +void Program::CreateLRNPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto lrnLayer = as (layer); + std::string lrnLayerName = layer_type_name_ID(layer); + auto lrnPrim = cldnn::lrn( + lrnLayerName, + inputPrimitives[0], + lrnLayer->_size, + static_cast(lrnLayer->_k), + lrnLayer->_alpha, + lrnLayer->_beta, + lrnLayer->_isAcrossMaps ? 
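// ---------------------------------------------------------------------------
// For reference, the parameters forwarded here implement Caffe-style LRN:
//   y_i = x_i / (k + (alpha / size) * sum_{j in window} x_j^2) ^ beta
// with the window taken across channels (_isAcrossMaps) or within a spatial
// neighbourhood. Scalar sketch for the across-channel case (illustrative;
// assumes Caffe's alpha-divided-by-size convention):
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

static float LRNAcrossChannelsRef(const std::vector<float>& x, size_t c,
                                  uint32_t size, float k, float alpha, float beta) {
    const size_t lo = c >= size / 2 ? c - size / 2 : 0;
    const size_t hi = std::min(x.size() - 1, c + size / 2);
    float sum = 0.0f;
    for (size_t j = lo; j <= hi; j++)
        sum += x[j] * x[j];
    return x[c] / std::pow(k + alpha / size * sum, beta);
}
// ---------------------------------------------------------------------------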
cldnn_lrn_norm_region_across_channel : cldnn_lrn_norm_region_within_channel); + + primitivesToIRLayersMap[lrnLayerName] = { layer->name }; + primitiveIDs[lrnLayerName] = lrnLayerName; + topology.add(lrnPrim); + profilingIDs.push_back(lrnLayerName); +} + +void Program::CreateActivationPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer, const LayerType type) { + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + cldnn_activation_additional_params params{ 0.0f, 0.0f }; + cldnn_activation_func func = cldnn_activation_func_t::activation_none; + + LayerType activationType; + if (type == Activation) { + std::string activation_type = layer->GetParamAsString("type"); + if (activation_type == "tanh") { + activationType = TanH; + } else if (activation_type == "sigmoid" || activation_type == "logistic") { + activationType = Sigmoid; + } else if (activation_type == "elu") { + activationType = ELU; + } else if (activation_type == "relu") { + activationType = ReLU; + } else if (activation_type == "relu6") { + activationType = ReLU6; + } else if (activation_type == "clamp") { + activationType = Clamp; + } else if (activation_type == "exp") { + activationType = Exp; + } else if (activation_type == "not") { + activationType = Not; + } else { + THROW_CLDNN_EXCEPTION("Unsupported activation type (" + activation_type + + ") in layer " + layer->name); + } + } else { + activationType = type; + } + + switch (activationType) { + case TanH: + { + func = cldnn_activation_func_t::activation_hyperbolic_tan; + break; + } + case ELU: + { + func = cldnn_activation_func_t::activation_elu; + params.a = layer->GetParamAsFloat("alpha", 1.0f); + break; + } + case Sigmoid: + { + func = cldnn_activation_func_t::activation_logistic; + break; + } + case ReLU: + { + auto negative_slope = layer->GetParamAsFloat("negative_slope", 0.0f); + if (negative_slope == 0.f) { + func = cldnn_activation_func_t::activation_relu; + } else { + func = cldnn_activation_func_t::activation_relu_negative_slope; + params.a = negative_slope; + } + break; + } + case ReLU6: + { + func = cldnn_activation_func_t::activation_clamp; + params.b = layer->GetParamAsFloat("n", 6.0f); + break; + } + case Clamp: + { + func = cldnn_activation_func_t::activation_clamp; + params.a = layer->GetParamAsFloat("min"); + params.b = layer->GetParamAsFloat("max"); + break; + } + case Exp: + { + func = cldnn_activation_func_t::activation_exp; + break; + } + case Not: + { + func = cldnn_activation_func_t::activation_not; + break; + } + case Asin: + { + func = cldnn_activation_func_t::activation_asin; + break; + } + case Asinh: + { + func = cldnn_activation_func_t::activation_asinh; + break; + } + case Acos: + { + func = cldnn_activation_func_t::activation_acos; + break; + } + case Acosh: + { + func = cldnn_activation_func_t::activation_acosh; + break; + } + case Atan: + { + func = cldnn_activation_func_t::activation_atan; + break; + } + case Atanh: + { + func = cldnn_activation_func_t::activation_atanh; + break; + } + case Abs: + { + func = cldnn_activation_func_t::activation_abs; + break; + } + case Floor: + { + func = cldnn_activation_func_t::activation_floor; + break; + } + case Ceil: + { + func = cldnn_activation_func_t::activation_ceil; + break; + } + case Erf: + { + func = cldnn_activation_func_t::activation_erf; + break; + } + case HardSigmoid: + { + func = cldnn_activation_func_t::activation_hard_sigmoid; + break; + } + case Log: + { + func = cldnn_activation_func_t::activation_log; + break; + } + case Neg: + { + 
func = cldnn_activation_func_t::activation_negative; + break; + } + case Reciprocal: + { + func = cldnn_activation_func_t::activation_reciprocal; + break; + } + case Selu: + { + func = cldnn_activation_func_t::activation_selu; + break; + } + case SoftPlus: + { + func = cldnn_activation_func_t::activation_softplus; + break; + } + case SoftSign: + { + func = cldnn_activation_func_t::activation_softsign; + break; + } + case Tan: + { + func = cldnn_activation_func_t::activation_tan; + break; + } + default: + THROW_CLDNN_EXCEPTION("Unsupported activation type (" + layer->type + + ") in layer " + layer->name); + } + + std::string layerName = layer_type_name_ID(layer); + auto activationPrimitive = cldnn::activation(layerName, inputPrimitives[0], func, params); + primitivesToIRLayersMap[layerName] = { layer->name }; + primitiveIDs[layerName] = layerName; + topology.add(activationPrimitive); + profilingIDs.push_back(layerName); +} + +void Program::CreateCopyPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + + // Optimize out and just update references + std::string layerName = layer_type_name_ID(layer); + primitivesToIRLayersMap[layerName] = { layer->name }; + primitiveIDs[layerName] = inputPrimitives[0]; + InitProfileInfo(layerName, layer->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT); // Mark this layer as optimized out +} + +void Program::CreateUpsamplingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + // Assuming multi-input will be handled by prev concat/eltwise layers + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto upsamplingLayer = as (layer); + uint32_t scale = upsamplingLayer->GetParamAsUInt("scale"); + uint32_t numFilter = upsamplingLayer->GetParamAsUInt("num_filter"); + std::string sampleType = upsamplingLayer->GetParamAsString("sample_type"); + + std::string upsamplingLayerName = layer_type_name_ID(layer); + auto upsamplingPrim = cldnn::upsampling( + upsamplingLayerName, + inputPrimitives[0], + scale, + numFilter, + UpsamplingTypeFromString(sampleType)); + + primitivesToIRLayersMap[upsamplingLayerName] = { layer->name }; + primitiveIDs[upsamplingLayerName] = upsamplingLayerName; + topology.add(upsamplingPrim); + profilingIDs.push_back(upsamplingLayerName); +} + +void Program::CreateResamplePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto resampleLayer = as (layer); + + size_t inFeatures = 1; + float scale = 1.0f; + std::shared_ptr insData0 = layer->insData[0].lock(); + IE_ASSERT(insData0 != nullptr); + auto insData0dims = insData0->getTensorDesc().getDims(); + auto outDims = layer->outData[0]->getTensorDesc().getDims(); + + if (insData0dims.size() > 1) { + inFeatures = insData0dims[1]; + scale = static_cast(outDims.back()) / static_cast(insData0dims.back()); + if (scale < 1.0f) { + THROW_CLDNN_EXCEPTION("Unsupported scale in layer " + layer->name); + } + } + std::string sampleType = resampleLayer->GetParamAsString("type"); + std::string resampleLayerName = layer_type_name_ID(layer); + + cldnn::upsampling_sample_type cldnnSampleType; + if (sampleType == "caffe.ResampleParameter.NEAREST") { + cldnnSampleType = cldnn::upsampling_sample_type::nearest; + } else if (sampleType == "caffe.ResampleParameter.LINEAR") { + cldnnSampleType = 
cldnn::upsampling_sample_type::bilinear; + } else { + THROW_CLDNN_EXCEPTION("Unsupported resampling type (" + sampleType + ") in layer " + layer->name); + } + auto upsamplingPrim = cldnn::upsampling( + resampleLayerName, + inputPrimitives[0], + scale, + inFeatures, + cldnnSampleType); + + primitivesToIRLayersMap[resampleLayerName] = { layer->name }; + primitiveIDs[resampleLayerName] = resampleLayerName; + topology.add(upsamplingPrim); + profilingIDs.push_back(resampleLayerName); +} + +void Program::CreateYOLO2RegionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto YOLOregionLayer = as (layer); + + uint32_t coords = YOLOregionLayer->GetParamAsUInt("coords", 4); + uint32_t classes = YOLOregionLayer->GetParamAsUInt("classes", 20); + uint32_t num = YOLOregionLayer->GetParamAsUInt("num", 1); + bool do_softmax = YOLOregionLayer->GetParamAsBool("do_softmax", true); + + uint32_t mask_size = 0; + if (HasParam(YOLOregionLayer->params, "mask")) { + const auto mask = YOLOregionLayer->GetParamAsInts("mask"); + mask_size = static_cast(mask.size()); + } + + std::string YOLOregionLayerName = layer_type_name_ID(layer); + auto regionPrim = cldnn::region_yolo( + YOLOregionLayerName, + inputPrimitives[0], + coords, + classes, + num, + mask_size, + do_softmax); + + primitivesToIRLayersMap[YOLOregionLayerName] = { layer->name }; + primitiveIDs[YOLOregionLayerName] = YOLOregionLayerName; + topology.add(regionPrim); + profilingIDs.push_back(YOLOregionLayerName); +} + +void Program::CreateYOLO2ReorgPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto YOLOreorgLayer = as (layer); + uint32_t stride = YOLOreorgLayer->GetParamAsUInt("stride"); + + std::string YOLOreorgLayerName = layer_type_name_ID(layer); + auto reorgPrim = cldnn::reorg_yolo( + YOLOreorgLayerName, + inputPrimitives[0], + stride); + + primitivesToIRLayersMap[YOLOreorgLayerName] = { layer->name }; + primitiveIDs[YOLOreorgLayerName] = YOLOreorgLayerName; + topology.add(reorgPrim); + profilingIDs.push_back(YOLOreorgLayerName); +} + +void Program::CreateArgMaxMinPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer, const LayerType type) { + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto ArgMaxLayer = as (layer); + const cldnn::arg_max_min::out_type otype = type == ArgMin ? 
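// ---------------------------------------------------------------------------
// Worked examples for the axis normalization in the switch below: negative
// axes are wrapped by adding the rank before a clDNN axis name is chosen.
// For a 4D output:
//   axis = -1  ->  -1 + 4 = 3  ->  axis_name::x
//   axis = -3  ->  -3 + 4 = 1  ->  axis_name::feature
// For a 5D output:
//   axis = -1  ->  -1 + 5 = 4  ->  axis_name::x
//   axis =  2  ->  axis_name::z (5D inserts z between feature and y)
// ---------------------------------------------------------------------------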
cldnn::arg_max_min::out_type::min : cldnn::arg_max_min::out_type::max; + + if (HasParam(ArgMaxLayer->params, "out_max_val")) { + int32_t out_max_val_flag = ArgMaxLayer->GetParamAsInt("out_max_val"); + if (out_max_val_flag != 0) { + THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str << "ArgMax: out_max_val param is not supported for layer: " << layer->name; + } + } + + uint32_t top_k = ArgMaxLayer->GetParamAsUInt("top_k", 1); + + cldnn::arg_max_min::axis_name chosen_axis = cldnn::arg_max_min::axis_name::xyf; + + if (HasParam(ArgMaxLayer->params, "axis")) { + int32_t axis_param = ArgMaxLayer->GetParamAsInt("axis", 1); + + int32_t axis = axis_param; + if (ArgMaxLayer->outData[0]->getTensorDesc().getDims().size() == 5) { + if (-5 <= axis && axis <= -1) + axis += 5; + + switch (axis) { + case 0: chosen_axis = cldnn::arg_max_min::axis_name::batch; break; + case 1: chosen_axis = cldnn::arg_max_min::axis_name::feature; break; + case 2: chosen_axis = cldnn::arg_max_min::axis_name::z; break; + case 3: chosen_axis = cldnn::arg_max_min::axis_name::y; break; + case 4: chosen_axis = cldnn::arg_max_min::axis_name::x; break; + } + } else { + if (-4 <= axis && axis <= -1) + axis += 4; + + switch (axis) { + case 0: chosen_axis = cldnn::arg_max_min::axis_name::batch; break; + case 1: chosen_axis = cldnn::arg_max_min::axis_name::feature; break; + case 2: chosen_axis = cldnn::arg_max_min::axis_name::y; break; + case 3: chosen_axis = cldnn::arg_max_min::axis_name::x; break; + } + } + } + + std::string ArgMaxLayerName = layer_type_name_ID(layer); + auto argmaxPrim = cldnn::arg_max_min( + ArgMaxLayerName, + inputPrimitives, + otype, + top_k, + chosen_axis); + + primitivesToIRLayersMap[ArgMaxLayerName] = { layer->name }; + primitiveIDs[ArgMaxLayerName] = ArgMaxLayerName; + topology.add(argmaxPrim); + profilingIDs.push_back(ArgMaxLayerName); +} + +void Program::CreateTopKPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 2); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto ArgMaxLayer = as (layer); + + cldnn::arg_max_min::out_type otype; + cldnn::arg_max_min::sort_type stype; + + if (layer->GetParamAsString("mode", "max") == "max") + otype = cldnn::arg_max_min::out_type::max; + else + otype = cldnn::arg_max_min::out_type::min; + + if (layer->GetParamAsString("sort", "value") == "value") + stype = cldnn::arg_max_min::sort_type::sort_by_values; + else + stype = cldnn::arg_max_min::sort_type::sort_by_indices; + + auto topKInput = layer->insData[1].lock(); + auto topKInputCreator = topKInput->getCreatorLayer().lock(); + + std::vector topk; + if (topKInputCreator->blobs.size() == 1) { + auto constantBlob = topKInputCreator->blobs.begin()->second; + auto axesPrecision = constantBlob->getTensorDesc().getPrecision(); + if (axesPrecision == InferenceEngine::Precision::FP32) { + auto data = constantBlob->buffer().as(); + for (size_t i = 0; i < constantBlob->size(); ++i) + topk.push_back(data[i]); + } else if (axesPrecision == InferenceEngine::Precision::I32) { + auto data = constantBlob->buffer().as(); + for (size_t i = 0; i < constantBlob->size(); ++i) + topk.push_back(data[i]); + } else { + THROW_IE_EXCEPTION << layer->name << " Incorrect TopK input Precision"; + } + } + + uint32_t top_k = topk[0]; + + cldnn::arg_max_min::axis_name chosen_axis = cldnn::arg_max_min::axis_name::batch; + + if (HasParam(ArgMaxLayer->params, "axis")) { + int32_t axis_param = ArgMaxLayer->GetParamAsInt("axis", -1); + + auto input_dims_num = 
ArgMaxLayer->outData[0]->getTensorDesc().getDims().size(); + int32_t axis = axis_param; + if (input_dims_num == 5) { + if (-5 <= axis && axis <= -1) + axis += 5; + + switch (axis) { + case 0: chosen_axis = cldnn::arg_max_min::axis_name::batch; break; + case 1: chosen_axis = cldnn::arg_max_min::axis_name::feature; break; + case 2: chosen_axis = cldnn::arg_max_min::axis_name::z; break; + case 3: chosen_axis = cldnn::arg_max_min::axis_name::y; break; + case 4: chosen_axis = cldnn::arg_max_min::axis_name::x; break; + } + } else { + if (-input_dims_num <= axis && axis <= -1) + axis += input_dims_num; + + switch (axis) { + case 0: chosen_axis = cldnn::arg_max_min::axis_name::batch; break; + case 1: chosen_axis = cldnn::arg_max_min::axis_name::feature; break; + case 2: chosen_axis = cldnn::arg_max_min::axis_name::y; break; + case 3: chosen_axis = cldnn::arg_max_min::axis_name::x; break; + } + } + } + + if (layer->outData.size() == 2) { + cldnn::layout mutableLayout = cldnn::layout( + DataTypeFromPrecision(layer->outData[1]->getPrecision()), + m_defaultFormat, + CldnnTensorFromIEDims(layer->outData[1]->getDims())); + + auto shared_memory = cldnn::memory::allocate(*m_engine, mutableLayout); + + cldnn::primitive_id argmax_mutable_id_w = layer_type_name_ID(layer) + "_md_write"; + auto argmax_mutable_prim = cldnn::mutable_data(argmax_mutable_id_w, shared_memory); + primitivesToIRLayersMap[argmax_mutable_id_w] = {layer->name}; + primitiveIDs[argmax_mutable_id_w] = argmax_mutable_id_w; + topology.add(argmax_mutable_prim); + inputPrimitives.push_back(argmax_mutable_id_w); + + std::string ArgMaxLayerName = layer_type_lower(layer) + ":" + layer->outData[1]->getName(); + auto argmaxPrim = cldnn::arg_max_min( + ArgMaxLayerName, + inputPrimitives, + otype, + top_k, + chosen_axis, + stype, + true); + + primitivesToIRLayersMap[ArgMaxLayerName] = {layer->name}; + primitiveIDs[ArgMaxLayerName] = ArgMaxLayerName; + topology.add(argmaxPrim); + + cldnn::primitive_id argmax_mutable_id_r = layer_type_lower(layer) + ":" + layer->outData[0]->getName(); + auto argmax_mutable_prim_r = cldnn::mutable_data(argmax_mutable_id_r, {ArgMaxLayerName}, shared_memory); + primitivesToIRLayersMap[argmax_mutable_id_r] = {layer->name}; + primitiveIDs[argmax_mutable_id_r] = argmax_mutable_id_r; + topology.add(argmax_mutable_prim_r); + + profilingIDs.push_back(ArgMaxLayerName); + } else if (layer->outData.size() == 1) { + std::string ArgMaxLayerName = layer_type_lower(layer) + ":" + layer->outData[0]->getName(); + auto argmaxPrim = cldnn::arg_max_min( + ArgMaxLayerName, + inputPrimitives, + otype, + top_k, + chosen_axis, + stype, + true); + + primitivesToIRLayersMap[ArgMaxLayerName] = {layer->name}; + primitiveIDs[ArgMaxLayerName] = ArgMaxLayerName; + topology.add(argmaxPrim); + profilingIDs.push_back(ArgMaxLayerName); + } else { + THROW_IE_EXCEPTION << layer->name << " Incorrect TopK outputs number"; + } +} + +void Program::CreateMaxUnpoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 2); + + auto UnpoolingLayer = as (layer); + + cldnn::primitive_id real_input, argmax_mutable; + + // locate ArgMax primitive + int inputOrder = 0; + for (auto inputData : layer->insData) { + auto prevData = inputData.lock(); + + if (prevData == nullptr) { + THROW_CLDNN_EXCEPTION("MaxUnpooling: nonexistent input for layer: " << layer->name); + } + + auto prevCreator = prevData->getCreatorLayer().lock(); + + if (prevCreator && + (LayerTypeFromStr(prevCreator->type) == Pooling) && + 
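// Aside (editor's sketch, not part of this patch): why the rank-4 and rank-5
// switches above differ. clDNN names dimensions outermost-first (b, f, [w,]
// [z,] y, x), so IE axis 2 means y for a 4-D tensor but z for a 5-D one;
// negative axes count back from the innermost dimension. Helper name is
// illustrative.
#include <stdexcept>
#include <string>

char cldnn_axis_name(int axis, int rank) {
    static const std::string r4 = "bfyx", r5 = "bfzyx", r6 = "bfwzyx";
    const std::string& names = (rank == 6) ? r6 : (rank == 5) ? r5 : r4;
    if (axis < 0) axis += rank;                    // e.g. -1 -> innermost (x)
    if (axis < 0 || axis >= static_cast<int>(names.size()))
        throw std::out_of_range("axis out of range");
    return names[axis];
}
// cldnn_axis_name(2, 4) == 'y'; cldnn_axis_name(2, 5) == 'z'; cldnn_axis_name(-1, 4) == 'x'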
prevCreator->outData.size() > 1 && + inputOrder == 1) { + argmax_mutable = primitiveIDs.at(prevCreator->name + "_argmax_mutable"); + } else { + real_input = primitiveIDs.at(prevData->getName()); + } + inputOrder++; + } + + uint32_t stride = UnpoolingLayer->GetParamAsUInt("stride"); + uint32_t kernel_size = UnpoolingLayer->GetParamAsUInt("kernel_size"); + + std::string UnpoolingLayerName = layer_type_name_ID(layer); + auto unpoolingPrim = cldnn::max_unpooling( + UnpoolingLayerName, + real_input, + argmax_mutable, + (cldnn::tensor) cldnn::spatial(kernel_size, kernel_size), // size + (cldnn::tensor) cldnn::spatial(stride, stride) ); // stride + + primitivesToIRLayersMap[UnpoolingLayerName] = { layer->name }; + primitiveIDs[UnpoolingLayerName] = UnpoolingLayerName; + topology.add(unpoolingPrim); + profilingIDs.push_back(UnpoolingLayerName); +} + +void Program::CreateMVNPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto MvnLayer = as (layer); + + bool across_channels = MvnLayer->GetParamAsBool("across_channels", false); + bool normalize_variance = MvnLayer->GetParamAsBool("normalize_variance", true); + float eps = MvnLayer->GetParamAsFloat("eps", 1e-10f); + + std::string MvnLayerName = layer_type_name_ID(layer); + auto mvnPrim = cldnn::mvn( + MvnLayerName, + inputPrimitives[0], + across_channels, + normalize_variance, + eps); + + primitivesToIRLayersMap[MvnLayerName] = { layer->name }; + primitiveIDs[MvnLayerName] = MvnLayerName; + topology.add(mvnPrim); + profilingIDs.push_back(MvnLayerName); +} + +void Program::CreateTilePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto tileLayer = as (layer); + + int axis = tileLayer->GetParamAsInt("axis", 1); + int tiles = tileLayer->GetParamAsInt("tiles"); + + auto sz = tileLayer->input().get()->getTensorDesc().getDims().size(); + + auto cldnnAxisFromIE = [&](int axis) { + switch (axis) { + case 0: return cldnn::tile::tile_axis::along_b; + case 1: return cldnn::tile::tile_axis::along_f; + case 2: + if (sz > 4) + return cldnn::tile::tile_axis::along_z; + else + return cldnn::tile::tile_axis::along_y; + case 3: + if (sz > 4) + return cldnn::tile::tile_axis::along_y; + else + return cldnn::tile::tile_axis::along_x; + case 4: return cldnn::tile::tile_axis::along_x; + default: THROW_CLDNN_EXCEPTION("Unsupported tile axis: " << axis); + } + }; + + std::string tileLayerName = layer_type_name_ID(layer); + auto tilePrim = cldnn::tile( + tileLayerName, + inputPrimitives[0], + cldnnAxisFromIE(axis), + tiles); + + primitivesToIRLayersMap[tileLayerName] = { layer->name }; + primitiveIDs[tileLayerName] = tileLayerName; + topology.add(tilePrim); + profilingIDs.push_back(tileLayerName); +} + +void Program::CreatePadPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto padLayer = as (layer); + + auto PadTensorFromArgs = [](const std::string &s) -> cldnn::tensor { + std::stringstream ss(s); + std::string item; + std::vector elems; + while (std::getline(ss, item, ',')) { + elems.push_back(static_cast(std::atoll(item.c_str()))); + } + + while (elems.size() < 4) { + elems.push_back(0); + } + + // Swap x and y + auto tmp = elems[2]; + elems[2] = elems[3]; + elems[3] = tmp; + + return cldnn::tensor(elems, 0); + }; + + auto pads_begin = 
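// Aside (editor's sketch, not part of this patch): a standalone equivalent of
// the PadTensorFromArgs lambda defined above, showing what the x/y swap does:
// IE lists pads outermost-first (b, f, y, x) while cldnn::tensor expects
// (b, f, x, y). parse_pads is an illustrative name.
#include <cstdlib>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

std::vector<int> parse_pads(const std::string& s) {
    std::vector<int> v;
    std::stringstream ss(s);
    for (std::string item; std::getline(ss, item, ',');)
        v.push_back(std::atoi(item.c_str()));    // one pad per comma-separated field
    while (v.size() < 4) v.push_back(0);         // zero-fill to rank 4
    std::swap(v[2], v[3]);                       // (b, f, y, x) -> (b, f, x, y)
    return v;
}
// parse_pads("0,0,1,2") yields {0, 0, 2, 1}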
PadTensorFromArgs(padLayer->GetParamAsString("pads_begin")); + auto pads_end = PadTensorFromArgs(padLayer->GetParamAsString("pads_end")); + std::string mode = padLayer->GetParamAsString("pad_mode"); + float pad_value = padLayer->GetParamAsFloat("pad_value", 0.0f); + + cldnn::border_type border_mode; + if (mode == "constant") + border_mode = cldnn::border_type::constant; + else if (mode == "edge") + border_mode = cldnn::border_type::edge; + else if (mode == "symmetric") + border_mode = cldnn::border_type::mirror; + else if (mode == "reflect") + border_mode = cldnn::border_type::mirror_101; + else + THROW_CLDNN_EXCEPTION("Invalid border mode " << mode << " in layer " << padLayer->name); + + std::string padLayerName = layer_type_name_ID(layer); + auto tilePrim = cldnn::border( + padLayerName, + inputPrimitives[0], + pads_begin, + pads_end, + border_mode, + pad_value); + + primitivesToIRLayersMap[padLayerName] = { layer->name }; + primitiveIDs[padLayerName] = padLayerName; + topology.add(tilePrim); + profilingIDs.push_back(padLayerName); +} + +void Program::AddConstantBlobInput(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + if (layer->blobs.empty()) + THROW_IE_EXCEPTION << "No blobs found in const layer " << layer->name; + auto constBlob = layer->blobs.begin()->second; + SizeVector constDims(layer->outData[0]->getTensorDesc().getDims()); + + cldnn::tensor constTensor; + switch (constDims.size()) { + case 6: constTensor = cldnn::tensor(TensorValue(constDims[0]), TensorValue(constDims[1]), + TensorValue(constDims[5]), TensorValue(constDims[4]), + TensorValue(constDims[3]), TensorValue(constDims[2])); + break; + case 5: constTensor = cldnn::tensor(TensorValue(constDims[0]), TensorValue(constDims[1]), + TensorValue(constDims[4]), TensorValue(constDims[3]), TensorValue(constDims[2])); + break; + case 4: constTensor = cldnn::tensor(TensorValue(constDims[0]), TensorValue(constDims[1]), + TensorValue(constDims[3]), TensorValue(constDims[2])); + break; + case 3: constTensor = cldnn::tensor(TensorValue(constDims[0]), TensorValue(constDims[1]), + 1, TensorValue(constDims[2])); + break; + case 2: constTensor = cldnn::tensor(TensorValue(constDims[0]), TensorValue(constDims[1]), 1, 1); + break; + case 1: constTensor = cldnn::tensor(1, TensorValue(constDims[0]), 1, 1); + break; + default: THROW_CLDNN_EXCEPTION("Invalid constant blob dimensions"); + } + cldnn::layout constLayout = cldnn::layout( + DataTypeFromPrecision(layer->blobs.begin()->second->getTensorDesc().getPrecision()), + FormatFromLayout(constBlob->getTensorDesc().getLayout()), + constTensor); + + cldnn::primitive_id initialconstPrimID = layer_type_name_ID(layer); + cldnn::primitive_id constPrimID = CreatePrimitiveFromBlob(topology, initialconstPrimID, constBlob, constLayout); + primitiveIDs[initialconstPrimID] = constPrimID; + primitivesToIRLayersMap[initialconstPrimID] = { layer->name }; +} + +void Program::CreateConvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto convLayer = as(layer); + std::string convLayerName = layer_type_name_ID(layer); + + std::vector weightPrimID; + std::vector biasPrimID; + CreateWeightAndBiasPrimitives(topology, layer, weightPrimID, biasPrimID); + + auto allPads = getPaddings(*convLayer); + int x_pad = allPads.begin[X_AXIS], y_pad = allPads.begin[Y_AXIS]; + cldnn::tensor stride, padding, dilation; + if (convLayer->input()->getTensorDesc().getDims().size() > 4) { + stride = 
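// Aside (editor's sketch, not part of this patch): 1-D reference for the four
// border modes mapped above, as the source index chosen for an out-of-range
// coordinate i on an axis of length n. "constant" has no source index (it
// emits pad_value); "edge" clamps; the two mirror modes reflect with and
// without repeating the edge sample. Helper name is illustrative.
long border_index(long i, long n, bool include_edge) {
    while (i < 0 || i >= n) {
        if (i < 0) i = include_edge ? -i - 1 : -i;                    // left edge
        else       i = include_edge ? 2 * n - i - 1 : 2 * n - i - 2;  // right edge
    }
    return i;
}
// border_index(-2, 5, true) == 1  (mirror / "symmetric")
// border_index(-2, 5, false) == 2 (mirror_101 / "reflect")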
cldnn::tensor(cldnn::batch(1), cldnn::feature(1), + cldnn::spatial(convLayer->_stride[X_AXIS], + convLayer->_stride[Y_AXIS], + convLayer->_stride[Z_AXIS])); + int z_pad = allPads.begin[Z_AXIS]; + padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), + cldnn::spatial(-x_pad, -y_pad, -z_pad)); + dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), + cldnn::spatial(convLayer->_dilation[X_AXIS], + convLayer->_dilation[Y_AXIS], + convLayer->_dilation[Z_AXIS])); + + } else { + stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), + cldnn::spatial(convLayer->_stride[X_AXIS], convLayer->_stride[Y_AXIS])); + padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), + cldnn::spatial(-x_pad, -y_pad, 0)); + dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), + cldnn::spatial(convLayer->_dilation[X_AXIS], convLayer->_dilation[Y_AXIS])); + } + + auto convPrim = cldnn::convolution(convLayerName, + inputPrimitives[0], + weightPrimID, + biasPrimID, + stride, + padding, + dilation, + false, + 0.0f, + CldnnTensorFromIEDims(convLayer->outData[0]->getTensorDesc().getDims())); + + if (convLayer->precision == Precision::I8 || convLayer->precision == Precision::U8) { + // Output data type forcing is possible for u8/i8 -> fp32 only + convPrim.output_data_type = DataTypeFromPrecision(convLayer->outData[0]->getTensorDesc().getPrecision()); + } + + if (convLayer->_group >= 16) { + convPrim.groups = convLayer->_group; + } + + std::vector wScalePrimID; + CreateQuantizationPrimitives(topology, layer, wScalePrimID, true, weightPrimID.size()); + + if (!wScalePrimID.empty()) { + // TODO Fix in clDNN - there is no reason this should be immutable, most other fields are mutable + auto& wq = const_cast&>(convPrim.weights_quantization_factors.ref()); + wq.insert(wq.end(), wScalePrimID.begin(), wScalePrimID.end()); + } + + topology.add(convPrim); + primitivesToIRLayersMap[convLayerName] = { layer->name }; + primitiveIDs[convLayerName] = convLayerName; + profilingIDs.push_back(convLayerName); +} + +void Program::CreateDeformableConvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 2); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto defConvLayer = as(layer); + + std::vector weightPrimID; + std::vector biasPrimID; + CreateWeightAndBiasPrimitives(topology, layer, weightPrimID, biasPrimID); + + cldnn::tensor stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), + cldnn::spatial(defConvLayer->_stride[X_AXIS], defConvLayer->_stride[Y_AXIS], 1)); + auto allPad = getPaddings(*defConvLayer); + cldnn::tensor padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), + cldnn::spatial(-allPad.begin[X_AXIS], -allPad.begin[Y_AXIS], 0)); + cldnn::tensor dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), + cldnn::spatial(defConvLayer->_dilation[X_AXIS], defConvLayer->_dilation[Y_AXIS], 1)); + + cldnn::tensor kernel = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), + cldnn::spatial(defConvLayer->_kernel[X_AXIS], defConvLayer->_kernel[Y_AXIS], 1)); + + const uint32_t deformable_group = defConvLayer->GetParamAsUInt("deformable_group", 1); + if (defConvLayer->_group > 1) { + std::string defConvLayerName = layer_type_name_ID(layer); + auto defConvPrim = cldnn::convolution(defConvLayerName, + inputPrimitives[0], + inputPrimitives[1], + weightPrimID, + biasPrimID, + defConvLayer->_group, + deformable_group, + stride, + padding, + dilation, + CldnnTensorFromIEDims(defConvLayer->outData[0]->getTensorDesc().getDims())); + 
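// Aside (editor's sketch, not part of this patch): the negated x_pad/y_pad
// values above appear to follow clDNN's convention of expressing padding as
// an input offset (the filter window starts at -pad). For reference, the
// output size of one spatial dimension under these parameters:
#include <cstdint>

int64_t conv_out_dim(int64_t in, int64_t kernel, int64_t stride,
                     int64_t dilation, int64_t pad) {
    const int64_t eff_kernel = (kernel - 1) * dilation + 1;  // dilated footprint
    return (in + 2 * pad - eff_kernel) / stride + 1;
}
// conv_out_dim(224, 3, 1, 1, 1) == 224 (3x3 with pad 1 preserves size)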
topology.add(defConvPrim); + primitivesToIRLayersMap[defConvLayerName] = { layer->name }; + primitiveIDs[defConvLayerName] = defConvLayerName; + profilingIDs.push_back(defConvLayerName); + } else { + std::string defConvLayerNameInterp = layer_type_name_ID(layer)+"_interp"; + std::string defConvLayerNameConv = layer_type_name_ID(layer); + auto defConvPrimInterp = cldnn::deformable_interp(defConvLayerNameInterp, + inputPrimitives[0], + inputPrimitives[1], + defConvLayer->_group, + deformable_group, + stride, + padding, + dilation, + CldnnTensorFromIEDims(defConvLayer->outData[0]->getTensorDesc().getDims()), + kernel); + topology.add(defConvPrimInterp); + primitivesToIRLayersMap[defConvLayerNameInterp] = { layer->name }; + primitiveIDs[defConvLayerNameInterp] = defConvLayerNameInterp; + profilingIDs.push_back(defConvLayerNameInterp); + auto defConvPrim = cldnn::deformable_conv(defConvLayerNameConv, + defConvLayerNameInterp, + weightPrimID, + biasPrimID, + defConvLayer->_group, + CldnnTensorFromIEDims(defConvLayer->outData[0]->getTensorDesc().getDims())); + topology.add(defConvPrim); + primitivesToIRLayersMap[defConvLayerNameConv] = { layer->name }; + primitiveIDs[defConvLayerNameConv] = defConvLayerNameConv; + profilingIDs.push_back(defConvLayerNameConv); + } +} + +void Program::CreateBinaryConvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 1); + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto binaryConvLayer = as(layer); + + if (binaryConvLayer->_group != 1) + THROW_CLDNN_EXCEPTION("BinaryConvolution with groups is not supported yet"); + + std::vector weightPrimID; + std::vector biasPrimID; + CreateBinaryWeightAndBiasPrimitives(topology, layer, weightPrimID, biasPrimID); + cldnn::tensor stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), + cldnn::spatial(binaryConvLayer->_stride[X_AXIS], binaryConvLayer->_stride[Y_AXIS])); + auto allPad = getPaddings(*binaryConvLayer); + int x_pad = allPad.begin[X_AXIS], y_pad = allPad.begin[Y_AXIS]; + cldnn::tensor padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0), + cldnn::spatial(-x_pad, -y_pad)); + cldnn::tensor dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), + cldnn::spatial(binaryConvLayer->_dilation[X_AXIS], binaryConvLayer->_dilation[Y_AXIS])); + + cldnn::data_types calc_precision = DataTypeFromPrecision(binaryConvLayer->precision); + std::string binaryConvLayerName = layer_type_name_ID(layer); + auto binaryConvPrim = cldnn::binary_convolution(binaryConvLayerName, + inputPrimitives[0], + weightPrimID, + stride, + padding, + dilation, + CldnnTensorFromIEDims(binaryConvLayer->outData[0]->getTensorDesc().getDims()), + binaryConvLayer->_group, + binaryConvLayer->_pad_value, + calc_precision); + + primitivesToIRLayersMap[binaryConvLayerName] = { layer->name }; + primitiveIDs[binaryConvLayerName] = binaryConvLayerName; + topology.add(binaryConvPrim); + profilingIDs.push_back(binaryConvLayerName); +} + +void Program::CreateQuantizePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 5); + + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto quantizationLayer = as(layer); + + auto input_low_id = inputPrimitives[1]; + auto input_high_id = inputPrimitives[2]; + auto output_low_id = inputPrimitives[3]; + auto output_high_id = inputPrimitives[4]; + + int levels = quantizationLayer->levels; + std::string quantizeLayerName = layer_type_name_ID(layer); + auto quantizationPrim = 
cldnn::quantize(quantizeLayerName,
+        inputPrimitives[0],
+        input_low_id,
+        input_high_id,
+        output_low_id,
+        output_high_id,
+        levels);
+
+    primitivesToIRLayersMap[quantizeLayerName] = { layer->name };
+    primitiveIDs[quantizeLayerName] = quantizeLayerName;
+    topology.add(quantizationPrim);
+    profilingIDs.push_back(quantizeLayerName);
+}
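// Aside (editor's sketch, not part of this patch): reference semantics of the
// FakeQuantize/Quantize operation mapped above, per the IE definition. Values
// are clamped to [input_low, input_high], snapped to one of `levels` evenly
// spaced steps, then rescaled to [output_low, output_high].
#include <cmath>

float fake_quantize(float x, float in_lo, float in_hi,
                    float out_lo, float out_hi, int levels) {
    if (x <= in_lo) return out_lo;                       // below the input range
    if (x > in_hi)  return out_hi;                       // above the input range
    const float steps = static_cast<float>(levels - 1);
    const float q = std::round((x - in_lo) / (in_hi - in_lo) * steps) / steps;
    return out_lo + q * (out_hi - out_lo);
}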
+
+void Program::CreateGatherPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
+    ValidateLayer(layer, 2);
+
+    auto inputPrimitives = GetPrevLayersPrimitives(layer);
+    auto gatherLayer = as (layer);
+
+    int axis = gatherLayer->GetParamAsInt("axis", 0);
+
+    // Be careful: this mapping follows TensorFlow's negative-axis interpretation bug.
+    // Here -3 = b, -2 = f, -1 = y, whereas it should be -3 = f, -2 = y, -1 = x.
+    auto cldnnAxisFromIE = [](int axis) {
+        switch (axis) {
+            case 0: return cldnn::gather::gather_axis::along_b;
+            case 1: return cldnn::gather::gather_axis::along_f;
+            case 2: return cldnn::gather::gather_axis::along_y;
+            case 3: return cldnn::gather::gather_axis::along_x;
+            case -1: return cldnn::gather::gather_axis::along_y;
+            case -2: return cldnn::gather::gather_axis::along_f;
+            case -3: return cldnn::gather::gather_axis::along_b;
+            default: THROW_CLDNN_EXCEPTION("Unsupported gather axis: " << axis);
+        }
+    };
+
+    std::string gatherLayerName = layer_type_name_ID(layer);
+    auto gatherPrim = cldnn::gather(
+        gatherLayerName,
+        inputPrimitives[0],
+        inputPrimitives[1],
+        cldnnAxisFromIE(axis),
+        CldnnTensorFromIEDims(gatherLayer->outData[0]->getTensorDesc().getDims()));
+
+    primitivesToIRLayersMap[gatherLayerName] = { layer->name };
+    primitiveIDs[gatherLayerName] = gatherLayerName;
+    topology.add(gatherPrim);
+    profilingIDs.push_back(gatherLayerName);
+}
+
+void Program::CreateDepthToSpacePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
+    ValidateLayer(layer, 1);
+
+    auto inputPrimitives = GetPrevLayersPrimitives(layer);
+    auto depthToSpace = as (layer);
+
+    size_t blockSize = depthToSpace->GetParamAsInt("block_size", 2);
+
+    // depth_to_space rearranges channel data into spatial blocks:
+    // (C, H, W) -> (C / block_size^2, H * block_size, W * block_size).
+    auto inputDim = depthToSpace->input().get()->getTensorDesc().getDims();
+    if (inputDim.size() != 4)
+        THROW_CLDNN_EXCEPTION("Unsupported input tensor rank " << inputDim.size());
+
+    size_t blockSizeSquare = blockSize * blockSize;
+
+    if (inputDim[1] % blockSizeSquare != 0)
+        THROW_CLDNN_EXCEPTION("The depth of the input tensor must be divisible by squared block size = " << blockSizeSquare);
+
+    std::string depthToSpaceName = layer_type_name_ID(layer);
+    auto depthToSpacePrim = cldnn::depth_to_space(
+        depthToSpaceName,
+        inputPrimitives[0],
+        blockSize);
+
+    primitivesToIRLayersMap[depthToSpaceName] = { layer->name };
+    primitiveIDs[depthToSpaceName] = depthToSpaceName;
+    topology.add(depthToSpacePrim);
+    profilingIDs.push_back(depthToSpaceName);
+}
+
+void Program::CreateShuffleChannelsPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
+    ValidateLayer(layer, 1);
+
+    auto inputPrimitives = GetPrevLayersPrimitives(layer);
+    auto shuffleChannels = as (layer);
+    const int32_t numberOfDims = shuffleChannels->input()->getDims().size();
+
+    int32_t group = shuffleChannels->GetParamAsInt("group", 1);
+    int32_t axis = shuffleChannels->GetParamAsInt("axis", 1);
+
+    if (axis < 0)
+        axis += numberOfDims;
+
+    if (axis < 0 || axis >= numberOfDims)
+        THROW_CLDNN_EXCEPTION("Incorrect axis value! Actual axis is " + std::to_string(axis));
+
+    if (group < 1)
+        THROW_CLDNN_EXCEPTION("Invalid group size value (should equal at least one). Actual group size is " +
+            std::to_string(group));
+
+    if (shuffleChannels->input().get()->getDims()[axis] % group != 0)
+        THROW_CLDNN_EXCEPTION("Group parameter must evenly divide the channel dimension. Actual group size is " +
+            std::to_string(group));
+
+    // shuffle_channels transposes a (group, C / group) view of the chosen axis.
+    std::string shuffleChannelsName = layer_type_name_ID(layer);
+    auto shuffleChannelsPrim = cldnn::shuffle_channels(
+        shuffleChannelsName,
+        inputPrimitives[0],
+        group,
+        axis);
+
+    primitivesToIRLayersMap[shuffleChannelsName] = { layer->name };
+    primitiveIDs[shuffleChannelsName] = shuffleChannelsName;
+    topology.add(shuffleChannelsPrim);
+    profilingIDs.push_back(shuffleChannelsName);
+}
+
+void Program::CreateStridedSlicePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
+    auto inputPrimitives = GetPrevLayersPrimitives(layer);
+    auto stridedSliceLayer = as (layer);
+
+    // Mask bit i applies to dimension i: begin_mask/end_mask ignore the
+    // corresponding begin/end value (slice from the start / to the end),
+    // new_axis_mask inserts a length-1 dimension, and shrink_axis_mask
+    // indexes instead of slicing, dropping the dimension.
+    auto tmp = stridedSliceLayer->GetParamAsUInts("end_mask");
+    std::vector end_mask(tmp.begin(), tmp.end());
+    tmp = stridedSliceLayer->GetParamAsUInts("begin_mask");
+    std::vector begin_mask(tmp.begin(), tmp.end());
+    tmp = stridedSliceLayer->GetParamAsUInts("new_axis_mask");
+    std::vector new_axis_mask(tmp.begin(), tmp.end());
+    tmp = stridedSliceLayer->GetParamAsUInts("shrink_axis_mask");
+    std::vector shrink_axis_mask(tmp.begin(), tmp.end());
+
+    std::string stridedSliceLayerName = layer_type_name_ID(layer);
+    auto stridedSlicePrim = cldnn::strided_slice(
+        stridedSliceLayerName,
+        inputPrimitives[0], inputPrimitives[1], inputPrimitives[2], inputPrimitives[3],
+        begin_mask, end_mask, new_axis_mask, shrink_axis_mask);
+
+    primitivesToIRLayersMap[stridedSliceLayerName] = { layer->name };
+    primitiveIDs[stridedSliceLayerName] = stridedSliceLayerName;
+    topology.add(stridedSlicePrim);
+    profilingIDs.push_back(stridedSliceLayerName);
+}
+
+void Program::CreateReverseSequencePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) {
+    ValidateLayer(layer, 2);
+
+    auto inputPrimitives = GetPrevLayersPrimitives(layer);
+    auto reverseSequence = as (layer);
+
+    const auto input = reverseSequence->insData[0].lock()->getDims();
+    const auto sequence_lengths = reverseSequence->insData[1].lock()->getDims();
+
+    int32_t batch_axis = reverseSequence->GetParamAsInt("batch_axis", 0);
+    int32_t seq_axis = reverseSequence->GetParamAsInt("seq_axis", 1);
+
+    if (batch_axis < 0)
+        batch_axis += input.size();
+
+    if (seq_axis < 0)
+        seq_axis += input.size();
+
+    if (batch_axis == seq_axis)
+        THROW_CLDNN_EXCEPTION("Batch axis and sequence axis should not be equal\n");
+
+    if (seq_axis < 0 || seq_axis >= input.size())
+        THROW_CLDNN_EXCEPTION("Incorrect Sequence axis value! Actual axis is " + std::to_string(seq_axis));
+
+    if (batch_axis < 0 || batch_axis >= input.size())
+        THROW_CLDNN_EXCEPTION("Incorrect Batch axis value! Actual axis is " + std::to_string(batch_axis));
+
+    if (sequence_lengths[0] != input[batch_axis])
+        THROW_CLDNN_EXCEPTION("Sequence lengths must be a vector of length " + std::to_string(input[batch_axis]) +
+            "!
Actual axis is " + std::to_string(sequence_lengths[0])); + + std::string reverseSequenceLayerName = layer_type_name_ID(layer); + auto reverseSequencePrim = cldnn::reverse_sequence( + reverseSequenceLayerName, + inputPrimitives[0], + inputPrimitives[1], + seq_axis, + batch_axis); + + primitivesToIRLayersMap[reverseSequenceLayerName] = { layer->name }; + primitiveIDs[reverseSequenceLayerName] = reverseSequenceLayerName; + topology.add(reverseSequencePrim); + profilingIDs.push_back(reverseSequence->name); +} + +void Program::CreateBroadcastPrimitive(cldnn::topology &topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 2); + + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto broadcast = as(layer); + + std::string broadcastLayerName = layer_type_name_ID(layer); + auto broadcastPrim = cldnn::broadcast( + broadcastLayerName, + inputPrimitives[0], + CldnnTensorFromIEDims(broadcast->outData[0]->getTensorDesc().getDims())); + + primitiveIDs[broadcastLayerName] = broadcastLayerName; + topology.add(broadcastPrim); + profilingIDs.push_back(broadcast->name); +} + +void Program::CreateGemmPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + bool threeInputs = layer->insData.size() == 3; + + if (threeInputs) { + ValidateLayer(layer, 3); + } else { + ValidateLayer(layer, 2); + } + + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto gemmLayer = as(layer); + auto gemmLayerName = layer_type_name_ID(layer); + + auto outDims = layer->outData[0]->getTensorDesc().getDims(); + auto outDimsN = outDims.size(); + + auto gemmSpecificTensor = [](const InferenceEngine::SizeVector& dims) { + switch (dims.size()) { + case 2: return cldnn::tensor(cldnn::spatial(dims[1], dims[0])); + case 3: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::spatial(dims[2], dims[1])); + case 4: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[3], dims[2])); + case 5: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[4], dims[3], dims[2])); + case 6: return cldnn::tensor(cldnn::batch(dims[0]), cldnn::feature(dims[1]), cldnn::spatial(dims[5], dims[4], dims[3], dims[2])); + default: THROW_CLDNN_EXCEPTION("Invalid dimensions size(" << dims.size() << ") for Gemm layer"); + } + }; + + // Preprocess inputs + for (size_t i = 0; i < inputPrimitives.size(); ++i) { + auto inputDims = layer->insData[i].lock()->getTensorDesc().getDims(); + auto inputDimsN = inputDims.size(); + + // Add reorder if changing number of dimensions requires changing format + auto targetFormat = defaultFormatForDims(outDimsN); + + if (targetFormat.value != defaultFormatForDims(inputDimsN).value) { + auto reorderName = gemmLayerName + "_cldnn_in" + std::to_string(i) + "_reorder"; + auto targetDatatype = DataTypeFromPrecision(layer->precision); + auto reorderPrim = cldnn::reorder(reorderName, inputPrimitives[i], targetFormat, targetDatatype); + + topology.add(reorderPrim); + primitivesToIRLayersMap[reorderName] = { layer->name }; + profilingIDs.push_back(reorderName); + primitiveIDs[reorderName] = reorderName; + + inputPrimitives[i] = reorderName; + } + + // Reshape input if they differ or gemm specific shape matches default one + if (inputDimsN != outDimsN || inputDimsN < 4) { + auto reshapeName = gemmLayerName + "_cldnn_in" + std::to_string(i) + "_reshape"; + + // Extend input dimensions by prepending ones + inputDims.insert(inputDims.begin(), outDimsN - inputDimsN, 1ul); + + auto targetShape = gemmSpecificTensor(inputDims); + + 
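// Aside (editor's sketch, not part of this patch): reference semantics for the
// gemm primitive assembled below, C = alpha * op(A) * op(B) (+ beta * C0 when
// a third input is present), where op() transposes when transA/transB is set.
// Plain row-major version with the transposes omitted for brevity:
#include <cstddef>
#include <vector>

void gemm_ref(const std::vector<float>& A, const std::vector<float>& B,
              std::vector<float>& C, size_t M, size_t N, size_t K,
              float alpha, float beta) {
    for (size_t m = 0; m < M; ++m)
        for (size_t n = 0; n < N; ++n) {
            float acc = 0.f;
            for (size_t k = 0; k < K; ++k)
                acc += A[m * K + k] * B[k * N + n];
            C[m * N + n] = alpha * acc + beta * C[m * N + n];
        }
}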
auto reshapePrim = cldnn::reshape(reshapeName, inputPrimitives[i], targetShape); + + topology.add(reshapePrim); + primitivesToIRLayersMap[reshapeName] = { layer->name }; + profilingIDs.push_back(reshapeName); + primitiveIDs[reshapeName] = reshapeName; + + inputPrimitives[i] = reshapeName; + } + } + + // Add actual gemm + auto alpha = gemmLayer->alpha; + auto beta = gemmLayer->beta; + auto transA = gemmLayer->transpose_a; + auto transB = gemmLayer->transpose_b; + + auto gemmPrim = cldnn::gemm( + gemmLayerName, + inputPrimitives, + transA, + transB, + alpha, + beta); + + topology.add(gemmPrim); + primitivesToIRLayersMap[gemmLayerName] = { layer->name }; + profilingIDs.push_back(gemmLayerName); + + auto lastLayerName = gemmLayerName; + + // Reshape output if gemm specific shape does not match default one + if (outDimsN < 4) { + auto outputShape = CldnnTensorFromIEDims(outDims); + auto outReshapeName = gemmLayerName + "_cldnn_out_reshape"; + auto outReshapePrim = cldnn::reshape(outReshapeName, gemmLayerName, outputShape); + + topology.add(outReshapePrim); + primitivesToIRLayersMap[outReshapeName] = { layer->name }; + profilingIDs.push_back(outReshapeName); + primitiveIDs[outReshapeName] = outReshapeName; + + lastLayerName = outReshapeName; + } + + primitiveIDs[gemmLayerName] = lastLayerName; +} + + +void Program::CreateReducePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 2); + + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto reduce = as(layer); + auto input = reduce->insData[0].lock(); + size_t reduceDimNumber = input->getTensorDesc().getDims().size(); + + auto axesInput = layer->insData[1].lock(); + auto axesInputCreator = axesInput->getCreatorLayer().lock(); + + std::vector rawAxes; + if (axesInputCreator->blobs.size() == 1) { + auto constantBlob = axesInputCreator->blobs.begin()->second; + auto axesPrecision = constantBlob->getTensorDesc().getPrecision(); + if (axesPrecision == InferenceEngine::Precision::FP32) { + auto data = constantBlob->buffer().as(); + for (size_t i = 0; i < constantBlob->size(); ++i) + rawAxes.push_back(data[i]); + } else if (axesPrecision == InferenceEngine::Precision::I32) { + auto data = constantBlob->buffer().as(); + for (size_t i = 0; i < constantBlob->size(); ++i) + rawAxes.push_back(data[i]); + } else { + THROW_IE_EXCEPTION << layer->name << " Incorrect Reduce axes input Precision"; + } + } + + std::vector axes; + for (size_t a = 0; a < rawAxes.size(); a++) { + if (rawAxes[a] < 0) + rawAxes[a] = rawAxes[a] + reduceDimNumber; + if (rawAxes[a] < 0 || rawAxes[a] > reduceDimNumber - 1) + THROW_IE_EXCEPTION << layer->name << " Incorrect Reduce axis value: " << rawAxes[a]; + if (reduceDimNumber == 6) { + switch (rawAxes[a]) { + case 0: axes.push_back(cldnn::reduce::along_b); break; + case 1: axes.push_back(cldnn::reduce::along_f); break; + case 2: axes.push_back(cldnn::reduce::along_w); break; + case 3: axes.push_back(cldnn::reduce::along_z); break; + case 4: axes.push_back(cldnn::reduce::along_y); break; + case 5: axes.push_back(cldnn::reduce::along_x); break; + } + } else if (reduceDimNumber == 5) { + switch (rawAxes[a]) { + case 0: axes.push_back(cldnn::reduce::along_b); break; + case 1: axes.push_back(cldnn::reduce::along_f); break; + case 2: axes.push_back(cldnn::reduce::along_z); break; + case 3: axes.push_back(cldnn::reduce::along_y); break; + case 4: axes.push_back(cldnn::reduce::along_x); break; + } + } else { + switch (rawAxes[a]) { + case 0: axes.push_back(cldnn::reduce::along_b); break; + 
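// Aside (editor's sketch, not part of this patch): scalar reference for the
// less obvious reduce modes mapped just below; the others (max, min, mean,
// prod, sum, and/or) have their usual definitions.
#include <cmath>
#include <vector>

float reduce_sum_square(const std::vector<float>& v) {   // ReduceSumSquare
    float s = 0.f;
    for (float x : v) s += x * x;
    return s;
}
float reduce_l2(const std::vector<float>& v) {           // ReduceL2
    return std::sqrt(reduce_sum_square(v));
}
float reduce_log_sum_exp(const std::vector<float>& v) {  // ReduceLogSumExp
    float s = 0.f;
    for (float x : v) s += std::exp(x);
    return std::log(s);
}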
case 1: axes.push_back(cldnn::reduce::along_f); break; + case 2: axes.push_back(cldnn::reduce::along_y); break; + case 3: axes.push_back(cldnn::reduce::along_x); break; + } + } + } + + sort(axes.begin(), axes.end()); + axes.erase(unique(axes.begin(), axes.end()), axes.end()); + + cldnn::reduce_mode mode; + std::string reduceType = layer->type; + if (reduceType == "ReduceMax") mode = cldnn::reduce_mode::max; + else if (reduceType == "ReduceMin") mode = cldnn::reduce_mode::min; + else if (reduceType == "ReduceMean") mode = cldnn::reduce_mode::mean; + else if (reduceType == "ReduceProd") mode = cldnn::reduce_mode::prod; + else if (reduceType == "ReduceSum") mode = cldnn::reduce_mode::sum; + else if (reduceType == "ReduceAnd") mode = cldnn::reduce_mode::logical_and; + else if (reduceType == "ReduceOr") mode = cldnn::reduce_mode::logical_or; + else if (reduceType == "ReduceSumSquare") mode = cldnn::reduce_mode::sum_square; + else if (reduceType == "ReduceL1") mode = cldnn::reduce_mode::l1; + else if (reduceType == "ReduceL2") mode = cldnn::reduce_mode::l2; + else if (reduceType == "ReduceLogSum") mode = cldnn::reduce_mode::log_sum; + else if (reduceType == "ReduceLogSumExp") mode = cldnn::reduce_mode::log_sum_exp; + else + THROW_IE_EXCEPTION << layer->name << " Incorrect Reduce layer type!"; + + std::string reduceLayerName = layer_type_name_ID(layer); + auto reducePrim = cldnn::reduce( + reduceLayerName, + inputPrimitives[0], + mode, + axes, + static_cast(reduce->keep_dims)); + + primitiveIDs[reduceLayerName] = reduceLayerName; + topology.add(reducePrim); + profilingIDs.push_back(reduce->name); +} + +void Program::CreateOneHotPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer) { + ValidateLayer(layer, 1); + + auto inputPrimitives = GetPrevLayersPrimitives(layer); + auto oneHot = as(layer); + + int16_t axis = oneHot->GetParamAsInt("axis", -1); + float on_value = layer->GetParamAsFloat("on_value", 1.0f); + float off_value = layer->GetParamAsFloat("off_value", 0.0f); + auto dims = oneHot->input()->getDims(); + + if (axis < -1 || axis > static_cast(dims.size())) + THROW_IE_EXCEPTION << layer->name << " Incorrect OneHot axis value: " << axis << ". 
Should be between -1 and " << dims.size(); + + if (axis == -1) { + axis = dims.size(); + for (int i = dims.size() - 1; i >= 0; i--) { + if (dims[i] == 1) + axis--; + else + break; + } + } + + std::string oneHotLayerName = layer_type_name_ID(layer); + auto oneHotPrim = cldnn::one_hot( + oneHotLayerName, + inputPrimitives[0], + CldnnTensorFromIEDims(oneHot->outData[0]->getDims()), + static_cast(axis), + on_value, + off_value); + + primitiveIDs[oneHotLayerName] = oneHotLayerName; + topology.add(oneHotPrim); + profilingIDs.push_back(oneHot->name); +} + +bool Program::IsValidSplitConvMerge(const InferenceEngine::SplitLayer *splitLayer) const { + if (splitLayer->outData.size() != 2) return false; // split into 2 + + for (auto out : splitLayer->outData) { + if (out->getInputTo().size() != 1) { + return false; + } + } + + auto convLayer1 = + tryAs (GetNextSingleLayer(splitLayer->outData[0])); + auto convLayer2 = + tryAs (GetNextSingleLayer(splitLayer->outData[1])); + if (!convLayer1 || !convLayer2) { // outputs aren't convolutions + return false; + } + auto allPad1 = getPaddings(*convLayer1); + auto allPad2 = getPaddings(*convLayer2); + if (convLayer1->precision != convLayer2->precision // wrong precision + || convLayer1->_fusedWith || convLayer2->_fusedWith // convolutions are fused + || convLayer1->outData.size() != 1 || convLayer2->outData.size() != 1 // more than 1 output for convolutions + || allPad1.begin[X_AXIS] != allPad2.begin[X_AXIS] // different padding + || allPad1.begin[Y_AXIS] != allPad2.begin[Y_AXIS] // different padding + || convLayer1->_stride[X_AXIS] != convLayer2->_stride[X_AXIS] // different strides + || convLayer1->_stride[Y_AXIS] != convLayer2->_stride[Y_AXIS] // different strides + || convLayer1->_dilation[X_AXIS] != convLayer2->_dilation[X_AXIS] // different dilation + || convLayer1->_dilation[Y_AXIS] != convLayer2->_dilation[Y_AXIS] // different dilation + || (GetNextSingleLayer(GetNextSingleLayer(splitLayer->outData[0])) // no merge after convolutions + != GetNextSingleLayer(GetNextSingleLayer(splitLayer->outData[1]))) + || (p_currentOutputs.find(convLayer1->name) != p_currentOutputs.end()) + || (p_currentOutputs.find(convLayer2->name) != p_currentOutputs.end())) { + return false; + } + auto concatLayer = + tryAs ( + GetNextSingleLayer(GetNextSingleLayer(splitLayer->outData[0]))); + if (!concatLayer || // not a merge layer + concatLayer->_axis != 1 || // merge on unsupported axis + concatLayer->outData.size() != 1) { // too many outputs + return false; + } + if (m_config.customLayers.find(convLayer1->type) != m_config.customLayers.end() || + m_config.customLayers.find(concatLayer->type) != m_config.customLayers.end()) { + return false; // convolution or concat were overwritten by a custom layer + } + + return true; +} + +void Program::AddInputPrimitive(cldnn::topology& topology, InferenceEngine::InputInfo::Ptr inputInfo, Precision inputPrecision, const std::string inputName) { + // first create and add the input layout + const auto inputDesc = inputInfo->getTensorDesc(); + const auto inputDims = inputDesc.getDims(); + InferenceEngine::Layout l = inputDesc.getLayout(); + const std::string& layoutName = DebugOptions::IELayoutToString(l); + auto consumers = inputInfo->getInputData()->getInputTo(); + + cldnn::format inputFormat = m_defaultFormat; + if (InferenceEngine::Layout::BLOCKED == l && 6 == inputDims.size()) + inputFormat = cldnn::format::bfwzyx; + else + inputFormat = FormatFromLayout(l); + cldnn::tensor dataTensor; + cldnn::tensor::value_type batch = (m_max_batch <= 
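// Aside (editor's sketch, not part of this patch): what the one_hot primitive
// built above emits for a 1-D index input; position idx[i] along the new
// depth axis gets on_value, everything else off_value. Helper name is
// illustrative.
#include <cstddef>
#include <vector>

std::vector<float> one_hot_1d(const std::vector<int>& idx, size_t depth,
                              float on_value, float off_value) {
    std::vector<float> out(idx.size() * depth, off_value);
    for (size_t i = 0; i < idx.size(); ++i)
        if (idx[i] >= 0 && static_cast<size_t>(idx[i]) < depth)
            out[i * depth + idx[i]] = on_value;
    return out;
}
// one_hot_1d({2, 0}, 3, 1.f, 0.f) == {0,0,1, 1,0,0}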
1) + ? (inputDims.size() > 3 ? TensorValue(inputDims[0]) : 1) + : TensorValue(m_curBatch); + switch (inputDims.size()) { + case 6: + dataTensor = cldnn::tensor(cldnn::batch(batch), + cldnn::feature(inputDims[1]), + cldnn::spatial(inputDims[5], inputDims[4], inputDims[3], inputDims[2])); + break; + case 5: + if (InferenceEngine::Layout::NCDHW == l) { + dataTensor = cldnn::tensor(cldnn::batch(batch), + cldnn::feature(inputDims[1]), + cldnn::spatial(inputDims[4], inputDims[3], inputDims[2])); + } else { + THROW_CLDNN_EXCEPTION("Unsupported layout (" << layoutName << ") in 5D input " + inputInfo->name()); + } + break; + case 4: + if (InferenceEngine::Layout::NCHW == l || InferenceEngine::Layout::CHW == l) { + dataTensor = cldnn::tensor(batch, + TensorValue(inputDims[1]), TensorValue(inputDims[3]), TensorValue(inputDims[2])); + } else if (InferenceEngine::Layout::NHWC == l) { + dataTensor = cldnn::tensor(batch, + TensorValue(inputDims[1]), TensorValue(inputDims[3]), TensorValue(inputDims[2])); + } else { + THROW_CLDNN_EXCEPTION("Unsupported layout (" << layoutName << ") in 4D input " + inputInfo->name()); + } + break; + case 3: + if (InferenceEngine::Layout::CHW == l) { + dataTensor = cldnn::tensor(TensorValue(inputDims[0]), TensorValue(inputDims[1]), 1, TensorValue(inputDims[2])); + } else { + THROW_CLDNN_EXCEPTION("Unsupported layout (" << layoutName << ") in 3D input " + inputInfo->name()); + } + break; + case 2: + if (InferenceEngine::Layout::NCHW == l || InferenceEngine::NC == l) { + dataTensor = cldnn::tensor(TensorValue(inputDims[0]), TensorValue(inputDims[1]), 1, 1); + } else { + THROW_CLDNN_EXCEPTION("Unsupported layout (" << layoutName << ") in 2D input " + inputInfo->name()); + } + break; + case 1: + dataTensor = cldnn::tensor(TensorValue(inputDims[0]), 1, 1, 1); + break; + default: THROW_CLDNN_EXCEPTION("Invalid data dimensions"); + } + cldnn::layout inputLayout(DataTypeFromPrecision(inputDesc.getPrecision()), + inputFormat, + dataTensor); + + // save the input dims + inputLayouts.insert({ inputInfo->name(), inputLayout }); + + topology.add(cldnn::input_layout(inputName, inputLayout)); + primitivesToIRLayersMap[inputName] = { inputInfo->name() }; + + // create preprocess primitive for this input + auto preProcess = inputInfo->getPreProcess(); + + size_t meanChannels = preProcess.getNumberOfChannels(); + inputLayout.format = inputFormat; + inputLayout.size = inputLayout.size.transform(inputFormat, 1); + inputLayout.data_type = DataTypeFromPrecision(inputPrecision); + auto preprocessPrimID = inputName + m_preProcessTag; + + if ((meanChannels > 0) && + (meanChannels != inputLayout.size.feature[0])) { + THROW_CLDNN_EXCEPTION("Mismatched mean values channels in input " + inputName); + } + + switch (preProcess.getMeanVariant()) { + case NONE: + case MEAN_VALUE: { + std::vector meanValues; + if (meanChannels > 0) { + for (size_t c = 0; c < meanChannels; c++) { + if (fabs(preProcess[c]->stdScale - 1.0f) > 1e-10) + THROW_CLDNN_EXCEPTION("not supporting stdScale yet in input " + inputName); + meanValues.push_back(preProcess[c]->meanValue); + } + } + topology.add(cldnn::reorder(preprocessPrimID, inputName, inputLayout, meanValues)); + primitivesToIRLayersMap[preprocessPrimID] = { inputInfo->name() }; + profilingIDs.push_back(preprocessPrimID); + InitProfileInfo(preprocessPrimID, "Reorder"); + } + break; + + case MEAN_IMAGE: { + IE_ASSERT(meanChannels); + // first merge all mean values to a single blob + // todo make sure mean blob precision is the same as the input precision + auto meanDims = 
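// Aside (editor's sketch, not part of this patch): the MEAN_VALUE reorder
// created above folds per-channel mean subtraction into the layout/precision
// conversion. Scalar reference of the arithmetic, for an NCHW buffer:
#include <cstddef>
#include <vector>

void subtract_channel_means(std::vector<float>& nchw, size_t C, size_t HW,
                            const std::vector<float>& mean) {
    for (size_t c = 0; c < C; ++c)
        for (size_t i = 0; i < HW; ++i)
            nchw[c * HW + i] -= mean[c];   // one mean per channel, no stdScale
}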
inputDims; + // overwrite batches with 1 + switch (meanDims.size()) { + case 4: meanDims[0] = 1; + break; + default: + THROW_CLDNN_EXCEPTION("Missing batch dimensions in input image"); + } + const TensorDesc desc(Precision(Precision::FP32), meanDims, TensorDesc::getLayoutByDims(meanDims)); + InferenceEngine::TBlob meanBlob(desc); + meanBlob.allocate(); + auto meanBlobData = meanBlob.data(); + for (size_t c = 0; c < meanChannels; c++) { + if (fabs(preProcess[c]->stdScale - 1.0f) > 1e-10) + THROW_CLDNN_EXCEPTION("not supporting stdScale yet in input " + inputName); + auto channelMeanBlob = std::dynamic_pointer_cast>(preProcess[c]->meanData); + auto channelSize = channelMeanBlob->size(); + auto channelBlobData = channelMeanBlob->data(); + for (size_t i = 0; i < channelSize; i++) { + meanBlobData[(c * channelSize) + i] = channelBlobData[i]; + } + } + // then create a data primitive for the mean values + auto meanBlobPtr = std::make_shared>(meanBlob); + + // mean values will use external format (sub in the input format before convert to new format) + cldnn::tensor meanBlobTensor(inputLayout.size); + meanBlobTensor.batch[0] = 1; // mean values have no batches + cldnn::layout meanBlobLayout(cldnn::data_types::f32, m_defaultFormat, meanBlobTensor); + cldnn::primitive_id meanBlobID = inputName + m_meanValuesTag; + meanBlobID = CreatePrimitiveFromBlob(topology, + meanBlobID, + meanBlobPtr, + meanBlobLayout); + topology.add(cldnn::reorder(preprocessPrimID, + inputName, + inputLayout, + meanBlobID)); + primitivesToIRLayersMap[preprocessPrimID] = { inputInfo->name() }; + profilingIDs.push_back(preprocessPrimID); + InitProfileInfo(preprocessPrimID, "Reorder"); + break; + } + default: THROW_CLDNN_EXCEPTION("Invalid mean variant in input " + inputName); + break; + } + primitiveIDs[inputName] = preprocessPrimID; + primitiveIDs[preprocessPrimID] = preprocessPrimID; +} + +std::vector Program::GetPrevLayersPrimitives(const InferenceEngine::CNNLayerPtr layer) const { + if (layer == nullptr) { + return {}; + } + std::vector inputPrimitives; + for (auto inputData : layer->insData) { + auto prevData = inputData.lock(); + if (prevData == nullptr) { + THROW_CLDNN_EXCEPTION("Nonexistent input for layer: " << layer->name); + } + auto prevCreator = prevData->getCreatorLayer().lock(); + std::string prevName; + + if (prevCreator) { + prevName = layer_type_lower(prevCreator) + ":"; + if (prevCreator->outData.size() > 1) + prevName += prevData->getName(); + else + prevName += prevCreator->name; + } else { + prevName = prevData->getName(); + } + inputPrimitives.push_back(primitiveIDs.at(prevName)); + } + return inputPrimitives; +} + +void Program::AddOutputPrimitive(cldnn::topology& topology, std::string outputName, const InferenceEngine::DataPtr outputData, Precision outputPrecision) { + const auto outputDesc = outputData->getTensorDesc(); + const auto outputlayout = outputDesc.getLayout(); + + // TODO: add precision check once there's an outputInfo object + if (outputlayout != InferenceEngine::NCHW && + // TODO: change 6d case once new layout added in IE + outputlayout != InferenceEngine::BLOCKED && + outputlayout != InferenceEngine::NCDHW && + outputlayout != InferenceEngine::NHWC && + outputlayout != InferenceEngine::CHW && + outputlayout != InferenceEngine::NC && + outputlayout != InferenceEngine::C) { + THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(outputlayout) << ") in output: " << outputName); + } + + auto outputCreator = outputData->getCreatorLayer().lock(); + std::string 
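// Aside (editor's sketch, not part of this patch): the primitive-ID scheme
// used by GetPrevLayersPrimitives above and AddOutputPrimitive below is
// "<lowercased layer type>:<name>", where <name> is the producing layer's
// name for single-output layers and the Data name for multi-output layers.
#include <cctype>
#include <string>

std::string make_prim_id(std::string type, const std::string& name) {
    for (auto& c : type)
        c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
    return type + ":" + name;
}
// make_prim_id("Convolution", "conv1") == "convolution:conv1"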
outLayerName = layer_type_lower(outputCreator) + ":"; + + if (outputCreator->outData.size() > 1) + outLayerName += outputName; + else + outLayerName += outputCreator->name; + + auto outputReorderID = outputName + m_postProcessTag; + Precision precision = outputPrecision == Precision::UNSPECIFIED ? outputData->getPrecision() : outputPrecision; + + // Find correct output ID. Start with name stored in IR. + std::string outputID = outLayerName; + std::string finalID = primitiveIDs.at(outLayerName); + + while (outputID != finalID) { + auto prim = primitiveIDs.find(finalID); + + if (prim == primitiveIDs.end()) { + THROW_IE_EXCEPTION << "Unknown output primitive id " << outputID; + } + outputID = finalID; + finalID = prim->second; + } + + topology.add(cldnn::reorder(outputReorderID, outputID, + FormatFromLayout(outputData->getLayout()), + DataTypeFromPrecision(precision))); + primitiveIDs[outputName] = outputReorderID; + profilingIDs.push_back(outputReorderID); + InitProfileInfo(outputReorderID, "Reorder"); + + outputDims[outputName] = outputDesc.getDims(); + prevPrimitiveIDs[outputReorderID] = {outputName}; +} + +void Program::AddSingleValuePrimitive(cldnn::topology& topology, cldnn::primitive_id valPrimID, cldnn::data_types dataType, float value) { + cldnn::layout primLayout(dataType, m_defaultFormat, { 1, 1, 1, 1 }); + auto primMem = cldnn::memory::allocate(*m_engine, primLayout); + switch (dataType) { + case cldnn::data_types::f32: + { + auto tmpPointer = primMem.pointer(); // implicitly maps buffer - unmap in destructor + tmpPointer[0] = value; + } + break; + case cldnn::data_types::f16: + { + auto tmpPointer = primMem.pointer(); // implicitly maps buffer - unmap in destructor + cldnn_status status = CLDNN_SUCCESS; + tmpPointer[0] = cldnn_float_to_half(value, &status); + if (status != CLDNN_SUCCESS) { + THROW_CLDNN_EXCEPTION("Error converting value to fp16."); + } + } + break; + default: + THROW_CLDNN_EXCEPTION("Unhandled data type (precision)"); + } + + topology.add(cldnn::data(valPrimID, primMem)); +} + +cldnn::data_types Program::DataTypeFromPrecision(InferenceEngine::Precision p) { + switch (p) { + case Precision::I16: + case Precision::FP32: + return cldnn::data_types::f32; + case Precision::FP16: + return cldnn::data_types::f16; + case Precision::U8: + return cldnn::data_types::u8; + case Precision::I8: + return cldnn::data_types::i8; + case Precision::I32: + return cldnn::data_types::i32; + case Precision::BIN: + return cldnn::data_types::bin; + default: + THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << p.name() << " precision"; + break; + } +} + +cldnn::format Program::FormatFromLayout(InferenceEngine::Layout l) { + switch (l) { + // TODO: change 6d case once new layout added in IE + case InferenceEngine::Layout::BLOCKED: + return cldnn::format::bfwzyx; + case InferenceEngine::Layout::NCDHW: + return cldnn::format::bfzyx; + case InferenceEngine::Layout::NCHW: + case InferenceEngine::Layout::NC: + case InferenceEngine::Layout::CHW: + case InferenceEngine::Layout::C: + return cldnn::format::bfyx; + case InferenceEngine::Layout::NHWC: + return cldnn::format::byxf; + default: + THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << l << " layout"; + break; + } +} + +cldnn::upsampling_sample_type Program::UpsamplingTypeFromString(const std::string& str) { + static const caseless_map UpsamplingTypeNameToType = { + { "Bilinear" , cldnn::upsampling_sample_type::bilinear }, + { "Nearest" , cldnn::upsampling_sample_type::nearest }, + 
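// Aside (editor's sketch, not part of this patch): the while-loop in
// AddOutputPrimitive above treats primitiveIDs as an alias chain; an entry
// may point at the primitive that replaced it (e.g. after fusing or adding a
// reorder), and a self-mapping terminates the chain. Minimal analogue:
#include <map>
#include <stdexcept>
#include <string>

std::string resolve(const std::map<std::string, std::string>& ids, std::string id) {
    auto it = ids.find(id);
    while (it != ids.end() && it->second != id) {
        id = it->second;           // follow the alias
        it = ids.find(id);
    }
    if (it == ids.end())
        throw std::runtime_error("Unknown output primitive id " + id);
    return id;
}
// resolve({{"a", "b"}, {"b", "b"}}, "a") == "b"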
}; + auto it = UpsamplingTypeNameToType.find(str); + if (it != UpsamplingTypeNameToType.end()) + return it->second; + else + THROW_CLDNN_EXCEPTION("Unknown Upsampling type: " << str); +} + +cldnn::softmax::dimension_t Program::SoftmaxDimensionFromIEAxis(const InferenceEngine::SoftMaxLayer* softmaxLayer) { + auto sz = softmaxLayer->input()->getTensorDesc().getDims().size(); + switch (softmaxLayer->axis) { + case 0: return cldnn::softmax::normalize_all; + case 1: return cldnn::softmax::normalize_f; + case 2: + if (sz > 4) + return cldnn::softmax::normalize_z; + else + return cldnn::softmax::normalize_y; + case 3: + if (sz > 4) + return cldnn::softmax::normalize_y; + else + return cldnn::softmax::normalize_x; + case 4: + return cldnn::softmax::normalize_x; + default: THROW_CLDNN_EXCEPTION("Invalid softmax axis " << softmaxLayer->axis); + } + return cldnn::softmax::normalize_fyx; +} + +cldnn::prior_box_code_type Program::PriorBoxCodeFromString(const std::string& str) { + static const std::map CodeNameToType = { + { "caffe.PriorBoxParameter.CORNER" , cldnn::prior_box_code_type::corner }, + { "caffe.PriorBoxParameter.CENTER_SIZE" , cldnn::prior_box_code_type::center_size }, + { "caffe.PriorBoxParameter.CORNER_SIZE" , cldnn::prior_box_code_type::corner_size }, + }; + auto it = CodeNameToType.find(str); + if (it != CodeNameToType.end()) { + return it->second; + } else { + THROW_CLDNN_EXCEPTION("Unknown Prior-Box code type: " + str); + return cldnn::prior_box_code_type::corner; + } +} + +Program::GenericBlobMap Program::CreateGenericLayerBlobPrimitives(cldnn::topology& topology, const InferenceEngine::GenericLayer* layer) { + IE_ASSERT(layer); + GenericBlobMap res; + for (auto& blob : layer->blobs) { + const auto blobDims = blob.second->getTensorDesc().getDims(); + if (blobDims.size() != 1) { + THROW_CLDNN_EXCEPTION("Unhandled blob dim in layer " + layer->name); + } + + cldnn::layout genericLayout(DataTypeFromPrecision(blob.second->getTensorDesc().getPrecision()), + m_defaultFormat, + (cldnn::tensor) cldnn::spatial(TensorValue(blobDims.back()))); + + cldnn::primitive_id initialWeightID = layer_type_name_ID(layer) + "_" + blob.first + m_weightsTag; + cldnn::primitive_id weightID = CreatePrimitiveFromBlob(topology, initialWeightID, blob.second, genericLayout); + res[initialWeightID] = weightID; + } + + return res; +} + +void Program::ValidateGenericLayerBlobs(const InferenceEngine::GenericLayer* layer, const std::vector& blobNames) { + IE_ASSERT(layer); + for (auto& name : blobNames) { + if (layer->blobs.find(name) == layer->blobs.end()) { + THROW_CLDNN_EXCEPTION("Missing blob " + name + " in layer " + layer->name); + } + } +} + +void Program::InitProfileInfo(const std::string& layerName, + const std::string& layerType, + bool isCPU, + InferenceEngine::InferenceEngineProfileInfo::LayerStatus status) { + perfMap[layerType + ":" + layerName].first = layerName; + auto& perfEntry = perfMap[layerType + ":" + layerName].second; + perfEntry.layerType = layerType; + perfEntry.status = status; + perfEntry.cpu_uSec = perfEntry.realTime_uSec = 0; + perfEntry.isCPU = isCPU; + perfEntry.status = status; +} + +} // namespace CLDNNPlugin diff --git a/inference-engine/src/cldnn_engine/cldnn_program.h b/inference-engine/src/cldnn_engine/cldnn_program.h new file mode 100644 index 00000000000000..25c7310cf3dee4 --- /dev/null +++ b/inference-engine/src/cldnn_engine/cldnn_program.h @@ -0,0 +1,362 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include 
+#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "debug_options.h" +#include "cldnn_custom_layer.h" +#include "cldnn_config.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef NDEBUG +#include +#include + +#define THROW_CLDNN_EXCEPTION(desc)\ +do { \ +InferenceEngineException ex(__FILE__, __LINE__);\ +std::cout << desc << "\n---\nException detected at " << __FILE__ << ":" << \ +__LINE__ << " (" << __FUNCTION__ << ")\n---\n" << std::endl; THROW_IE_EXCEPTION << desc; } while (0); +#else +#define THROW_CLDNN_EXCEPTION(desc) THROW_IE_EXCEPTION << desc; +#endif // NDEBUG +#define TensorValue(val) static_cast(val) + +namespace CLDNNPlugin { +template +LayerTypePtr tryAs(const InferenceEngine::CNNLayerPtr& in_ptr) { + return dynamic_cast(in_ptr.get()); +} + +template +LayerTypePtr as(const InferenceEngine::CNNLayerPtr& in_ptr) { + auto result_ptr = dynamic_cast (in_ptr.get()); + if (nullptr == result_ptr) { + THROW_IE_EXCEPTION << "CNNLayerPtr is not suitable for casting to requested layer type"; + } + return result_ptr; +} + +inline std::string layer_type_lower(const InferenceEngine::CNNLayer* layer) { + std::string layerType = layer->type; + std::transform(layerType.begin(), layerType.end(), layerType.begin(), + [](unsigned char c) -> unsigned char { return std::tolower(c); }); + return layerType; +} + +inline std::string layer_type_name_ID(const InferenceEngine::CNNLayer* layer) { + return layer_type_lower(layer) + ":" + layer->name; +} + +inline std::string layer_type_lower(InferenceEngine::CNNLayerPtr layer) { + return layer_type_lower(layer.get()); +} + +inline std::string layer_type_name_ID(InferenceEngine::CNNLayerPtr layer) { + return layer_type_name_ID(layer.get()); +} + +struct PerfCounter { + InferenceEngine::InferenceEngineProfileInfo::LayerStatus status; + bool isCPU; + uint64_t realTime_uSec; + uint64_t cpu_uSec; + uint32_t num; + std::string layerType; + +public: + PerfCounter() : realTime_uSec(0), cpu_uSec(0), num(0), + status(InferenceEngine::InferenceEngineProfileInfo::NOT_RUN), isCPU(false) {} + + long long realTime_avg() const { return (num == 0) ? 0 : realTime_uSec / num; } + long long cpu_avg() const { return (num == 0) ? 
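// Aside (editor's sketch, not part of this patch): tryAs/as above are thin
// dynamic_cast wrappers over CNNLayerPtr: tryAs yields nullptr on a type
// mismatch, while as throws. A self-contained analogue with illustrative names:
#include <memory>
#include <stdexcept>

struct Base { virtual ~Base() = default; };
struct Derived : Base {};

template <typename T, typename P>
T* try_as(const std::shared_ptr<P>& p) { return dynamic_cast<T*>(p.get()); }

template <typename T, typename P>
T* as_or_throw(const std::shared_ptr<P>& p) {
    T* r = dynamic_cast<T*>(p.get());
    if (!r) throw std::runtime_error("not suitable for casting to requested type");
    return r;
}
// std::shared_ptr<Base> b = std::make_shared<Derived>();
// try_as<Derived>(b) != nullptr; as_or_throw<Derived>(b) returns the same pointer.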
0 : cpu_uSec / num; } +}; + +class Program { +public: + Program(InferenceEngine::ICNNNetwork &network, std::shared_ptr engine, const Config& config); + std::shared_ptr getCompiledProgram(int program_id = 0); + + std::map primitiveIDs; + std::map> primitivesToIRLayersMap; + std::map> prevPrimitiveIDs; + std::map> perfMap; + + std::vector profilingIDs; + + std::map outputDims; + std::map inputLayouts; + std::map blobMemCache; + + int m_max_batch; + int m_curBatch; + + InferenceEngine::OutputsDataMap p_currentOutputs; + + std::vector GetPrevLayersPrimitives(const InferenceEngine::CNNLayerPtr layer) const; + const std::map& getInputLayouts() const { return inputLayouts; } + int GetMaxBatchSizeForSingleProgram(); + + + // internal types + enum LayerType { + Convolution, + DeformableConvolution, + ReLU, + ReLU6, + Sigmoid, + TanH, + ELU, + Activation, + Exp, + Asin, + Atan, + Acos, + Abs, + Asinh, + Acosh, + Atanh, + Not, + LRN, + Pooling, + FullyConnected, + SoftMax, + Power, + Split, + Concatenate, + Eltwise, + SimplerNMS, + ROIPooling, + Crop, + Deconvolution, + PriorBox, + DetectionOutput, + Normalize, + Reshape, + Permute, + Flatten, + BatchNormalization, + PReLU, + ScaleShift, + Proposal, + PSROIPooling, + Clamp, + Copy, + Upsampling, + Resample, + RegionYolo, + ReorgYolo, + ConstantBlob, + ArgMax, + ArgMin, + MVN, + Unpooling, + Tile, + Pad, + LSTMCell, + RNN, + Gather, + DepthToSpace, + ShuffleChannels, + StridedSlice, + Broadcast, + ReverseSequence, + BinaryConvolution, + Quantize, + Squeeze, + Unsqueeze, + Reduce, + TopK, + Floor, + Ceil, + Erf, + HardSigmoid, + Log, + Neg, + Reciprocal, + Selu, + Sign, + SoftPlus, + SoftSign, + Tan, + Gemm, + OneHot, + NO_TYPE + }; + using GenericBlobMap = std::map; + + static LayerType LayerTypeFromStr(const std::string& str); + +private: + std::vector> m_programs; + std::shared_ptr m_engine; + Config m_config; + + std::shared_ptr BuildProgram(InferenceEngine::ICNNNetwork &network); + + void InitProfileInfo(const std::string& layerName, + const std::string& layerType, + bool isCPU = false, + InferenceEngine::InferenceEngineProfileInfo::LayerStatus status + = InferenceEngine::InferenceEngineProfileInfo::EXECUTED); + + static const cldnn::primitive_id m_preProcessTag; + static const cldnn::primitive_id m_weightsTag; + static const cldnn::primitive_id m_biasesTag; + static const cldnn::primitive_id m_meanValuesTag; + static const cldnn::primitive_id m_postProcessTag; + static const cldnn::primitive_id m_scalesTag; + static const cldnn::primitive_id m_workaroundTag; + static const cldnn::primitive_id m_preCustomLayerTag; + static const cldnn::primitive_id m_postCustomLayerTag; + + + enum WeightRearrangeType { + BroadcastFeatures, + FlipDeconvDims, + NO_REARRANGE + }; + + cldnn::format m_defaultFormat; + void InitFormat(InferenceEngine::ICNNNetwork &network); + + static cldnn::data_types DataTypeFromPrecision(InferenceEngine::Precision p); + static cldnn::format FormatFromLayout(InferenceEngine::Layout l); + static cldnn::upsampling_sample_type UpsamplingTypeFromString(const std::string& str); + + void Load(InferenceEngine::ICNNNetwork &network); + static cldnn::pooling_mode PoolingModeFromIEPooling(InferenceEngine::PoolingLayer::PoolType pt, bool excludePadding = false); + static cldnn::eltwise_mode EltwiseModeFromIEEltwise(InferenceEngine::EltwiseLayer::eOperation op); + static cldnn::prior_box_code_type PriorBoxCodeFromString(const std::string& str); + static cldnn::softmax::dimension_t SoftmaxDimensionFromIEAxis(const InferenceEngine::SoftMaxLayer* 
+    cldnn::primitive_id CreatePrimitiveFromBlob(cldnn::topology& topology,
+                                                cldnn::primitive_id primID,
+                                                const InferenceEngine::Blob::Ptr pBlob,
+                                                const cldnn::layout& blobLayout,
+                                                size_t blobByteOffset = 0,
+                                                WeightRearrangeType rearrange = NO_REARRANGE);
+    void CreateWeightAndBiasPrimitives(cldnn::topology& topology,
+                                       const InferenceEngine::CNNLayerPtr& layer,
+                                       std::vector<cldnn::primitive_id>& weightsPrimID,
+                                       std::vector<cldnn::primitive_id>& biasesPrimID);
+    void CreateBinaryWeightAndBiasPrimitives(cldnn::topology& topology,
+                                             const InferenceEngine::CNNLayerPtr& layer,
+                                             std::vector<cldnn::primitive_id>& weightsPrimID,
+                                             std::vector<cldnn::primitive_id>& biasesPrimID);
+    void CreateScaleWeightsAndBiasesFromBN(cldnn::topology& topology,
+                                           const InferenceEngine::BatchNormalizationLayer* bnLayer,
+                                           cldnn::primitive_id& weightsPrimID,
+                                           cldnn::primitive_id& biasesPrimID);
+    void CreateQuantizationPrimitives(cldnn::topology& topology,
+                                      const InferenceEngine::CNNLayerPtr& layer,
+                                      std::vector<cldnn::primitive_id>& weightsQuantizationPrimID,
+                                      bool supportsDequantization,
+                                      size_t split = 1);
+    void AddPreProcessPrimitive(InferenceEngine::InputInfo::Ptr inputInfo);
+    void AddInputPrimitive(cldnn::topology& topology,
+                           InferenceEngine::InputInfo::Ptr inputInfo, InferenceEngine::Precision inputPrecision, const std::string inputName);
+    void AddOutputPrimitive(cldnn::topology& topology,
+                            std::string outputName, const InferenceEngine::DataPtr outputData,
+                            InferenceEngine::Precision outputPrecision = InferenceEngine::Precision::UNSPECIFIED);
+    void CreateSingleLayerPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr& layer);
+    bool IsValidSplitConvMerge(const InferenceEngine::SplitLayer* splitLayer) const;
+    bool CanProcessDynBatch(InferenceEngine::ICNNNetwork &network) const;
+    static std::vector<InferenceEngine::CNNLayerPtr> GetNextLayers(const InferenceEngine::DataPtr data);
+    static std::vector<InferenceEngine::CNNLayerPtr> GetNextLayers(const InferenceEngine::CNNLayerPtr layer);
+    static InferenceEngine::CNNLayerPtr GetNextSingleLayer(const InferenceEngine::DataPtr data);
+    static InferenceEngine::CNNLayerPtr GetNextSingleLayer(const InferenceEngine::CNNLayerPtr layer);
+    void AddSingleValuePrimitive(cldnn::topology& topology, cldnn::primitive_id valPrimID, cldnn::data_types dataType, float value);
+
+    GenericBlobMap CreateGenericLayerBlobPrimitives(cldnn::topology& topology, const InferenceEngine::GenericLayer* layer);
+    static void ValidateGenericLayerBlobs(const InferenceEngine::GenericLayer* layer, const std::vector<std::string>& blobNames);
+    static bool HasParam(const std::map<std::string, std::string>& layerParams, std::string paramName) {
+        auto p = layerParams.find(paramName);
+        return p != layerParams.end();
+    }
+
+    void changeInputBatch(int batch);
+
+    // Layer Primitive Creators
+    void CreatePReLUPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
+    void CreateBatchNormalizationPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr & layer);
+    void CreateFlattenPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
+    void CreatePermutePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
+    void CreateReshapePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
+    void CreateNormalizePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
+    void CreateDetectionOutputPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
+    void CreatePriorBoxPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
+    void CreateDeconvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); +
void CreateCropPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateROIPoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateSimplerNMSPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateEltwisePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateConcatenatePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateSplitPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateFusedSplitConvMergePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreatePowerPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateSoftMaxPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateFullyConnectedPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreatePoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateLRNPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateActivationPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer, const LayerType type); + void CreateConvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateDeformableConvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateScaleShiftPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateProposalPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreatePSROIPoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateCopyPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateUpsamplingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateResamplePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateYOLO2RegionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateYOLO2ReorgPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateArgMaxMinPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer, const LayerType type); + void CreateTopKPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateMaxUnpoolingPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateMVNPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateTilePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreatePadPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateRegularLSTM(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateDynamicLSTM(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateRNNPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateLSTMCellPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void AddConstantBlobInput(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void CreateCustomLayerPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer, CLDNNCustomLayerPtr customLayer); + void CreateGatherPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer); + void 
CreateDepthToSpacePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
+    void CreateShuffleChannelsPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
+    void CreateStridedSlicePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
+    void CreateBroadcastPrimitive(cldnn::topology &topology, InferenceEngine::CNNLayerPtr &layer);
+    void CreateReverseSequencePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
+    void CreateBinaryConvolutionPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
+    void CreateQuantizePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
+    void CreateGemmPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
+    void CreateReducePrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
+    void CreateOneHotPrimitive(cldnn::topology& topology, InferenceEngine::CNNLayerPtr &layer);
+};
+
+} // namespace CLDNNPlugin
diff --git a/inference-engine/src/cldnn_engine/cldnn_streams_task_executor.cpp b/inference-engine/src/cldnn_engine/cldnn_streams_task_executor.cpp
new file mode 100644
index 00000000000000..b3fdae14843846
--- /dev/null
+++ b/inference-engine/src/cldnn_engine/cldnn_streams_task_executor.cpp
@@ -0,0 +1,89 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "ie_blob.h"
+#include "ie_plugin.hpp"
+#include "cpp/ie_cnn_network.h"
+#include "debug_options.h"
+#include "inference_engine.hpp"
+#include
+#include
+#include "ie_parallel.hpp"
+#include "cldnn_streams_task_executor.h"
+
+namespace CLDNNPlugin {
+std::atomic<unsigned int> MultiWorkerTaskExecutor::waitingCounter(0u);
+
+thread_local MultiWorkerTaskContext MultiWorkerTaskExecutor::ptrContext;
+
+MultiWorkerTaskExecutor::MultiWorkerTaskExecutor(const std::vector<InferenceEngine::Task::Ptr>& init_tasks, std::string name) :
+        _isStopped(false), _name(name), _initCount(0) {
+    for (auto& t : init_tasks) {
+        _threads.emplace_back([&, t] {
+            // initialization (no contention, every worker thread is doing its own task)
+            t->runNoThrowNoBusyCheck();
+            _initCount++;
+
+            while (!_isStopped) {
+                bool isQueueEmpty;
+                InferenceEngine::Task::Ptr currentTask = nullptr;
+                {  // waiting for the new task or for stop signal
+                    std::unique_lock<std::mutex> lock(_queueMutex);
+                    _queueCondVar.wait(lock, [&]() { return !_taskQueue.empty() || _isStopped; });
+                    isQueueEmpty = _taskQueue.empty();
+                    if (!isQueueEmpty) {
+                        currentTask = _taskQueue.front();
+                        _taskQueue.pop();
+                        isQueueEmpty = _taskQueue.empty();
+                    }
+                }
+                if (currentTask) {
+                    waitingCounter--;
+                    currentTask->runNoThrowNoBusyCheck();
+                }
+                if (_isStopped)
+                    break;
+                if (isQueueEmpty)  // notify dtor, that all tasks were completed
+                    _queueCondVar.notify_all();
+            }
+        });
+    }
+    while (_initCount != init_tasks.size()) {
+        std::this_thread::sleep_for(std::chrono::milliseconds(10));
+    }
+}
+
+MultiWorkerTaskExecutor::~MultiWorkerTaskExecutor() {
+    {
+        std::unique_lock<std::mutex> lock(_queueMutex);
+        if (!_taskQueue.empty()) {
+            _queueCondVar.wait(lock, [this]() { return _taskQueue.empty(); });
+        }
+        _isStopped = true;
+        _queueCondVar.notify_all();
+    }
+    for (auto& thread : _threads) {
+        if (thread.joinable()) {
+            thread.join();
+        }
+    }
+}
+
+bool MultiWorkerTaskExecutor::startTask(InferenceEngine::Task::Ptr task) {
+    if (!task->occupy()) return false;
+    std::unique_lock<std::mutex> lock(_queueMutex);
+    _taskQueue.push(task);
+    waitingCounter++;
+    _queueCondVar.notify_one();
+    return true;
+}
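+
+// Illustrative usage only (a sketch, not part of this patch; `num_streams` and
+// the lambda bodies are placeholders): the executor is typically created with
+// one init task per stream so that each worker thread binds its per-thread
+// context, after which infer requests are funneled through startTask().
+//
+//   std::vector<InferenceEngine::Task::Ptr> init_tasks;
+//   for (int s = 0; s < num_streams; s++)
+//       init_tasks.push_back(std::make_shared<InferenceEngine::Task>(
+//           []() { /* set up MultiWorkerTaskExecutor::ptrContext for this thread */ }));
+//   auto executor = std::make_shared<MultiWorkerTaskExecutor>(init_tasks, "cldnn-streams");
+//   auto request = std::make_shared<InferenceEngine::Task>([]() { /* run one inference */ });
+//   executor->startTask(request);  // false means the task could not be occupied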
+
+}; // namespace CLDNNPlugin
diff --git a/inference-engine/src/cldnn_engine/cldnn_streams_task_executor.h b/inference-engine/src/cldnn_engine/cldnn_streams_task_executor.h
new file mode 100644
index 00000000000000..aeb6ef1c6d58d8
--- /dev/null
+++ b/inference-engine/src/cldnn_engine/cldnn_streams_task_executor.h
@@ -0,0 +1,66 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "ie_blob.h"
+#include "ie_plugin.hpp"
+#include "cpp/ie_cnn_network.h"
+#include "debug_options.h"
+#include "inference_engine.hpp"
+#include
+#include
+#include "ie_parallel.hpp"
+#include "cldnn_graph.h"
+
+namespace CLDNNPlugin {
+
+/* This structure handles an "execution context" - the data required to execute an Infer Request.
+ * This includes the graph (which handles the intermediate data) and the arena/observer for TBB. */
+struct MultiWorkerTaskContext {
+    std::shared_ptr<CLDNNGraph> ptrGraph;
+};
+
+/* Class wrapping multiple worker threads that monitor the same queue of Infer Requests. */
+class MultiWorkerTaskExecutor : public InferenceEngine::ITaskExecutor {
+    static std::atomic<unsigned int> waitingCounter;
+
+public:
+    typedef std::shared_ptr<MultiWorkerTaskExecutor> Ptr;
+
+    explicit MultiWorkerTaskExecutor(const std::vector<InferenceEngine::Task::Ptr>&, std::string name = "Default");
+
+    ~MultiWorkerTaskExecutor();
+
+    /**
+     * @brief Adds task for execution and notifies one of the working threads about the new task.
+     * @note Can be called from multiple threads - tasks will be added to the queue and executed one-by-one in FIFO mode.
+     * @param task - shared pointer to the task
+     * @return true if the task was added to the queue, otherwise false
+     */
+    bool startTask(InferenceEngine::Task::Ptr task) override;
+
+    static unsigned int GetWaitingCounter() { return waitingCounter.load(); }
+
+    static thread_local MultiWorkerTaskContext ptrContext;
+
+private:
+    std::vector<std::thread> _threads;
+    std::mutex _queueMutex;
+    std::condition_variable _queueCondVar;
+    std::queue<InferenceEngine::Task::Ptr> _taskQueue;
+    std::atomic<bool> _isStopped;
+    std::string _name;
+    std::atomic<int> _initCount;
+};
+
+}; // namespace CLDNNPlugin
diff --git a/inference-engine/src/cldnn_engine/debug_options.cpp b/inference-engine/src/cldnn_engine/debug_options.cpp
index 3c964dcaebb59c..c0767c28014033 100644
--- a/inference-engine/src/cldnn_engine/debug_options.cpp
+++ b/inference-engine/src/cldnn_engine/debug_options.cpp
@@ -188,7 +188,6 @@ void DebugOptions::PrintNetworkOutputs(std::map
+float f16tof32(ie_fp16 x) {
+    uint32_t u = static_cast<uint32_t>(x);
+
+    // get sign in 32bit format
+    uint32_t s = ((u & 0x8000) << 16);
+
+    // check for NAN and INF
+    if ((u & EXP_MASK_F16) == EXP_MASK_F16) {
+        // keep mantissa only
+        u &= 0x03FF;
+
+        // check if it is NAN and raise bit 10 to align with the intrinsics
+        if (u) {
+            u |= 0x0200;
+        }
+
+        u <<= (23 - 10);
+        u |= EXP_MASK_F32;
+        u |= s;
+    } else if ((u & EXP_MASK_F16) == 0) {  // check for zero and denormals
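+        /* Subnormal f16: the value is (h_sig / 2^10) * 2^-14 with no implicit
+           leading one. Renormalize by shifting the significand left until bit 10
+           (the implicit-one position) is set, counting the extra shifts in h_exp;
+           the f32 exponent field then becomes (127 - 15 - h_exp). */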
+        uint16_t h_sig = (u & 0x03ffu);
+        if (h_sig == 0) {
+            /* Signed zero */
+            u = s;
+        } else {
+            /* Subnormal */
+            uint16_t h_exp = (u & EXP_MASK_F16);
+            h_sig <<= 1;
+            while ((h_sig & 0x0400u) == 0) {
+                h_sig <<= 1;
+                h_exp++;
+            }
+            uint32_t f_exp = (static_cast<uint32_t>(127 - 15 - h_exp)) << 23;
+            uint32_t f_sig = (static_cast<uint32_t>(h_sig & 0x03ffu)) << 13;
+            u = s + f_exp + f_sig;
+        }
+    } else {
+        // abs
+        u = (u & 0x7FFF);
+
+        // shift mantissa and exp from f16 to f32 position
+        u <<= (23 - 10);
+
+        // new bias for exp (f16 bias is 15 and f32 bias is 127)
+        u += ((127 - 15) << 23);
+
+        // add sign
+        u |= s;
+    }
+
+    // finally represent result as float and return
+    return asfloat(u);
+}
+
diff --git a/inference-engine/src/extension/simple_copy.cpp b/inference-engine/src/extension/common/simple_copy.cpp
similarity index 100%
rename from inference-engine/src/extension/simple_copy.cpp
rename to inference-engine/src/extension/common/simple_copy.cpp
diff --git a/inference-engine/src/extension/simple_copy.h b/inference-engine/src/extension/common/simple_copy.h
similarity index 100%
rename from inference-engine/src/extension/simple_copy.h
rename to inference-engine/src/extension/common/simple_copy.h
diff --git a/inference-engine/src/extension/ext_argmax.cpp b/inference-engine/src/extension/ext_argmax.cpp
index 3a8dab38e0065b..a911476a1001a5 100644
--- a/inference-engine/src/extension/ext_argmax.cpp
+++ b/inference-engine/src/extension/ext_argmax.cpp
@@ -11,6 +11,10 @@
 #include
 #include
 #include
+#include
+#if defined(HAVE_SSE) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
+#include
+#endif
 
 namespace InferenceEngine {
 namespace Extensions {
@@ -36,53 +40,367 @@ class ArgMaxImpl: public ExtLayerBase {
         }
     }
 
-    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,
-                       ResponseDesc *resp) noexcept override {
-        SizeVector in_dims = inputs[0]->getTensorDesc().getDims();
-        SizeVector out_dims = outputs[0]->getTensorDesc().getDims();
+    template <bool out_max_val>
+    void argmax_one_class_has_axis(float* src_data, float* dst_data, SizeVector in_dims) {
+        int axis_ = (axis_index_ < 0) ? axis_index_ + static_cast<int>(in_dims.size()) : axis_index_;
+        int dim = static_cast<int>(in_dims[axis_]);
+        int before_num = count(in_dims, 0, axis_);
+        int after_num = count(in_dims, axis_ + 1, in_dims.size());
+        int first_index = 0;
+#if defined(HAVE_AVX512F)
+        const int block_size = 16;
+        typedef __m512 vec_type_f;
+        typedef __m512i vec_type_i;
+        typedef __mmask16 vmask_type;
+#elif defined(HAVE_AVX2)
+        const int block_size = 8;
+        typedef __m256 vec_type_f;
+        typedef __m256i vec_type_i;
+        typedef __m256 vmask_type;
+#elif defined(HAVE_SSE)
+        const int block_size = 4;
+        typedef __m128 vec_type_f;
+        typedef __m128i vec_type_i;
+        typedef __m128 vmask_type;
+#endif
 
-        int dim, axis_dist;
-        if (has_axis_) {
-            int axis_ = (axis_index_ < 0) ? 
axis_index_ + static_cast(in_dims.size()) : axis_index_; - dim = static_cast(inputs[0]->getTensorDesc().getDims()[axis_]); - axis_dist = count(inputs[0]->getTensorDesc().getDims(), axis_) / dim; - } else { - dim = count(inputs[0]->getTensorDesc().getDims(), 1); - axis_dist = 1; - } +#if defined(HAVE_SSE) || defined(HAVE_AVX2) || defined(HAVE_AVX512F) + parallel_for2d(before_num, after_num / block_size, [&](int i0, int ib1) { + int s_index = i0 * dim * after_num + ib1 * block_size; + vec_type_f vmax_val = _mm_uni_loadu_ps(src_data + s_index); + vec_type_i vindex_max_val = _mm_uni_setzero_si(); + for (int i2 = 1; i2 < dim; i2++) { + s_index += after_num; + vec_type_f vsrc = _mm_uni_loadu_ps(src_data + s_index); + vmask_type vmask = _mm_uni_cmpgt_ps(vsrc, vmax_val); + vmax_val = _mm_uni_blendv_ps(vmax_val, vsrc, vmask); + if (!out_max_val) { + vec_type_i vindex_cur_val = _mm_uni_set1_epi32(i2); +#if defined(HAVE_AVX512F) + vindex_max_val = _mm512_mask_blend_epi32(vmask, vindex_max_val, vindex_cur_val); +#else + vindex_max_val = _mm_uni_blendv_epi8(vindex_max_val, vindex_cur_val, _mm_uni_castps_si(vmask)); +#endif + } + } + if (!out_max_val) { + vec_type_f vindex_max_val_fp32 = _mm_uni_cvtepi32_ps(vindex_max_val); + _mm_uni_storeu_ps(dst_data + i0 * after_num + ib1 * block_size, vindex_max_val_fp32); + } else { + _mm_uni_storeu_ps(dst_data + i0 * after_num + ib1 * block_size, vmax_val); + } + }); + first_index = after_num / block_size * block_size; +#endif + int rest = after_num - first_index; + parallel_for2d(before_num, rest, [&](int i0, int i1) { + int index_max_val = 0; + int s_index = i0 * dim * after_num + first_index + i1; + float max_val = src_data[s_index]; + for (int i2 = 1; i2 < dim; i2++) { + s_index += after_num; + if (src_data[s_index] > max_val) { + max_val = src_data[s_index]; + if (!out_max_val) { + index_max_val = i2; + } + } + } + if (!out_max_val) + dst_data[i0 * after_num + first_index + i1] = static_cast(index_max_val); + else + dst_data[i0 * after_num + first_index + i1] = max_val; + }); + } - float* src_data = inputs[0]->buffer(); - float* dst_data = outputs[0]->buffer(); + template + void argmax_one_class(float* src_data, float* dst_data, SizeVector in_dims) { + int dim = count(in_dims, 1); + int before_num = in_dims[0]; + parallel_for(before_num, [&](int i0) { + int index_max_val = 0; + int s_index = i0 * dim; + float max_val = src_data[s_index]; + for (int i1 = 1; i1 < dim; i1++) { + s_index++; + if (src_data[s_index] > max_val) { + max_val = src_data[s_index]; + index_max_val = i1; + } + } + if (!out_max_val) { + dst_data[i0] = static_cast(index_max_val); + } else { + dst_data[i0 * 2] = static_cast(index_max_val); + dst_data[i0 * 2 + 1] = max_val; + } + }); + } + + template + void argmax_many_classes_has_axis(float* src_data, float* dst_data, SizeVector in_dims) { + int axis_ = (axis_index_ < 0) ? 
axis_index_ + static_cast(in_dims.size()) : axis_index_; + int dim = static_cast(in_dims[axis_]); + int before_num = count(in_dims, 0, axis_); + int after_num = count(in_dims, axis_ + 1, in_dims.size()); + int first_index = 0; +#if defined(HAVE_AVX512F) + const int block_size = 16; + typedef __m512 vec_type_f; + typedef __m512i vec_type_i; + typedef __mmask16 vmask_type; +#elif defined(HAVE_AVX2) + const int block_size = 8; + typedef __m256 vec_type_f; + typedef __m256i vec_type_i; + typedef __m256 vmask_type; +#elif defined(HAVE_SSE) + const int block_size = 4; + typedef __m128 vec_type_f; + typedef __m128i vec_type_i; + typedef __m128 vmask_type; +#endif + +#if defined(HAVE_SSE) || defined(HAVE_AVX2) || defined(HAVE_AVX512F) + if (top_k_ < count_vec) { + parallel_for2d(before_num, after_num / block_size, [&](int i0, int ib1) { +#if defined(HAVE_AVX512F) + const int N = 32; + vec_type_f vmax_values[N]; + vec_type_i vmax_indexes[N]; +#else + const int N = 16; + vec_type_f vmax_values[N]; + vec_type_i vmax_indexes[N]; +#endif + vec_type_f vtmp; + vec_type_i vtmp_indexes; + vmask_type vmask; + int s_index = i0 * dim * after_num + ib1 * block_size; + + auto vswap_func = [&](int index1, int index2) { + vtmp = vmax_values[index1]; + vmax_values[index1] = _mm_uni_blendv_ps(vmax_values[index1], vmax_values[index2], vmask); + vmax_values[index2] = _mm_uni_blendv_ps(vmax_values[index2], vtmp, vmask); + if (!out_max_val) { + vtmp_indexes = vmax_indexes[index1]; +#if defined(HAVE_AVX512F) + vmax_indexes[index1] = _mm512_mask_blend_epi32(vmask, vmax_indexes[index1], vmax_indexes[index2]); + vmax_indexes[index2] = _mm512_mask_blend_epi32(vmask, vmax_indexes[index2], vtmp_indexes); +#else + vmax_indexes[index1] = _mm_uni_blendv_epi8(vmax_indexes[index1], vmax_indexes[index2], _mm_uni_castps_si(vmask)); + vmax_indexes[index2] = _mm_uni_blendv_epi8(vmax_indexes[index2], vtmp_indexes, _mm_uni_castps_si(vmask)); +#endif + } + }; - int num = count(in_dims) / dim; - std::vector > src_vector(dim); + for (int i2 = 0; i2 < top_k_; i2++) { + vmax_values[i2] = _mm_uni_loadu_ps(src_data + s_index); + if (!out_max_val) { + vmax_indexes[i2] = _mm_uni_set1_epi32(i2); + } + s_index += after_num; + } + for (int i2 = 0; i2 < top_k_ - 1; i2++) { + for (int i3 = top_k_ - 1; i3 > i2; i3--) { + vmask = _mm_uni_cmpgt_ps(vmax_values[i3], vmax_values[i3 - 1]); +#if defined(HAVE_AVX512F) + if (vmask) { + vswap_func(i3, i3 - 1); + } +#else + int swap = _mm_uni_movemask_ps(vmask); + if (swap) { + vswap_func(i3, i3 - 1); + } +#endif + } + } + for (int i2 = top_k_; i2 < dim; i2++) { + vmax_values[top_k_] = _mm_uni_loadu_ps(src_data + s_index); + if (!out_max_val) { + vmax_indexes[top_k_] = _mm_uni_set1_epi32(i2); + } + for (int i3 = top_k_; i3 > 0; i3--) { + vmask = _mm_uni_cmpgt_ps(vmax_values[i3], vmax_values[i3 - 1]); +#if defined(HAVE_AVX512F) + if (vmask) { + vswap_func(i3, i3 - 1); + } else { + break; + } +#else + int swap = _mm_uni_movemask_ps(vmask); + if (swap) { + vswap_func(i3, i3 - 1); + } else { + break; + } +#endif + } + s_index += after_num; + } + for (int i2 = 0; i2 < top_k_; i2++) { + if (!out_max_val) { + _mm_uni_storeu_ps(dst_data + (i0 * top_k_ + i2) * after_num + ib1 * block_size, + _mm_uni_cvtepi32_ps(vmax_indexes[i2])); + } else { + _mm_uni_storeu_ps(dst_data + (i0 * top_k_ + i2) * after_num + ib1 * block_size, vmax_values[i2]); + } + } + }); + first_index = after_num / block_size * block_size; + } +#endif + int rest = after_num - first_index; + parallel_for2d(before_num, rest, [&](int i0, int i1) { + 
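            // Scalar tail for the lanes left over after the SIMD blocks: keep
+            // the current top_k_ values as a sorted window (slot top_k_ is
+            // scratch for the incoming candidate) and bubble each new value up
+            // until it meets a larger one - the scalar mirror of the vector path.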
+            std::vector<float> max_values(top_k_ + 1);
+            std::vector<int> max_indexes(top_k_ + 1);
+            float tmp_value;
+            int tmp_index;
+            int s_index = i0 * dim * after_num + first_index + i1;
+
+            auto swap_func = [&](int index1, int index2) {
+                tmp_value = max_values[index1];
+                max_values[index1] = max_values[index2];
+                max_values[index2] = tmp_value;
+                if (!out_max_val) {
+                    tmp_index = max_indexes[index1];
+                    max_indexes[index1] = max_indexes[index2];
+                    max_indexes[index2] = tmp_index;
+                }
+            };
-
-        for (int i = 0; i < num; ++i) {
-            for (int j = 0; j < dim; ++j) {
-                src_vector[j] = std::make_pair(
-                    src_data[(i / axis_dist * dim + j) * axis_dist + i % axis_dist], j);
+            for (int i2 = 0; i2 < top_k_; i2++) {
+                max_values[i2] = src_data[s_index];
+                if (!out_max_val) {
+                    max_indexes[i2] = i2;
+                }
+                s_index += after_num;
+            }
+            for (int i2 = 0; i2 < top_k_ - 1; i2++) {
+                for (int i3 = top_k_ - 1; i3 > i2; i3--) {
+                    if (max_values[i3] > max_values[i3 - 1]) {
+                        swap_func(i3, i3 - 1);
+                    }
+                }
             }
+            for (int i2 = top_k_; i2 < dim; i2++) {
+                max_values[top_k_] = src_data[s_index];
+                if (!out_max_val) {
+                    max_indexes[top_k_] = i2;
+                }
+                for (int i3 = top_k_; i3 > 0; i3--) {
+                    if (max_values[i3] > max_values[i3 - 1]) {
+                        swap_func(i3, i3 - 1);
+                    } else {
+                        break;
+                    }
+                }
+                s_index += after_num;
+            }
+            for (int i2 = 0; i2 < top_k_; i2++) {
+                if (!out_max_val) {
+                    dst_data[i0 * top_k_ * after_num + i2 * after_num + first_index + i1] = static_cast<float>(max_indexes[i2]);
+                } else {
+                    dst_data[i0 * top_k_ * after_num + i2 * after_num + first_index + i1] = max_values[i2];
+                }
+            }
+        });
+    }
 
-            std::partial_sort(src_vector.begin(), src_vector.begin() + top_k_,
-                              src_vector.end(), std::greater<std::pair<float, int>>());
+    template <bool out_max_val>
+    void argmax_many_classes(float* src_data, float* dst_data, SizeVector in_dims) {
+        int dim = count(in_dims, 1);
+        int before_num = in_dims[0];
+        parallel_for(before_num, [&](int i0) {
+            std::vector<float> max_values(top_k_ + 1);
+            std::vector<int> max_indexes(top_k_ + 1);
+            float tmp_value;
+            int tmp_index;
+            int s_index = i0 * dim;
 
-            for (int j = 0; j < top_k_; ++j) {
-                if (out_max_val_) {
-                    if (has_axis_) {
-                        // Produces max_val per axis
-                        dst_data[(i / axis_dist * top_k_ + j) * axis_dist + i % axis_dist] = src_vector[j].first;
+            auto swap_func = [&](int index1, int index2) {
+                tmp_value = max_values[index1];
+                max_values[index1] = max_values[index2];
+                max_values[index2] = tmp_value;
+
+                tmp_index = max_indexes[index1];
+                max_indexes[index1] = max_indexes[index2];
+                max_indexes[index2] = tmp_index;
+            };
+
+            for (int i2 = 0; i2 < top_k_; i2++) {
+                max_values[i2] = src_data[s_index];
+                max_indexes[i2] = i2;
+                s_index++;
+            }
+            for (int i2 = 0; i2 < top_k_ - 1; i2++) {
+                for (int i3 = top_k_ - 1; i3 > i2; i3--) {
+                    if (max_values[i3] > max_values[i3 - 1]) {
+                        swap_func(i3, i3 - 1);
+                    }
+                }
+            }
+            for (int i2 = top_k_; i2 < dim; i2++) {
+                max_values[top_k_] = src_data[s_index];
+                max_indexes[top_k_] = i2;
+                for (int i3 = top_k_; i3 > 0; i3--) {
+                    if (max_values[i3] > max_values[i3 - 1]) {
+                        swap_func(i3, i3 - 1);
                     } else {
-                        // Produces max_ind and max_val
-                        dst_data[2 * i * top_k_ + j] = static_cast<float>(src_vector[j].second);
-                        dst_data[2 * i * top_k_ + top_k_ + j] = src_vector[j].first;
+                        break;
                     }
+                }
+                s_index++;
+            }
+            for (int i2 = 0; i2 < top_k_; i2++) {
+                if (!out_max_val) {
+                    dst_data[i0 * top_k_ + i2] = static_cast<float>(max_indexes[i2]);
                 } else {
-                    // Produces max_ind per axis
-                    dst_data[(i / axis_dist * top_k_ + j) * axis_dist + i % axis_dist] = static_cast<float>(src_vector[j].second);
+                    dst_data[i0 * 2 * top_k_ + i2] = static_cast<float>(max_indexes[i2]);
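+                    // out_max_val: each row is packed as [top_k_ indices | top_k_ values]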
+                    dst_data[i0 * 2 * top_k_ + top_k_ + i2] = max_values[i2];
                 }
             }
-        }
+        });
+    }
+
+    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,
+                       ResponseDesc *resp) noexcept override {
+        SizeVector in_dims = inputs[0]->getTensorDesc().getDims();
+        float* src_data = inputs[0]->buffer();
+        float* dst_data = outputs[0]->buffer();
+
+        if (top_k_ == 1) {
+            if (has_axis_) {
+                if (out_max_val_) {
+                    argmax_one_class_has_axis<true>(src_data, dst_data, in_dims);
+                } else {
+                    argmax_one_class_has_axis<false>(src_data, dst_data, in_dims);
+                }
+            } else {
+                if (out_max_val_) {
+                    argmax_one_class<true>(src_data, dst_data, in_dims);
+                } else {
+                    argmax_one_class<false>(src_data, dst_data, in_dims);
+                }
+            }
+        } else {
+            if (has_axis_) {
+                if (out_max_val_) {
+                    argmax_many_classes_has_axis<true>(src_data, dst_data, in_dims);
+                } else {
+                    argmax_many_classes_has_axis<false>(src_data, dst_data, in_dims);
+                }
+            } else {
+                if (out_max_val_) {
+                    argmax_many_classes<true>(src_data, dst_data, in_dims);
+                } else {
+                    argmax_many_classes<false>(src_data, dst_data, in_dims);
+                }
+            }
+        }
 
         return OK;
     }
@@ -92,6 +410,12 @@ class ArgMaxImpl: public ExtLayerBase {
     bool has_axis_;
     int axis_index_;
 
+#if defined(HAVE_AVX512F)
+    const int count_vec = 32;
+#elif defined(HAVE_SSE) || defined(HAVE_AVX2)
+    const int count_vec = 16;
+#endif
+
     inline int count(SizeVector dims, size_t start_ind, size_t end_ind) {
         size_t count = 1;
         for (size_t i = start_ind; i < end_ind; i++)
diff --git a/inference-engine/src/extension/ext_base.cpp b/inference-engine/src/extension/ext_base.cpp
index dc1339a858921c..a20b336ad2f535 100644
--- a/inference-engine/src/extension/ext_base.cpp
+++ b/inference-engine/src/extension/ext_base.cpp
@@ -59,10 +59,10 @@ void ExtLayerBase::addConfig(const CNNLayer* layer, std::vector
     if (in_l.size() != layer->insData.size())
-        THROW_IE_EXCEPTION << "Incorrect number of input edges. Expected " << layer->insData.size()
+        THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << layer->name << ". Expected " << layer->insData.size()
                            << " but layout specification provided for " << in_l.size();
     if (out_l.size() != layer->outData.size())
-        THROW_IE_EXCEPTION << "Incorrect number of input edges. Expected " << layer->outData.size()
+        THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << layer->name << ". 
Expected " << layer->outData.size() << " but layout specification provided for " << out_l.size(); // Fill tensor parameters into config diff --git a/inference-engine/src/extension/ext_base.hpp b/inference-engine/src/extension/ext_base.hpp index 79148421ed52b0..59f143f9fc3385 100644 --- a/inference-engine/src/extension/ext_base.hpp +++ b/inference-engine/src/extension/ext_base.hpp @@ -8,7 +8,7 @@ #include #include -#if defined(HAVE_AVX2) || defined(HAVE_AVX512F) +#if defined(HAVE_SSE) || defined(HAVE_AVX2) || defined(HAVE_AVX512F) #include #endif @@ -51,6 +51,10 @@ class ExtLayerBase: public ILayerExecImpl { return _mm512_storeu_ps(pdst, vec); } + static inline void _mm_uni_storeu_si(void* pdst, const __m512i vec) { + return _mm512_storeu_si512(pdst, vec); + } + static inline __m512 _mm_uni_setzero_ps() { return _mm512_setzero_ps(); } @@ -78,6 +82,74 @@ class ExtLayerBase: public ILayerExecImpl { static inline __m512 _mm_uni_sqrt_ps(__m512 vec) { return _mm512_sqrt_ps(vec); } + + static inline __m512 _mm_uni_and_ps(__m512 vec0, __m512 vec1) { + return _mm512_castsi512_ps(_mm512_and_epi32(_mm512_castps_si512(vec0), _mm512_castps_si512(vec1))); + } + + static inline __m512 _mm_uni_or_ps(__m512 vec0, __m512 vec1) { + return _mm512_castsi512_ps(_mm512_or_epi32(_mm512_castps_si512(vec0), _mm512_castps_si512(vec1))); + } + + static inline __m512 _mm_uni_blendv_ps(__m512 vec0, __m512 vec1, __m512 vmask) { + return _mm512_mask_blend_ps(_mm512_cmpneq_epi32_mask(_mm512_castps_si512(vmask), _mm512_set1_epi32(0)), vec0, vec1); + } + + static inline __m512 _mm_uni_blendv_ps(__m512 vec0, __m512 vec1, __mmask16 vmask) { + return _mm512_mask_blend_ps(vmask, vec0, vec1); + } + + static inline __m512 _mm_uni_min_ps(__m512 vec0, __m512 vec1) { + return _mm512_min_ps(vec0, vec1); + } + + static inline __m512 _mm_uni_max_ps(__m512 vec0, __m512 vec1) { + return _mm512_max_ps(vec0, vec1); + } + + static inline __m512 _mm_uni_floor_ps(__m512 vec) { + return _mm512_floor_ps(vec); + } + + static inline __m512i _mm_uni_cvtps_epi32(__m512 vec) { + return _mm512_cvtps_epi32(vec); + } + + static inline __m512i _mm_uni_add_epi32(__m512i vec0, __m512i vec1) { + return _mm512_add_epi32(vec0, vec1); + } + + static inline __m512i _mm_uni_set1_epi32(int value) { + return _mm512_set1_epi32(value); + } + + static inline __m512i _mm_uni_slli_epi32(__m512i vec, int value) { + return _mm512_sll_epi32(vec, _mm_set1_epi64x(value)); + } + + static inline __m512 _mm_uni_castsi_ps(__m512i vec) { + return _mm512_castsi512_ps(vec); + } + + static inline __m512i _mm_uni_setzero_si() { + return _mm512_setzero_si512(); + } + + static inline __mmask16 _mm_uni_cmpgt_ps(__m512 vec0, __m512 vec1) { + return _mm512_cmp_ps_mask(vec0, vec1, 14); + } + + static inline __mmask16 _mm_uni_cmpgt_i32(__m512i vec0, __m512i vec1) { + return _mm512_cmp_epi32_mask(vec1, vec0, 1); + } + + static inline __m512i _mm_uni_castps_si(__m512 vec) { + return _mm512_castps_si512(vec); + } + + static inline __m512 _mm_uni_cvtepi32_ps(__m512i vec) { + return _mm512_cvtepi32_ps(vec); + } #elif defined(HAVE_AVX2) static inline __m256 _mm_uni_loadu_ps(const float* psrc) { return _mm256_loadu_ps(psrc); @@ -87,6 +159,10 @@ class ExtLayerBase: public ILayerExecImpl { return _mm256_storeu_ps(pdst, vec); } + static inline void _mm_uni_storeu_si(__m256i* pdst, const __m256i vec) { + return _mm256_storeu_si256(pdst, vec); + } + static inline __m256 _mm_uni_setzero_ps() { return _mm256_setzero_ps(); } @@ -114,6 +190,189 @@ class ExtLayerBase: public ILayerExecImpl { static inline 
__m256 _mm_uni_sqrt_ps(__m256 vec) { return _mm256_sqrt_ps(vec); } + + static inline __m256 _mm_uni_and_ps(__m256 vec0, __m256 vec1) { + return _mm256_and_ps(vec0, vec1); + } + + static inline __m256 _mm_uni_or_ps(__m256 vec0, __m256 vec1) { + return _mm256_or_ps(vec0, vec1); + } + + static inline __m256 _mm_uni_blendv_ps(__m256 vec0, __m256 vec1, __m256 vmask) { + return _mm256_blendv_ps(vec0, vec1, vmask); + } + + static inline __m256 _mm_uni_min_ps(__m256 vec0, __m256 vec1) { + return _mm256_min_ps(vec0, vec1); + } + + static inline __m256 _mm_uni_max_ps(__m256 vec0, __m256 vec1) { + return _mm256_max_ps(vec0, vec1); + } + + static inline __m256 _mm_uni_floor_ps(__m256 vec) { + return _mm256_floor_ps(vec); + } + + static inline __m256i _mm_uni_cvtps_epi32(__m256 vec) { + return _mm256_cvtps_epi32(vec); + } + + static inline __m256i _mm_uni_add_epi32(__m256i vec0, __m256i vec1) { + return _mm256_add_epi32(vec0, vec1); + } + + static inline __m256i _mm_uni_set1_epi32(int value) { + return _mm256_set1_epi32(value); + } + + static inline __m256i _mm_uni_slli_epi32(__m256i vec, int value) { + return _mm256_slli_epi32(vec, value); + } + + static inline __m256 _mm_uni_castsi_ps(__m256i vec) { + return _mm256_castsi256_ps(vec); + } + + static inline __m256i _mm_uni_setzero_si() { + return _mm256_setzero_si256(); + } + + static inline __m256 _mm_uni_cmpgt_ps(__m256 vec0, __m256 vec1) { + return _mm256_cmp_ps(vec0, vec1, 14); + } + + static inline __m256 _mm_uni_cmpgt_i32(__m256i vec0, __m256i vec1) { + return _mm256_cvtepi32_ps(_mm256_cmpgt_epi32(vec0, vec1)); + } + + static inline __m256i _mm_uni_blendv_epi8(__m256i vec0, __m256i vec1, __m256i vmask) { + return _mm256_blendv_epi8(vec0, vec1, vmask); + } + + static inline __m256i _mm_uni_castps_si(__m256 vec) { + return _mm256_castps_si256(vec); + } + + static inline __m256 _mm_uni_cvtepi32_ps(__m256i vec) { + return _mm256_cvtepi32_ps(vec); + } + + static inline int _mm_uni_movemask_ps(__m256 vec) { + return _mm256_movemask_ps(vec); + } +#elif defined(HAVE_SSE) + static inline __m128 _mm_uni_loadu_ps(const float* psrc) { + return _mm_loadu_ps(psrc); + } + + static inline void _mm_uni_storeu_ps(float* pdst, const __m128 vec) { + return _mm_storeu_ps(pdst, vec); + } + + static inline void _mm_uni_storeu_si(__m128i* pdst, const __m128i vec) { + return _mm_storeu_si128(pdst, vec); + } + + static inline __m128 _mm_uni_setzero_ps() { + return _mm_setzero_ps(); + } + + static inline __m128 _mm_uni_set1_ps(float value) { + return _mm_set1_ps(value); + } + + static inline __m128 _mm_uni_add_ps(__m128 vec0, __m128 vec1) { + return _mm_add_ps(vec0, vec1); + } + + static inline __m128 _mm_uni_sub_ps(__m128 vec0, __m128 vec1) { + return _mm_sub_ps(vec0, vec1); + } + + static inline __m128 _mm_uni_mul_ps(__m128 vec0, __m128 vec1) { + return _mm_mul_ps(vec0, vec1); + } + + static inline __m128 _mm_uni_div_ps(__m128 vec0, __m128 vec1) { + return _mm_div_ps(vec0, vec1); + } + + static inline __m128 _mm_uni_sqrt_ps(__m128 vec) { + return _mm_sqrt_ps(vec); + } + + static inline __m128 _mm_uni_and_ps(__m128 vec0, __m128 vec1) { + return _mm_and_ps(vec0, vec1); + } + + static inline __m128 _mm_uni_or_ps(__m128 vec0, __m128 vec1) { + return _mm_or_ps(vec0, vec1); + } + + static inline __m128 _mm_uni_blendv_ps(__m128 vec0, __m128 vec1, __m128 vmask) { + return _mm_blendv_ps(vec0, vec1, vmask); + } + + static inline __m128 _mm_uni_min_ps(__m128 vec0, __m128 vec1) { + return _mm_min_ps(vec0, vec1); + } + + static inline __m128 _mm_uni_max_ps(__m128 vec0, __m128 vec1) { 
+ return _mm_max_ps(vec0, vec1); + } + + static inline __m128 _mm_uni_floor_ps(__m128 vec) { + return _mm_floor_ps(vec); + } + + static inline __m128i _mm_uni_cvtps_epi32(__m128 vec) { + return _mm_cvtps_epi32(vec); + } + + static inline __m128i _mm_uni_add_epi32(__m128i vec0, __m128i vec1) { + return _mm_add_epi32(vec0, vec1); + } + + static inline __m128i _mm_uni_set1_epi32(int value) { + return _mm_set1_epi32(value); + } + + static inline __m128i _mm_uni_slli_epi32(__m128i vec, int value) { + return _mm_slli_epi32(vec, value); + } + + static inline __m128 _mm_uni_castsi_ps(__m128i vec) { + return _mm_castsi128_ps(vec); + } + + static inline __m128i _mm_uni_setzero_si() { + return _mm_setzero_si128(); + } + + static inline __m128 _mm_uni_cmpgt_ps(__m128 vec0, __m128 vec1) { + return _mm_cmpgt_ps(vec0, vec1); + } + + static inline __m128 _mm_uni_cmpgt_i32(__m128i vec0, __m128i vec1) { + return _mm_cvtepi32_ps(_mm_cmpgt_epi32(vec0, vec1)); + } + + static inline __m128i _mm_uni_blendv_epi8(__m128i vec0, __m128i vec1, __m128i vmask) { + return _mm_blendv_epi8(vec0, vec1, vmask); + } + + static inline __m128i _mm_uni_castps_si(__m128 vec) { + return _mm_castps_si128(vec); + } + + static inline __m128 _mm_uni_cvtepi32_ps(__m128i vec) { + return _mm_cvtepi32_ps(vec); + } + static inline int _mm_uni_movemask_ps(__m128 vec) { + return _mm_movemask_ps(vec); + } #endif }; diff --git a/inference-engine/src/extension/ext_broadcast.cpp b/inference-engine/src/extension/ext_broadcast.cpp new file mode 100644 index 00000000000000..8de54b35f4d942 --- /dev/null +++ b/inference-engine/src/extension/ext_broadcast.cpp @@ -0,0 +1,192 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ext_list.hpp" +#include "ext_base.hpp" + +#include +#include +#include +#include +#include "ie_parallel.hpp" + +namespace InferenceEngine { +namespace Extensions { +namespace Cpu { + +class BroadcastImpl: public ExtLayerBase { +public: + explicit BroadcastImpl(const CNNLayer* layer) { + try { + if (layer->insData.empty() || layer->outData.empty()) + THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!"; + + if (layer->insData.size() != 2) + THROW_IE_EXCEPTION << layer->name << " Incorrect number of input edges!"; + + SizeVector shape_dims = layer->insData[BROADCAST_SHAPE].lock()->getTensorDesc().getDims(); + if (shape_dims.size() > 1) + THROW_IE_EXCEPTION << layer->name << " Shape vector should be 1 dimension"; + + if (layer->insData[BROADCAST_SHAPE].lock()->getTensorDesc().getPrecision() != Precision::I32) + THROW_IE_EXCEPTION << layer->name << " Shape vector should be I32!"; + + if (!(layer->insData[BROADCAST_INPUT].lock()->getTensorDesc().getPrecision() == Precision::I32 && + layer->outData[0]->getTensorDesc().getPrecision() == Precision::I32) && + !(layer->insData[BROADCAST_INPUT].lock()->getTensorDesc().getPrecision() == Precision::FP32 && + layer->outData[0]->getTensorDesc().getPrecision() == Precision::FP32)) { + THROW_IE_EXCEPTION << layer->name << + " Input and output tensors should have same precision and only FP32 and I32 are supported!"; + } + + src_dims = layer->insData[BROADCAST_INPUT].lock()->getTensorDesc().getDims(); + srcStrides = layer->insData[BROADCAST_INPUT].lock()->getTensorDesc().getBlockingDesc().getStrides(); + addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) }, + { DataConfigurator(ConfLayout::PLN) }); + } catch (InferenceEngine::details::InferenceEngineException &ex) { + errorMsg = 
ex.what(); + } + } + + StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { + int32_t* shape_dims = inputs[BROADCAST_SHAPE]->cbuffer().as() + + inputs[BROADCAST_SHAPE]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + size_t shape_size = (inputs[BROADCAST_SHAPE]->getTensorDesc().getDims())[0]; + SizeVector dst_dims = outputs[0]->getTensorDesc().getDims(); + + if (dst_dims.size() != shape_size) { + if (resp) { + std::string errorMsg = "Output tensor dimension mismatch"; + errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); + } + return PARAMETER_MISMATCH; + } + + if (src_dims.size() > dst_dims.size()) { + if (resp) { + std::string errorMsg = "Output tensor dimension is smaller then input tensor dimension"; + errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); + } + return PARAMETER_MISMATCH; + } + + size_t i; + for (i = 0; i < dst_dims.size(); i++) { + if (static_cast(dst_dims[i]) != shape_dims[i]) { + if (resp) { + std::string errorMsg = "Output tensor dimension size mismatch"; + errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); + } + return PARAMETER_MISMATCH; + } + } + + size_t prefix_size = dst_dims.size() - src_dims.size(); + for (i = 0; i < src_dims.size(); i++) { + if (src_dims[i] != 1 && + static_cast(src_dims[i]) != shape_dims[i + prefix_size]) { + if (resp) { + std::string errorMsg = "In/Output corresponding dimension must have the same value, or Input dimension is equal to 1"; + errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); + } + return PARAMETER_MISMATCH; + } + } + + InferenceEngine::SizeVector dstStrides = outputs[0]->getTensorDesc().getBlockingDesc().getStrides(); + InferenceEngine::SizeVector src_aligned(dst_dims.size()); + InferenceEngine::SizeVector srcStrides_aligned(dst_dims.size()); + for (i = 0; i < dst_dims.size(); i++) { + if (i < prefix_size) { + src_aligned[i] = 1; + srcStrides_aligned[i] = srcStrides[0]; + } else { + src_aligned[i] = src_dims[i - prefix_size]; + srcStrides_aligned[i] = srcStrides[i - prefix_size]; + } + } + + size_t work_amount_dst = dstStrides[0] * dst_dims[0]; + + switch (outputs[0]->getTensorDesc().getPrecision()) { + case Precision::FP32: { + const float *src_data = inputs[BROADCAST_INPUT]->cbuffer().as() + + inputs[BROADCAST_INPUT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + float* dst_data = outputs[0]->cbuffer().as() + + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + + parallel_nt(0, [&](const int ithr, const int nthr) { + size_t i, src_idx, start = 0, end = 0; + SizeVector counters(dst_dims.size(), 0); + splitter(work_amount_dst, nthr, ithr, start, end); + for (int j = dst_dims.size() - 1, i = start; j >= 0; j--) { + counters[j] = i % dst_dims[j]; + i /= dst_dims[j]; + } + for (size_t iwork = start; iwork < end; ++iwork) { + for (i = 0, src_idx = 0; i < dst_dims.size(); ++i) + src_idx += counters[i] ? 
((counters[i] % src_aligned[i]) * srcStrides_aligned[i]) : 0; + + dst_data[iwork] = src_data[src_idx]; + + for (int j = dst_dims.size() - 1; j >= 0; j--) { + counters[j] = (counters[j] + 1) % dst_dims[j]; + if (counters[j] != 0) break; + } + } + }); + } + break; + case Precision::I32: { + const int32_t *src_data = inputs[BROADCAST_INPUT]->cbuffer().as() + + inputs[BROADCAST_INPUT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + int32_t* dst_data = outputs[0]->cbuffer().as() + + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + + parallel_nt(0, [&](const int ithr, const int nthr) { + size_t i, src_idx, start = 0, end = 0; + SizeVector counters(dst_dims.size(), 0); + splitter(work_amount_dst, nthr, ithr, start, end); + for (int j = dst_dims.size() - 1, i = start; j >= 0; j--) { + counters[j] = i % dst_dims[j]; + i /= dst_dims[j]; + } + for (size_t iwork = start; iwork < end; ++iwork) { + for (i = 0, src_idx = 0; i < dst_dims.size(); ++i) + src_idx += counters[i] ? ((counters[i] % src_aligned[i]) * srcStrides_aligned[i]) : 0; + + dst_data[iwork] = src_data[src_idx]; + + for (int j = dst_dims.size() - 1; j >= 0; j--) { + counters[j] = (counters[j] + 1) % dst_dims[j]; + if (counters[j] != 0) break; + } + } + }); + } + break; + default: + if (resp) { + std::string errorMsg = "Incorrect output precision. Only FP32 and I32 are supported!"; + errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); + } + return GENERAL_ERROR; + } + + return OK; + } + +private: + const size_t BROADCAST_INPUT = 0; + const size_t BROADCAST_SHAPE = 1; + + SizeVector src_dims; + SizeVector srcStrides; +}; + +REG_FACTORY_FOR(ImplFactory, Broadcast); + +} // namespace Cpu +} // namespace Extensions +} // namespace InferenceEngine diff --git a/inference-engine/src/extension/ext_detectionoutput.cpp b/inference-engine/src/extension/ext_detectionoutput.cpp index 1ec523fa7d38b4..0a3c445500d817 100644 --- a/inference-engine/src/extension/ext_detectionoutput.cpp +++ b/inference-engine/src/extension/ext_detectionoutput.cpp @@ -35,16 +35,16 @@ class DetectionOutputImpl: public ExtLayerBase { _num_classes = layer->GetParamAsInt("num_classes"); _background_label_id = layer->GetParamAsInt("background_label_id", 0); _top_k = layer->GetParamAsInt("top_k", -1); - _variance_encoded_in_target = layer->GetParamsAsBool("variance_encoded_in_target", false); + _variance_encoded_in_target = layer->GetParamAsBool("variance_encoded_in_target", false); _keep_top_k = layer->GetParamAsInt("keep_top_k", -1); _nms_threshold = layer->GetParamAsFloat("nms_threshold"); _confidence_threshold = layer->GetParamAsFloat("confidence_threshold", -FLT_MAX); - _share_location = layer->GetParamsAsBool("share_location", true); - _clip_before_nms = layer->GetParamsAsBool("clip_before_nms", false) || - layer->GetParamsAsBool("clip", false); // for backward compatibility - _clip_after_nms = layer->GetParamsAsBool("clip_after_nms", false); - _decrease_label_id = layer->GetParamsAsBool("decrease_label_id", false); - _normalized = layer->GetParamsAsBool("normalized", true); + _share_location = layer->GetParamAsBool("share_location", true); + _clip_before_nms = layer->GetParamAsBool("clip_before_nms", false) || + layer->GetParamAsBool("clip", false); // for backward compatibility + _clip_after_nms = layer->GetParamAsBool("clip_after_nms", false); + _decrease_label_id = layer->GetParamAsBool("decrease_label_id", false); + _normalized = layer->GetParamAsBool("normalized", true); _image_height = layer->GetParamAsInt("input_height", 1); _image_width = 
layer->GetParamAsInt("input_width", 1); _prior_size = _normalized ? 4 : 5; @@ -63,7 +63,7 @@ class DetectionOutputImpl: public ExtLayerBase { << _num_priors * _num_loc_classes * 4 << " vs " << layer->insData[idx_location].lock()->getDims()[1] << ")"; - if (_num_priors * _num_classes != static_cast(layer->insData[idx_confidence].lock()->dims[0])) + if (_num_priors * _num_classes != static_cast(layer->insData[idx_confidence].lock()->getTensorDesc().getDims().back())) THROW_IE_EXCEPTION << "Number of priors must match number of confidence predictions."; if (_decrease_label_id && _background_label_id != 0) @@ -95,7 +95,7 @@ class DetectionOutputImpl: public ExtLayerBase { _detections_count = InferenceEngine::make_shared_blob({Precision::I32, detections_size, C}); _detections_count->allocate(); - InferenceEngine::SizeVector conf_size = layer->insData[idx_confidence].lock()->dims; + const InferenceEngine::SizeVector &conf_size = layer->insData[idx_confidence].lock()->getTensorDesc().getDims(); _reordered_conf = InferenceEngine::make_shared_blob({Precision::FP32, conf_size, ANY}); _reordered_conf->allocate(); diff --git a/inference-engine/src/extension/ext_expand.cpp b/inference-engine/src/extension/ext_expand.cpp deleted file mode 100644 index ec712239b65190..00000000000000 --- a/inference-engine/src/extension/ext_expand.cpp +++ /dev/null @@ -1,192 +0,0 @@ -// Copyright (C) 2018-2019 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "ext_list.hpp" -#include "ext_base.hpp" - -#include -#include -#include -#include -#include "ie_parallel.hpp" - -namespace InferenceEngine { -namespace Extensions { -namespace Cpu { - -class ExpandImpl: public ExtLayerBase { -public: - explicit ExpandImpl(const CNNLayer* layer) { - try { - if (layer->insData.empty() || layer->outData.empty()) - THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!"; - - if (layer->insData.size() != 2) - THROW_IE_EXCEPTION << layer->name << " Incorrect number of input edges!"; - - SizeVector shape_dims = layer->insData[EXPAND_SHAPE].lock()->getTensorDesc().getDims(); - if (shape_dims.size() > 1) - THROW_IE_EXCEPTION << layer->name << " Shape vector should be 1 dimension"; - - if (layer->insData[EXPAND_SHAPE].lock()->getTensorDesc().getPrecision() != Precision::I32) - THROW_IE_EXCEPTION << layer->name << " Shape vector should be I32!"; - - if (!(layer->insData[EXPAND_INPUT].lock()->getTensorDesc().getPrecision() == Precision::I32 && - layer->outData[0]->getTensorDesc().getPrecision() == Precision::I32) && - !(layer->insData[EXPAND_INPUT].lock()->getTensorDesc().getPrecision() == Precision::FP32 && - layer->outData[0]->getTensorDesc().getPrecision() == Precision::FP32)) { - THROW_IE_EXCEPTION << layer->name << - " Input and output tensors should have same precision and only FP32 and I32 are supported!"; - } - - src_dims = layer->insData[EXPAND_INPUT].lock()->getTensorDesc().getDims(); - srcStrides = layer->insData[EXPAND_INPUT].lock()->getTensorDesc().getBlockingDesc().getStrides(); - addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) }, - { DataConfigurator(ConfLayout::PLN) }); - } catch (InferenceEngine::details::InferenceEngineException &ex) { - errorMsg = ex.what(); - } - } - - StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { - int32_t* shape_dims = inputs[EXPAND_SHAPE]->cbuffer().as() + - inputs[EXPAND_SHAPE]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - size_t shape_size = 
(inputs[EXPAND_SHAPE]->getTensorDesc().getDims())[0]; - SizeVector dst_dims = outputs[0]->getTensorDesc().getDims(); - - if (dst_dims.size() != shape_size) { - if (resp) { - std::string errorMsg = "Output tensor dimension mismatch"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return PARAMETER_MISMATCH; - } - - if (src_dims.size() > dst_dims.size()) { - if (resp) { - std::string errorMsg = "Output tensor dimension is smaller then input tensor dimension"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return PARAMETER_MISMATCH; - } - - size_t i; - for (i = 0; i < dst_dims.size(); i++) { - if (static_cast(dst_dims[i]) != shape_dims[i]) { - if (resp) { - std::string errorMsg = "Output tensor dimension size mismatch"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return PARAMETER_MISMATCH; - } - } - - size_t prefix_size = dst_dims.size() - src_dims.size(); - for (i = 0; i < src_dims.size(); i++) { - if (src_dims[i] != 1 && - static_cast(src_dims[i]) != shape_dims[i + prefix_size]) { - if (resp) { - std::string errorMsg = "In/Output corresponding dimension must have the same value, or Input dimension is equal to 1"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return PARAMETER_MISMATCH; - } - } - - InferenceEngine::SizeVector dstStrides = outputs[0]->getTensorDesc().getBlockingDesc().getStrides(); - InferenceEngine::SizeVector src_aligned(dst_dims.size()); - InferenceEngine::SizeVector srcStrides_aligned(dst_dims.size()); - for (i = 0; i < dst_dims.size(); i++) { - if (i < prefix_size) { - src_aligned[i] = 1; - srcStrides_aligned[i] = srcStrides[0]; - } else { - src_aligned[i] = src_dims[i - prefix_size]; - srcStrides_aligned[i] = srcStrides[i - prefix_size]; - } - } - - size_t work_amount_dst = dstStrides[0] * dst_dims[0]; - - switch (outputs[0]->precision()) { - case Precision::FP32: { - const float *src_data = inputs[EXPAND_INPUT]->cbuffer().as() + - inputs[EXPAND_INPUT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - float* dst_data = outputs[0]->cbuffer().as() + - outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - - parallel_nt(0, [&](const int ithr, const int nthr) { - size_t i, src_idx, start = 0, end = 0; - SizeVector counters(dst_dims.size(), 0); - splitter(work_amount_dst, nthr, ithr, start, end); - for (int j = dst_dims.size() - 1, i = start; j >= 0; j--) { - counters[j] = i % dst_dims[j]; - i /= dst_dims[j]; - } - for (size_t iwork = start; iwork < end; ++iwork) { - for (i = 0, src_idx = 0; i < dst_dims.size(); ++i) - src_idx += counters[i] ? 
((counters[i] % src_aligned[i]) * srcStrides_aligned[i]) : 0; - - dst_data[iwork] = src_data[src_idx]; - - for (int j = dst_dims.size() - 1; j >= 0; j--) { - counters[j] = (counters[j] + 1) % dst_dims[j]; - if (counters[j] != 0) break; - } - } - }); - } - break; - case Precision::I32: { - const int32_t *src_data = inputs[EXPAND_INPUT]->cbuffer().as() + - inputs[EXPAND_INPUT]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - int32_t* dst_data = outputs[0]->cbuffer().as() + - outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - - parallel_nt(0, [&](const int ithr, const int nthr) { - size_t i, src_idx, start = 0, end = 0; - SizeVector counters(dst_dims.size(), 0); - splitter(work_amount_dst, nthr, ithr, start, end); - for (int j = dst_dims.size() - 1, i = start; j >= 0; j--) { - counters[j] = i % dst_dims[j]; - i /= dst_dims[j]; - } - for (size_t iwork = start; iwork < end; ++iwork) { - for (i = 0, src_idx = 0; i < dst_dims.size(); ++i) - src_idx += counters[i] ? ((counters[i] % src_aligned[i]) * srcStrides_aligned[i]) : 0; - - dst_data[iwork] = src_data[src_idx]; - - for (int j = dst_dims.size() - 1; j >= 0; j--) { - counters[j] = (counters[j] + 1) % dst_dims[j]; - if (counters[j] != 0) break; - } - } - }); - } - break; - default: - if (resp) { - std::string errorMsg = "Incorrect output precision. Only FP32 and I32 are supported!"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return GENERAL_ERROR; - } - - return OK; - } - -private: - const size_t EXPAND_INPUT = 0; - const size_t EXPAND_SHAPE = 1; - - SizeVector src_dims; - SizeVector srcStrides; -}; - -REG_FACTORY_FOR(ImplFactory, Expand); - -} // namespace Cpu -} // namespace Extensions -} // namespace InferenceEngine diff --git a/inference-engine/src/extension/ext_fill.cpp b/inference-engine/src/extension/ext_fill.cpp index 45dd43414a4d6c..1c499cade8819c 100644 --- a/inference-engine/src/extension/ext_fill.cpp +++ b/inference-engine/src/extension/ext_fill.cpp @@ -77,7 +77,7 @@ class FillImpl: public ExtLayerBase { } } - switch (outputs[0]->precision()) { + switch (outputs[0]->getTensorDesc().getPrecision()) { case Precision::FP32: { float* dst_data = outputs[0]->cbuffer().as() + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); diff --git a/inference-engine/src/extension/ext_gather.cpp b/inference-engine/src/extension/ext_gather.cpp index 03527cec61a985..898149b0031d9e 100644 --- a/inference-engine/src/extension/ext_gather.cpp +++ b/inference-engine/src/extension/ext_gather.cpp @@ -12,7 +12,8 @@ #include #include #include "ie_parallel.hpp" -#include "simple_copy.h" +#include "common/simple_copy.h" +#include "common/fp16_utils.h" namespace InferenceEngine { namespace Extensions { @@ -26,8 +27,12 @@ class GatherImpl: public ExtLayerBase { THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!"; Precision inIdxPrecision = layer->insData[GATHER_INDEXES].lock()->getTensorDesc().getPrecision(); - if (inIdxPrecision != Precision::FP32 && inIdxPrecision != Precision::I32) - THROW_IE_EXCEPTION << layer->name << " Incorrect input precision. Only FP32 or I32 are supported!"; + if (inIdxPrecision != Precision::FP32 && inIdxPrecision != Precision::I32 && inIdxPrecision != Precision::FP16) + THROW_IE_EXCEPTION << layer->name << " Incorrect input precision. 
Only FP32, FP16 or I32 are supported!"; + + Precision inDataPrecision = layer->insData[GATHER_DICTIONARY].lock()->getTensorDesc().getPrecision(); + if (inDataPrecision != Precision::FP32 && inDataPrecision != Precision::FP16) + THROW_IE_EXCEPTION << layer->name << " Incorrect input precision. Only FP32 or FP16 are supported!"; // Remove redundant dimensions const SizeVector& dictionary_dims = layer->insData[GATHER_DICTIONARY].lock()->getTensorDesc().getDims(); @@ -64,19 +69,40 @@ class GatherImpl: public ExtLayerBase { THROW_IE_EXCEPTION << layer->name << " Incorrect input parameters dimension!"; addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) }, - { DataConfigurator(ConfLayout::PLN) }); + { DataConfigurator(ConfLayout::PLN) }); } catch (InferenceEngine::details::InferenceEngineException &ex) { errorMsg = ex.what(); } } + struct f32toUi32 { + inline unsigned int operator()(const float value) { + return static_cast(value); + } + }; + + struct f16toUi32 { + inline unsigned int operator()(const ie_fp16 value) { + return static_cast(f16tof32(value)); + } + }; + + struct i32toUi32 { + inline unsigned int operator()(const int32_t value) { + return static_cast(value); + } + }; + StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { - switch (inputs[GATHER_INDEXES]->precision()) { + switch (inputs[GATHER_INDEXES]->getTensorDesc().getPrecision()) { case Precision::FP32: - gather(inputs[GATHER_INDEXES]->cbuffer().as(), inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0]); + gather(inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0]); + break; + case Precision::FP16: + gather(inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0]); break; case Precision::I32: - gather(inputs[GATHER_INDEXES]->cbuffer().as(), inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0]); + gather(inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0]); break; default: return GENERAL_ERROR; @@ -86,8 +112,49 @@ class GatherImpl: public ExtLayerBase { } private: - template - void gather(data_t *src_dataIdx, Blob::Ptr indexes, Blob::Ptr dictionary, Blob::Ptr output); + template + void gather(Blob::Ptr indexes, Blob::Ptr dictionary, Blob::Ptr output) { + size_t src_indexSize = indexes->size(); + const index_t *src_index = indexes->cbuffer().as() + indexes->getTensorDesc().getBlockingDesc().getOffsetPadding(); + const data_t *src_dataDict = dictionary->cbuffer().as() + dictionary->getTensorDesc().getBlockingDesc().getOffsetPadding(); + data_t *dst_data = output->cbuffer().as() + output->getTensorDesc().getBlockingDesc().getOffsetPadding(); + + if (axis == 0) { + parallel_for(src_indexSize, [&](size_t i) { + unsigned int idx = Conversion()(src_index[i]); + + // Index clipping + if (idx < indexRange) { + // Copying data to destination from Dictionary + simple_copy(&dst_data[i * dataLength], + output->byteSize() - (dataLength * i), + &src_dataDict[dataLength * idx], + sizeof(data_t) * dataLength); + } else { + memset(&dst_data[i * dataLength], 0, sizeof(data_t) * dataLength); + } + }); + } else { + parallel_for(src_indexSize, [&](size_t i) { + unsigned int idx = Conversion()(src_index[i]); + + // Index clipping + if (idx < indexRange) { + // Copying data to destination from Dictionary + for (size_t j = 0; j < numDictionaries; j++) { + simple_copy(&dst_data[dataLength * (i + j * src_indexSize)], + output->byteSize() - (dataLength * (i + j * src_indexSize)), + &src_dataDict[dataLength * (idx + j * 
indexRange)], + sizeof(data_t) * dataLength); + } + } else { + for (size_t j = 0; j < numDictionaries; j++) { + memset(&dst_data[dataLength * (i + j * src_indexSize)], 0, sizeof(data_t) * dataLength); + } + } + }); + } + } int axis = 0; size_t numDictionaries = 1; @@ -97,49 +164,6 @@ class GatherImpl: public ExtLayerBase { const size_t GATHER_INDEXES = 1; }; -template -void GatherImpl::gather(data_t *src_dataIdx, Blob::Ptr indexes, Blob::Ptr dictionary, Blob::Ptr output) { - size_t src_dataIdxSize = indexes->size(); - const float *src_dataDict = dictionary->cbuffer().as() + dictionary->getTensorDesc().getBlockingDesc().getOffsetPadding(); - float* dst_data = output->cbuffer().as() + output->getTensorDesc().getBlockingDesc().getOffsetPadding(); - src_dataIdx += indexes->getTensorDesc().getBlockingDesc().getOffsetPadding(); - - if (axis == 0) { - parallel_for(src_dataIdxSize, [&](size_t i) { - unsigned int idx = static_cast(src_dataIdx[i]); - - // Index clipping - if (idx < indexRange) { - // Copying data to destination from Dictionary - simple_copy(&dst_data[i * dataLength], - output->byteSize() - (dataLength * i), - &src_dataDict[dataLength * idx], - sizeof(float) * dataLength); - } else { - std::fill_n(&dst_data[i * dataLength], dataLength, 0.f); - } - }); - } else { - parallel_for(src_dataIdxSize, [&](size_t i) { - unsigned int idx = static_cast(src_dataIdx[i]); - - // Index clipping - if (idx < indexRange) { - // Copying data to destination from Dictionary - for (size_t j = 0; j < numDictionaries; j++) { - simple_copy(&dst_data[dataLength * (i + j * src_dataIdxSize)], - output->byteSize() - (dataLength * (i + j * src_dataIdxSize)), - &src_dataDict[dataLength * (idx + j * indexRange)], - sizeof(float) * dataLength); - } - } else { - for (size_t j = 0; j < numDictionaries; j++) { - std::fill_n(&dst_data[dataLength * (i + j * src_dataIdxSize)], dataLength, 0.f); - } - } - }); - } -} REG_FACTORY_FOR(ImplFactory, Gather); diff --git a/inference-engine/src/extension/ext_gather_tree.cpp b/inference-engine/src/extension/ext_gather_tree.cpp new file mode 100644 index 00000000000000..c448634dfe5961 --- /dev/null +++ b/inference-engine/src/extension/ext_gather_tree.cpp @@ -0,0 +1,153 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ext_list.hpp" +#include "ext_base.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include "ie_parallel.hpp" + +namespace InferenceEngine { +namespace Extensions { +namespace Cpu { + +class GatherTreeImpl: public ExtLayerBase { +public: + explicit GatherTreeImpl(const CNNLayer* layer) { + try { + if (layer->insData.empty() || layer->outData.empty()) + THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges."; + + if (layer->insData.size() != 4) + THROW_IE_EXCEPTION << layer->name << " Incorrect number of input edges."; + if (layer->outData.size() != 1) + THROW_IE_EXCEPTION << layer->name << " Incorrect number of output edges."; + + precision = layer->insData[GATHER_TREE_STEP_IDX].lock()->getTensorDesc().getPrecision(); + + if (precision != Precision::FP32 && precision != Precision::I32) + THROW_IE_EXCEPTION << layer->name << " Incorrect data tensor precision. 
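
For axis > 0 the dictionary behaves like a [numDictionaries, indexRange, dataLength] view and the output like [numDictionaries, indexCount, dataLength], which is exactly what the two offset expressions above encode. A small sketch of that arithmetic with illustrative names; note also that zeroing out-of-range rows with memset is safe for float data because the all-zero bit pattern is IEEE-754 0.0f, which is what lets the patch replace the float-only std::fill_n:

#include <cstddef>

// Offset of the idx-th dictionary row inside slice j of the dictionary.
inline size_t dict_offset(size_t j, size_t idx, size_t indexRange, size_t dataLength) {
    return dataLength * (idx + j * indexRange);
}

// Offset of the i-th gathered row inside slice j of the output.
inline size_t out_offset(size_t j, size_t i, size_t indexCount, size_t dataLength) {
    return dataLength * (i + j * indexCount);
}
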
Only I32 or FP32 are supported."; + + if (layer->insData[GATHER_TREE_PARENT_IDX].lock()->getTensorDesc().getPrecision() != precision || + layer->insData[GATHER_TREE_MAX_SEQ_LEN].lock()->getTensorDesc().getPrecision() != precision || + layer->insData[GATHER_TREE_END_TOKEN].lock()->getTensorDesc().getPrecision() != precision || + layer->outData[0]->getTensorDesc().getPrecision() != precision) + THROW_IE_EXCEPTION << layer->name << " Incorrect input/output data tensor precision. Should be the same."; + + if (layer->insData[GATHER_TREE_STEP_IDX].lock()->getTensorDesc().getDims().size() != 3) + THROW_IE_EXCEPTION << layer->name << " step_idx vector should be 3 dimension"; + if (layer->insData[GATHER_TREE_PARENT_IDX].lock()->getTensorDesc().getDims().size() != 3) + THROW_IE_EXCEPTION << layer->name << " parent_idx vector should be 3 dimension"; + if (layer->insData[GATHER_TREE_MAX_SEQ_LEN].lock()->getTensorDesc().getDims().size() != 1) + THROW_IE_EXCEPTION << layer->name << " max_seq_len vector should be 1 dimension"; + if (layer->insData[GATHER_TREE_END_TOKEN].lock()->getTensorDesc().getDims().size() != 1) + THROW_IE_EXCEPTION << layer->name << " end_token should be 1 dimension"; + + addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN), + DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) }, + { DataConfigurator(ConfLayout::PLN) }); + } catch (InferenceEngine::details::InferenceEngineException &ex) { + errorMsg = ex.what(); + } + } + + + StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { + if (precision == Precision::FP32) + return execute_impl(inputs, outputs, resp); + else + return execute_impl(inputs, outputs, resp); + } + + template + StatusCode execute_impl(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept { + const auto *step_idx = inputs[GATHER_TREE_STEP_IDX]->cbuffer().as() + + inputs[GATHER_TREE_STEP_IDX]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + const auto *parent_idx = inputs[GATHER_TREE_PARENT_IDX]->cbuffer().as() + + inputs[GATHER_TREE_PARENT_IDX]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + const auto *max_seq_len = inputs[GATHER_TREE_MAX_SEQ_LEN]->cbuffer().as() + + inputs[GATHER_TREE_MAX_SEQ_LEN]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + auto end_token = (inputs[GATHER_TREE_END_TOKEN]->cbuffer().as() + + inputs[GATHER_TREE_END_TOKEN]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0]; + auto * final_idx = outputs[0]->cbuffer().as() + + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + + SizeVector step_idx_dims = inputs[GATHER_TREE_STEP_IDX]->getTensorDesc().getDims(); + SizeVector parent_idx_dims = inputs[GATHER_TREE_PARENT_IDX]->getTensorDesc().getDims(); + SizeVector max_seq_len_dims = inputs[GATHER_TREE_MAX_SEQ_LEN]->getTensorDesc().getDims(); + SizeVector final_idx_dims = outputs[0]->getTensorDesc().getDims(); + int32_t max_time = step_idx_dims[0]; + size_t batch_size = step_idx_dims[1]; + size_t beam_width = step_idx_dims[2]; + size_t bb_size = batch_size * beam_width; + + if (max_time != static_cast(parent_idx_dims[0]) || max_time != static_cast(final_idx_dims[0]) || + batch_size != parent_idx_dims[1] || batch_size != final_idx_dims[1] || batch_size != max_seq_len_dims[0] || + beam_width != parent_idx_dims[2] || beam_width != final_idx_dims[2]) { + if (resp) { + std::string errorMsg = "Input/Output tensors dimensions mismatch"; + errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); + } 
+ return PARAMETER_MISMATCH; + } + + bool incorrect_result = false; + parallel_for2d(batch_size, beam_width, [&](size_t batch, size_t beam) { + int32_t max_sequence_in_beam = std::min(max_time, static_cast(max_seq_len[batch])); + if (max_sequence_in_beam > 0) { + int32_t time, idx = (max_time - 1) * bb_size + batch * beam_width; + for (time = (max_time - 1); time >= max_sequence_in_beam; time--, idx -= bb_size) + final_idx[idx + beam] = end_token; + + for (int32_t parent = static_cast(beam); time >= 0; time--, idx -= bb_size) { + if (parent < 0 || parent >= static_cast(beam_width)) { + incorrect_result = true; + break; + } + final_idx[idx + beam] = step_idx[idx + parent]; + parent = static_cast(parent_idx[idx + parent]); + } + + bool finished = false; + auto *final = &final_idx[batch * beam_width + beam]; + for (time = 0; time < max_sequence_in_beam; time++, final += bb_size) { + if (finished) + (*final) = end_token; + else if ((*final) == end_token) + finished = true; + } + } + }); + + if (incorrect_result) { + if (resp) { + std::string errorMsg = "Wrong parent index, result is incorrect"; + errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); + } + return OUT_OF_BOUNDS; + } + + return OK; + } + +private: + const size_t GATHER_TREE_STEP_IDX = 0; + const size_t GATHER_TREE_PARENT_IDX = 1; + const size_t GATHER_TREE_MAX_SEQ_LEN = 2; + const size_t GATHER_TREE_END_TOKEN = 3; + + InferenceEngine::Precision precision; +}; + +REG_FACTORY_FOR(ImplFactory, GatherTree); + +} // namespace Cpu +} // namespace Extensions +} // namespace InferenceEngine diff --git a/inference-engine/src/extension/ext_interp.cpp b/inference-engine/src/extension/ext_interp.cpp index 3b3b684c61ea19..ceb01a0f84920e 100644 --- a/inference-engine/src/extension/ext_interp.cpp +++ b/inference-engine/src/extension/ext_interp.cpp @@ -4,7 +4,9 @@ #include "ext_list.hpp" #include "ext_base.hpp" +#include #include +#include #if defined(HAVE_SSE) || defined(HAVE_AVX2) || defined(HAVE_AVX512F) #include #endif @@ -21,21 +23,52 @@ class InterpImpl: public ExtLayerBase { if (layer->insData.size() != 1 || layer->outData.empty()) THROW_IE_EXCEPTION << "Incorrect number of input/output edges!"; - if (layer->insData[0].lock()->dims.size() != 4) + if (layer->insData[0].lock()->getTensorDesc().getDims().size() != 4) THROW_IE_EXCEPTION << "Interp supports only 4d blobs!"; + auto src_precision = layer->insData[0].lock()->getTensorDesc().getPrecision(); + if (src_precision != Precision::FP32 && src_precision != Precision::U8) + THROW_IE_EXCEPTION << layer->name << " Incorrect input data tensor precision. Only U8 or FP32 are supported!"; + + if (layer->outData[0]->getTensorDesc().getPrecision() != Precision::FP32) + THROW_IE_EXCEPTION << layer->name << " Incorrect output data tensor precision. 
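
The core of GatherTree is the per-(batch, beam) backtrack: walk from the last time step toward t = 0, copying the token chosen at each step and hopping to its parent beam. A sequential sketch of one such walk, assuming the same [max_time, batch_size, beam_width] row-major layout; the parent bounds check that triggers OUT_OF_BOUNDS above is reduced to a comment here:

#include <algorithm>
#include <cstdint>

void backtrack_one_beam(const float *step_idx, const float *parent_idx, float *final_idx,
                        int32_t max_time, size_t batch_size, size_t beam_width,
                        size_t batch, size_t beam, int32_t seq_len, float end_token) {
    const int64_t bb = static_cast<int64_t>(batch_size) * beam_width;
    const int64_t b = static_cast<int64_t>(beam);
    const int32_t max_seq = std::min(max_time, seq_len);
    if (max_seq <= 0) return;
    int32_t time = max_time - 1;
    int64_t idx = static_cast<int64_t>(time) * bb + batch * beam_width;
    for (; time >= max_seq; --time, idx -= bb)
        final_idx[idx + b] = end_token;                          // pad beyond the sequence
    for (int32_t parent = static_cast<int32_t>(beam); time >= 0; --time, idx -= bb) {
        // a real implementation must verify 0 <= parent < beam_width here
        final_idx[idx + b] = step_idx[idx + parent];
        parent = static_cast<int32_t>(parent_idx[idx + parent]); // hop to the parent beam
    }
}
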
Only FP32 is supported!"; + // We don't read other parameters since they are needed only for dst reshape in caffe pad_beg = layer->GetParamAsInt("pad_beg"); pad_end = layer->GetParamAsInt("pad_end"); - align_corners = layer->GetParamsAsBool("align_corners", true); - + align_corners = layer->GetParamAsBool("align_corners", true); + + ConfLayout blk_layout; + if (src_precision == Precision::U8) { + LayerConfig config; + DataConfig dataConfigDct; + dataConfigDct.desc = TensorDesc(Precision::U8, layer->insData[0].lock()->getTensorDesc().getDims(), Layout::NCHW); + config.inConfs.push_back(dataConfigDct); + + DataConfig dataConfigOut; + const SizeVector& out_dims = layer->outData[0]->getTensorDesc().getDims(); + SizeVector blocks = out_dims; + SizeVector order(blocks.size()); + SizeVector dimOffsets(blocks.size()); + SizeVector strides(blocks.size()); + size_t offset(std::numeric_limits::max()); + for (size_t i = 0; i < order.size(); i++) { + strides[i] = std::numeric_limits::max(); + dimOffsets[i] = 0; + order[i] = i; + } + dataConfigOut.desc = TensorDesc(Precision::FP32, out_dims, { blocks, order, offset, dimOffsets, strides }); + config.outConfs.push_back(dataConfigOut); + config.dynBatchSupport = false; + confs.push_back(config); + } else { #if defined(HAVE_AVX512F) - auto blk_layout = ConfLayout::BLK16; + blk_layout = ConfLayout::BLK16; #else - auto blk_layout = ConfLayout::BLK8; + blk_layout = ConfLayout::BLK8; #endif - - addConfig(layer, {DataConfigurator(blk_layout)}, {DataConfigurator(blk_layout)}); + addConfig(layer, { DataConfigurator(blk_layout) }, { DataConfigurator(blk_layout) }); + } } catch (InferenceEngine::details::InferenceEngineException &ex) { errorMsg = ex.what(); } @@ -43,23 +76,41 @@ class InterpImpl: public ExtLayerBase { StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { - int IN = static_cast(inputs[0]->getTensorDesc().getDims()[0]); - int IC = static_cast( - inputs[0]->getTensorDesc().getBlockingDesc().getBlockDims()[1] * - inputs[0]->getTensorDesc().getBlockingDesc().getBlockDims()[4]); - int IH = static_cast(inputs[0]->getTensorDesc().getDims()[2]); - int IW = static_cast(inputs[0]->getTensorDesc().getDims()[3]); + size_t IN = inputs[0]->getTensorDesc().getDims()[0]; + size_t IH = inputs[0]->getTensorDesc().getDims()[2]; + size_t IW = inputs[0]->getTensorDesc().getDims()[3]; + size_t OH = outputs[0]->getTensorDesc().getDims()[2]; + size_t OW = outputs[0]->getTensorDesc().getDims()[3]; - int OH = static_cast(outputs[0]->getTensorDesc().getDims()[2]); - int OW = static_cast(outputs[0]->getTensorDesc().getDims()[3]); + size_t IH_pad = IH + pad_beg + pad_end; + size_t IW_pad = IW + pad_beg + pad_end; - int IH_pad = IH + pad_beg + pad_end; - int IW_pad = IW + pad_beg + pad_end; - - const auto *src_data = inputs[0]->buffer().as(); auto *dst_data = outputs[0]->buffer().as(); - interpolate(IN, IC, src_data, -pad_beg, -pad_beg, IH_pad, IW_pad, IH, IW, dst_data, 0, 0, OH, OW, OH, OW); + switch (inputs[0]->getTensorDesc().getPrecision()) { + case Precision::FP32: + { + size_t IC = inputs[0]->getTensorDesc().getBlockingDesc().getBlockDims()[1] * + inputs[0]->getTensorDesc().getBlockingDesc().getBlockDims()[4]; + interpolate(IN, IC, inputs[0]->buffer().as(), + -pad_beg, -pad_beg, IH_pad, IW_pad, IH, IW, dst_data, 0, 0, OH, OW, OH, OW); + } + break; + case Precision::U8: + { + size_t IC = inputs[0]->getTensorDesc().getDims()[1]; + interpolate_8u(inputs[0]->getTensorDesc().getLayout(), IN, IC, inputs[0]->buffer().as(), + 
-pad_beg, -pad_beg, IH_pad, IW_pad, IH, IW, dst_data, 0, 0, OH, OW, OH, OW); + } + break; + default: + if (resp) { + std::string errorMsg = "Incorrect input precision. Only U8 or FP32 are supported!"; + errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); + } + return GENERAL_ERROR; + } + return OK; } @@ -68,13 +119,13 @@ class InterpImpl: public ExtLayerBase { int pad_end; bool align_corners; - void interpolate(const int N, const int C, + void interpolate(const size_t N, const size_t C, const float *src, const int x1, const int y1, - const int IH_pad, const int IW_pad, const int IH, const int IW, + const int IH_pad, const int IW_pad, const size_t IH, const size_t IW, float *dst, const int x2, const int y2, - const int OH_pad, const int OW_pad, const int OH, const int OW) { + const int OH_pad, const int OW_pad, const size_t OH, const size_t OW) { if (IH_pad == OH_pad && IW_pad == OW_pad) { - for (int i = 0; i < N * C * OH * OW; i++) { + for (size_t i = 0; i < N * C * OH * OW; i++) { dst[i] = src[i]; } return; @@ -97,11 +148,11 @@ class InterpImpl: public ExtLayerBase { #endif // Align channel number to block size to deal with channels padding in IE with multiple blobs - int CB = (C + block_size - 1) & (-block_size); + size_t CB = (C + block_size - 1) & (-block_size); - int CH = (C + block_size - 1) / block_size; + size_t CH = (C + block_size - 1) / block_size; - parallel_for3d(N, CH, OH_pad, [&](int n, int cb, int h) { + parallel_for3d(N, CH, OH_pad, [&](size_t n, size_t cb, size_t h) { const float *psrc = src + n * CB * IH * IW; float fh = rh * h; @@ -193,6 +244,55 @@ class InterpImpl: public ExtLayerBase { } }); } + + void interpolate_8u(Layout layout, const size_t N, const size_t C, + const uint8_t *src, const int x1, const int y1, + const int IH_pad, const int IW_pad, const size_t IH, const size_t IW, + float *dst, const int x2, const int y2, + const int OH_pad, const int OW_pad, const size_t OH, const size_t OW) { + if (IH_pad == OH_pad && IW_pad == OW_pad) { + for (size_t i = 0; i < N * C * OH * OW; i++) { + dst[i] = static_cast(src[i]); + } + return; + } + + float rh; + float rw; + if (align_corners) { + rh = (OH_pad > 1) ? static_cast(IH_pad - 1) / (OH_pad - 1) : 0.0f; + rw = (OW_pad > 1) ? static_cast(IW_pad - 1) / (OW_pad - 1) : 0.0f; + } else { + rh = static_cast(IH_pad) / (OH_pad); + rw = static_cast(IW_pad) / (OW_pad); + } + + parallel_for3d(N, C, OH_pad, [&](size_t n, size_t cb, size_t h) { + const uint8_t *psrc = src + n * C * IH * IW; + + float fh = rh * h; + int ih0 = static_cast(fh); + int ih1 = (ih0 < IH_pad - 1) ? ih0 + 1 : ih0; + + float h_lambda0 = fh - ih0; + float h_lambda1 = 1.0f - h_lambda0; + + for (int w = 0; w < OW_pad; ++w) { + float fw = rw * w; + int iw0 = static_cast(fw); + int iw1 = (iw0 < IW_pad - 1) ? 
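
The only difference between the two resampling modes is how the input-to-output scale is derived: align_corners maps the corner pixels onto each other, giving (IH - 1)/(OH - 1), while the default mode scales the whole extent, giving IH/OH. A self-contained illustration of the rh/rw computation above:

#include <cstdio>

static float scale(int in, int out, bool align_corners) {
    if (align_corners)
        return (out > 1) ? static_cast<float>(in - 1) / (out - 1) : 0.0f;
    return static_cast<float>(in) / out;
}

int main() {
    // e.g. a 4 -> 8 upsample: 3/7 ~= 0.4286 with corners aligned, 0.5 otherwise
    std::printf("%f %f\n", scale(4, 8, true), scale(4, 8, false));
}
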
iw0 + 1 : iw0; + + float w_lambda0 = fw - iw0; + float w_lambda1 = 1.0f - w_lambda0; + + dst[n * C * OH * OW + cb * OW * OH + (y2 + h) * OW + (x2 + w)] = + h_lambda1 * (w_lambda1 * static_cast(psrc[cb * IW * IH + (y1 + ih0) * IW + (x1 + iw0)]) + + w_lambda0 * static_cast(psrc[cb * IW * IH + (y1 + ih0) * IW + (x1 + iw1)])) + + h_lambda0 * (w_lambda1 * static_cast(psrc[cb * IW * IH + (y1 + ih1) * IW + (x1 + iw0)]) + + w_lambda0 * static_cast(psrc[cb * IW * IH + (y1 + ih1) * IW + (x1 + iw1)])); + } + }); + } }; REG_FACTORY_FOR(ImplFactory, Interp); diff --git a/inference-engine/src/extension/ext_list.cpp b/inference-engine/src/extension/ext_list.cpp index 89058be6580f49..4bcd9fa653574f 100644 --- a/inference-engine/src/extension/ext_list.cpp +++ b/inference-engine/src/extension/ext_list.cpp @@ -31,8 +31,8 @@ void CpuExtensions::AddShapeInferImpl(std::string name, const IShapeInferImpl::P void CpuExtensions::GetVersion(const Version*& versionInfo) const noexcept { static Version ExtensionDescription = { - { 1, 6 }, // extension API version - "1.6", + { 2, 0 }, // extension API version + "2.0", "ie-cpu-ext" // extension description message }; diff --git a/inference-engine/src/extension/ext_list.hpp b/inference-engine/src/extension/ext_list.hpp index 08f6235b4c08b5..6d150c3ec6ca0f 100644 --- a/inference-engine/src/extension/ext_list.hpp +++ b/inference-engine/src/extension/ext_list.hpp @@ -68,17 +68,6 @@ class ExtRegisterBase { #define REG_FACTORY_FOR(__prim, __type) \ static ExtRegisterBase<__prim> __reg__##__type(#__type) -template -class ShapeInferImplRegister { -public: - explicit ShapeInferImplRegister(const std::string& type) { - CpuExtensions::AddShapeInferImpl(type, std::make_shared()); - } -}; - -#define REG_SHAPE_INFER_FOR_TYPE(__impl, __type) \ -static ShapeInferImplRegister<__impl> __reg__si__##__type(#__type) - } // namespace Cpu } // namespace Extensions } // namespace InferenceEngine diff --git a/inference-engine/src/extension/ext_log_softmax.cpp b/inference-engine/src/extension/ext_log_softmax.cpp new file mode 100644 index 00000000000000..ba53dc846143fb --- /dev/null +++ b/inference-engine/src/extension/ext_log_softmax.cpp @@ -0,0 +1,110 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ext_list.hpp" +#include "ext_base.hpp" + +#include +#include +#include +#include +#include +#include +#include "ie_parallel.hpp" + +namespace InferenceEngine { +namespace Extensions { +namespace Cpu { + +class LogSoftmaxImpl: public ExtLayerBase { +public: + explicit LogSoftmaxImpl(const CNNLayer* layer) { + try { + if (layer->insData.empty() || layer->outData.empty()) + THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!"; + + if (layer->insData.size() != 1) + THROW_IE_EXCEPTION << layer->name << " Incorrect number of input edges!"; + + if (layer->insData[0].lock()->getTensorDesc().getPrecision() != Precision::FP32) + THROW_IE_EXCEPTION << layer->name << " Incorrect input data tensor precision. 
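
interpolate_8u then blends the four neighbours with the usual separable weights: lambda0 is the fractional distance past the lower-index pixel and lambda1 its complement. A minimal single-sample sketch of the same weighting (hypothetical free function, one channel, no padding):

#include <algorithm>
#include <cstdint>

float sample_bilinear(const uint8_t *plane, int H, int W, float fh, float fw) {
    int h0 = static_cast<int>(fh), w0 = static_cast<int>(fw);
    int h1 = std::min(h0 + 1, H - 1), w1 = std::min(w0 + 1, W - 1);
    float lh = fh - h0, lw = fw - w0;            // fractional parts
    float top = (1.f - lw) * plane[h0 * W + w0] + lw * plane[h0 * W + w1];
    float bot = (1.f - lw) * plane[h1 * W + w0] + lw * plane[h1 * W + w1];
    return (1.f - lh) * top + lh * bot;
}
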
Only FP32 is supported!"; + + SizeVector dims = layer->insData[0].lock()->getTensorDesc().getDims(); + int axis = layer->GetParamAsInt("axis", -1); + if (axis < 0) + axis += dims.size(); + + if (dims.size() < static_cast(1 + axis)) + THROW_IE_EXCEPTION << layer->name << " Incorrect input parameters dimensions and axis number!"; + + int j; + for (j = dims.size() - 1; j >= 0; j--) { + if (dims[j] != 1) break; + } + if (j == axis) is_last_dim = true; + + for (int i = 0; i < axis; i++) + axis_step *= dims[i]; + reduced_axis_size = dims[axis]; + for (size_t i = (axis + 1); i < dims.size(); i++) + reduced_axis_stride *= dims[i]; + + addConfig(layer, { { ConfLayout::PLN, false, 0 } }, { { ConfLayout::PLN, false, 0 } }); + } catch (InferenceEngine::details::InferenceEngineException &ex) { + errorMsg = ex.what(); + } + } + + StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { + const float *src_data = inputs[0]->cbuffer().as() + + inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + float* dst_data = outputs[0]->cbuffer().as() + + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + + if (is_last_dim) { + parallel_for(axis_step, [&](size_t i) { + float reduce_prod = 0.0f; + const float *src_dataPtr = &src_data[i * reduced_axis_size]; + for (size_t j = 0; j < reduced_axis_size; ++j) + reduce_prod += expf(src_dataPtr[j]); + reduce_prod = logf(reduce_prod); + float *dst_dataPtr = reinterpret_cast(&dst_data[i * reduced_axis_size]); + for (size_t j = 0; j < reduced_axis_size; ++j) + dst_dataPtr[j] = src_dataPtr[j] - reduce_prod; + }); + } else { + parallel_for2d(axis_step, reduced_axis_stride, [&](size_t k, size_t i) { + float reduce_prod = 0.0f; + const float *src_dataPtr = &src_data[k * reduced_axis_stride * reduced_axis_size + i]; + for (size_t j = 0; j < reduced_axis_size; ++j) { + reduce_prod += expf((*src_dataPtr)); + src_dataPtr += reduced_axis_stride; + } + + reduce_prod = logf(reduce_prod); + src_dataPtr = &src_data[k * reduced_axis_stride * reduced_axis_size + i]; + float *dst_dataPtr = reinterpret_cast(&dst_data[k * reduced_axis_stride * reduced_axis_size + i]); + for (size_t j = 0; j < reduced_axis_size; ++j) { + (*dst_dataPtr) = (*src_dataPtr) - reduce_prod; + src_dataPtr += reduced_axis_stride; + dst_dataPtr += reduced_axis_stride; + } + }); + } + + return OK; + } + +private: + size_t reduced_axis_size; + size_t reduced_axis_stride = 1; + size_t axis_step = 1; + bool is_last_dim = false; +}; + +REG_FACTORY_FOR(ImplFactory, LogSoftmax); + +} // namespace Cpu +} // namespace Extensions +} // namespace InferenceEngine diff --git a/inference-engine/src/extension/ext_math.cpp b/inference-engine/src/extension/ext_math.cpp new file mode 100644 index 00000000000000..6c89a886c41757 --- /dev/null +++ b/inference-engine/src/extension/ext_math.cpp @@ -0,0 +1,301 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ext_list.hpp" +#include "ext_base.hpp" + +#include +#include +#include +#include +#include "ie_parallel.hpp" + +namespace InferenceEngine { +namespace Extensions { +namespace Cpu { + +class MathImpl: public ExtLayerBase { + static float error_function(float x) { + const float clip_bound = 2.86f; + // Points clip_bound and -clip_bound are extremums for this polynom + // So in order to provide better accuracy comparing to std::erf we have to clip input range + if (x > clip_bound) + return 1; + if (x < -clip_bound) + return -1; + + // A polynomial approximation of 
the error function + const float erfNumerator[4] = { 90.0260162353515625f, 2232.00537109375f, + 7003.3251953125f, 55592.30078125f }; + const float erfDenominator[5] = { 33.56171417236328125f, 521.35797119140625f, + 4594.32373046875f, 22629.0f, 49267.39453125f }; + float polynom = 9.60497379302978515625f; + float x2 = x * x; + for (float c : erfNumerator) { + polynom = polynom * x2 + c; + } + x *= polynom; + polynom = 1.0f; + for (float c : erfDenominator) { + polynom = polynom * x2 + c; + } + return x / polynom; + } + +public: + explicit MathImpl(const CNNLayer* layer) { + try { + if (layer->insData.empty() || layer->outData.empty()) + THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!"; + + if (layer->insData.size() != 1) + THROW_IE_EXCEPTION << layer->name << " Incorrect number of input edges!"; + + if (layer->insData[0].lock()->getTensorDesc().getPrecision() != Precision::FP32) + THROW_IE_EXCEPTION << layer->name << " Incorrect input precision. Only FP32 is supported!"; + + if (layer->insData[0].lock()->getTensorDesc().getDims() != layer->outData[0]->getTensorDesc().getDims()) + THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output dimensions!"; + + alpha = layer->GetParamAsFloat("alpha", 0.0f); + beta = layer->GetParamAsFloat("beta", 0.0f); + gamma = layer->GetParamAsFloat("gamma", 0.0f); + + std::string math_func = layer->type; + if (math_func == "Erf") mathFunction = Math::Erf; + else if (math_func == "Abs") mathFunction = Math::Abs; + else if (math_func == "Acos") mathFunction = Math::Acos; + else if (math_func == "Acosh") mathFunction = Math::Acosh; + else if (math_func == "Asin") mathFunction = Math::Asin; + else if (math_func == "Asinh") mathFunction = Math::Asinh; + else if (math_func == "Atan") mathFunction = Math::Atan; + else if (math_func == "Atanh") mathFunction = Math::Atanh; + else if (math_func == "Ceil") mathFunction = Math::Ceil; + else if (math_func == "Cos") mathFunction = Math::Cos; + else if (math_func == "Cosh") mathFunction = Math::Cosh; + else if (math_func == "Floor") mathFunction = Math::Floor; + else if (math_func == "HardSigmoid") mathFunction = Math::HardSigmoid; + else if (math_func == "Log") mathFunction = Math::Log; + else if (math_func == "Neg") mathFunction = Math::Neg; + else if (math_func == "Reciprocal") mathFunction = Math::Reciprocal; + else if (math_func == "Selu") mathFunction = Math::Selu; + else if (math_func == "Sign") mathFunction = Math::Sign; + else if (math_func == "Sin") mathFunction = Math::Sin; + else if (math_func == "Sinh") mathFunction = Math::Sinh; + else if (math_func == "Softplus") mathFunction = Math::Softplus; + else if (math_func == "Softsign") mathFunction = Math::Softsign; + else if (math_func == "Tan") mathFunction = Math::Tan; + else + THROW_IE_EXCEPTION << layer->name << " Incorrect Math layer type!"; + + addConfig(layer, { { ConfLayout::PLN, false, 0 } }, { { ConfLayout::PLN, false, 0 } }); + } catch (InferenceEngine::details::InferenceEngineException &ex) { + errorMsg = ex.what(); + } + } + + StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { + size_t dataSize = outputs[0]->size(); + const float *src_data = inputs[0]->cbuffer().as() + + inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + float* dst_data = outputs[0]->cbuffer().as() + + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + + switch (mathFunction) { + case Math::Erf: + parallel_for(dataSize, [&](size_t i) { + dst_data[i] = 
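
The coefficients implement a clipped rational (polynomial-over-polynomial) approximation of erf; beyond +-2.86 the true function is within float rounding of +-1, so clipping there costs nothing. A sketch that replays the same Horner evaluation, with the coefficients copied from error_function above, and spot-checks it against std::erf:

#include <cmath>
#include <cstdio>

float erf_approx(float x) {
    const float clip = 2.86f;
    if (x >  clip) return  1.f;
    if (x < -clip) return -1.f;
    const float num[4] = {90.0260162353515625f, 2232.00537109375f,
                          7003.3251953125f, 55592.30078125f};
    const float den[5] = {33.56171417236328125f, 521.35797119140625f,
                          4594.32373046875f, 22629.0f, 49267.39453125f};
    float p = 9.60497379302978515625f, x2 = x * x;
    for (float c : num) p = p * x2 + c;          // numerator via Horner's rule
    x *= p;
    p = 1.0f;
    for (float c : den) p = p * x2 + c;          // denominator via Horner's rule
    return x / p;
}

int main() {
    for (float x = -3.f; x <= 3.f; x += 0.5f)
        std::printf("%+.1f  %+.6f  %+.6f\n", x, erf_approx(x), std::erf(x));
}
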
error_function(src_data[i]); + }); + break; + case Math::Abs: + parallel_for(dataSize, [&](size_t i) { + dst_data[i] = (std::abs)(src_data[i]); + }); + break; + case Math::Acos: + parallel_for(dataSize, [&](size_t i) { + dst_data[i] = acosf(src_data[i]); + }); + break; + case Math::Acosh: + parallel_for(dataSize, [&](size_t i) { + dst_data[i] = acoshf(src_data[i]); + }); + break; + case Math::Asin: + parallel_for(dataSize, [&](size_t i) { + dst_data[i] = asinf(src_data[i]); + }); + break; + case Math::Asinh: + parallel_for(dataSize, [&](size_t i) { + dst_data[i] = asinhf(src_data[i]); + }); + break; + case Math::Atan: + parallel_for(dataSize, [&](size_t i) { + dst_data[i] = atanf(src_data[i]); + }); + break; + case Math::Atanh: + parallel_for(dataSize, [&](size_t i) { + dst_data[i] = atanhf(src_data[i]); + }); + break; + case Math::Ceil: + parallel_for(dataSize, [&](size_t i) { + dst_data[i] = ceilf(src_data[i]); + }); + break; + case Math::Cos: + parallel_for(dataSize, [&](size_t i) { + dst_data[i] = cosf(src_data[i]); + }); + break; + case Math::Cosh: + parallel_for(dataSize, [&](size_t i) { + dst_data[i] = coshf(src_data[i]); + }); + break; + case Math::Floor: + parallel_for(dataSize, [&](size_t i) { + dst_data[i] = floorf(src_data[i]); + }); + break; + case Math::HardSigmoid: + alpha = (alpha == 0.0f) ? 0.2f : alpha; + beta = (beta == 0.0f) ? 0.5f : beta; + parallel_for(dataSize, [&](size_t i) { + dst_data[i] = (std::max)(0.f, (std::min)(1.f, alpha * src_data[i] + beta)); + }); + break; + case Math::Log: + parallel_for(dataSize, [&](size_t i) { + dst_data[i] = logf(src_data[i]); + }); + break; + case Math::Neg: + parallel_for(dataSize, [&](size_t i) { + dst_data[i] = -src_data[i]; + }); + break; + case Math::Reciprocal: + parallel_for(dataSize, [&](size_t i) { + dst_data[i] = 1.0f / src_data[i]; + }); + break; + case Math::Selu: + alpha = (alpha == 0.0f) ? 1.67326f : alpha; + gamma = (gamma == 0.0f) ? 1.0507f : gamma; + parallel_for(dataSize, [&](size_t i) { + float x = src_data[i]; + dst_data[i] = (x > 0.0f) ? 
(gamma * x) : (gamma * alpha * (exp(x) - 1.0f)); + }); + break; + case Math::Sign: + parallel_for(dataSize, [&](size_t i) { + if (src_data[i] > 0.0f) + dst_data[i] = 1.0f; + else if (src_data[i] < 0.0f) + dst_data[i] = -1.0f; + else + dst_data[i] = 0.0f; + }); + break; + case Math::Sin: + parallel_for(dataSize, [&](size_t i) { + dst_data[i] = sinf(src_data[i]); + }); + break; + case Math::Sinh: + parallel_for(dataSize, [&](size_t i) { + dst_data[i] = sinhf(src_data[i]); + }); + break; + case Math::Softplus: + parallel_for(dataSize, [&](size_t i) { + dst_data[i] = logf(expf(src_data[i]) + 1); + }); + break; + case Math::Softsign: + parallel_for(dataSize, [&](size_t i) { + float x = src_data[i]; + dst_data[i] = x / (1.f + (std::abs)(x)); + }); + break; + case Math::Tan: + parallel_for(dataSize, [&](size_t i) { + dst_data[i] = tanf(src_data[i]); + }); + break; + default: + if (resp) { + std::string errorMsg = "Incorrect Reduce layer type"; + errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); + } + return GENERAL_ERROR; + } + return OK; + } + +private: + enum class Math { + Abs, + Acos, + Acosh, + Asin, + Asinh, + Atan, + Atanh, + Ceil, + Cos, + Cosh, + Erf, + Floor, + HardSigmoid, + Log, + Neg, + Reciprocal, + Selu, + Sign, + Sin, + Sinh, + Softplus, + Softsign, + Tan + }; + + Math mathFunction = Math::Erf; + float alpha = 0.0f; + float beta = 0.0f; + float gamma = 0.0f; +}; + +REG_FACTORY_FOR(ImplFactory, Abs); +REG_FACTORY_FOR(ImplFactory, Acos); +REG_FACTORY_FOR(ImplFactory, Acosh); +REG_FACTORY_FOR(ImplFactory, Asin); +REG_FACTORY_FOR(ImplFactory, Asinh); +REG_FACTORY_FOR(ImplFactory, Atan); +REG_FACTORY_FOR(ImplFactory, Atanh); +REG_FACTORY_FOR(ImplFactory, Ceil); +REG_FACTORY_FOR(ImplFactory, Cos); +REG_FACTORY_FOR(ImplFactory, Cosh); +REG_FACTORY_FOR(ImplFactory, Erf); +REG_FACTORY_FOR(ImplFactory, Floor); +REG_FACTORY_FOR(ImplFactory, HardSigmoid); +REG_FACTORY_FOR(ImplFactory, Log); +REG_FACTORY_FOR(ImplFactory, Neg); +REG_FACTORY_FOR(ImplFactory, Reciprocal); +REG_FACTORY_FOR(ImplFactory, Selu); +REG_FACTORY_FOR(ImplFactory, Sign); +REG_FACTORY_FOR(ImplFactory, Sin); +REG_FACTORY_FOR(ImplFactory, Sinh); +REG_FACTORY_FOR(ImplFactory, Softplus); +REG_FACTORY_FOR(ImplFactory, Softsign); +REG_FACTORY_FOR(ImplFactory, Tan); + +} // namespace Cpu +} // namespace Extensions +} // namespace InferenceEngine diff --git a/inference-engine/src/extension/ext_mvn.cpp b/inference-engine/src/extension/ext_mvn.cpp index 7c09e5342fd143..4ae1b214dbc567 100644 --- a/inference-engine/src/extension/ext_mvn.cpp +++ b/inference-engine/src/extension/ext_mvn.cpp @@ -52,7 +52,7 @@ class MVNImpl: public ExtLayerBase { float* src_data = inputs[0]->buffer(); float* dst_data = outputs[0]->buffer(); - if (inputs[0]->layout() == NCHW || inputs[0]->layout() == NCDHW) { + if (inputs[0]->getTensorDesc().getLayout() == NCHW || inputs[0]->getTensorDesc().getLayout() == NCDHW) { mvn_pln(src_data, dst_data, inputs[0]->getTensorDesc().getDims()); } else { mvn_blk(src_data, dst_data, inputs[0]->getTensorDesc().getDims()); diff --git a/inference-engine/src/extension/ext_normalize.cpp b/inference-engine/src/extension/ext_normalize.cpp index 448d0cb1f7f646..7cf0bf54061b2c 100644 --- a/inference-engine/src/extension/ext_normalize.cpp +++ b/inference-engine/src/extension/ext_normalize.cpp @@ -25,8 +25,10 @@ class NormalizeImpl: public ExtLayerBase { if (layer->insData.size() != 1 || layer->outData.size() != 1) THROW_IE_EXCEPTION << "Incorrect number of input/output edges!"; - if (layer->insData[0].lock()->dims.size() < 2 || 
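
HardSigmoid and Selu are the only two functions with parameter defaults, and the kernel substitutes them whenever alpha/beta/gamma come through as 0 (the IR default). Equivalent scalar forms with those defaults spelled out:

#include <algorithm>
#include <cmath>

inline float hard_sigmoid(float x, float alpha = 0.2f, float beta = 0.5f) {
    return std::max(0.f, std::min(1.f, alpha * x + beta));
}

inline float selu(float x, float alpha = 1.67326f, float gamma = 1.0507f) {
    return x > 0.f ? gamma * x : gamma * alpha * (std::exp(x) - 1.f);
}
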
layer->insData[0].lock()->dims.size() > 4) + if (layer->insData[0].lock()->getTensorDesc().getDims().size() < 2 || + layer->insData[0].lock()->getTensorDesc().getDims().size() > 4) { THROW_IE_EXCEPTION << "Normalize supports from 2D to 4D blobs!"; + } weights = std::dynamic_pointer_cast>(layer->blobs.at("weights")); if (!weights) diff --git a/inference-engine/src/extension/ext_one_hot.cpp b/inference-engine/src/extension/ext_one_hot.cpp new file mode 100644 index 00000000000000..73c9fbe1cd8ebf --- /dev/null +++ b/inference-engine/src/extension/ext_one_hot.cpp @@ -0,0 +1,81 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ext_list.hpp" +#include "ext_base.hpp" + +#include + +namespace InferenceEngine { +namespace Extensions { +namespace Cpu { + +class OneHotImpl: public ExtLayerBase { +public: + explicit OneHotImpl(const CNNLayer* layer) { + try { + depth = layer->GetParamAsUInt("depth"); + on_value = layer->GetParamAsFloat("on_value", 1.0f); + off_value = layer->GetParamAsFloat("off_value", 0.0f); + axis = layer->GetParamAsInt("axis", -1); + + src_dims = layer->insData[0].lock()->getTensorDesc().getDims(); + dst_dims = layer->outData[0]->getTensorDesc().getDims(); + + int output_dims_size = dst_dims.size(); + if (layer->CheckParamPresence("axis") && + (-1 > axis || axis >= output_dims_size)) { + THROW_IE_EXCEPTION << "The value of " << layer->name << " layer axis parameter must be between -1 <= axis < "\ + << output_dims_size << ", but actually it is " << axis; + } + + if (!( ((1 + src_dims.size()) == dst_dims.size()) || + (src_dims.size() == 1 && dst_dims.size() == 1 && dst_dims[0] == depth && src_dims[0] == 1))) + THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output dimensions!"; + + addConfig(layer, { DataConfigurator(ConfLayout::PLN) }, { DataConfigurator(ConfLayout::PLN) }); + } catch (InferenceEngine::details::InferenceEngineException &ex) { + errorMsg = ex.what(); + } + } + + StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { + const auto *src_data = inputs[0]->cbuffer().as(); + auto *dst_data = outputs[0]->buffer().as(); + std::size_t prefix_size = 1; + auto input_dims = inputs[0]->getTensorDesc().getDims(); + + std::size_t actual_axis = (axis == -1) ? src_dims.size() : axis; + for (size_t i = 0; i < actual_axis; ++i) + prefix_size *= input_dims[i]; + + std::size_t suffix_size = inputs[0]->size() / prefix_size; + + std::size_t dst_offset = 0; + for (std::size_t prefix_idx = 0; prefix_idx < prefix_size; ++prefix_idx) { + for (std::size_t depth_idx = 0; depth_idx < depth; ++depth_idx) { + for (std::size_t suffix_idx = 0; suffix_idx < suffix_size; suffix_idx++) { + auto src_index = prefix_idx * suffix_size + suffix_idx; + std::size_t v = static_cast(src_data[src_index]); + dst_data[dst_offset++] = (v == depth_idx) ? 
on_value : off_value; + } + } + } + return OK; + } + +private: + uint32_t depth; + float on_value = 1.f; + float off_value = 0.f; + int32_t axis = -1; + SizeVector src_dims; + SizeVector dst_dims; +}; + +REG_FACTORY_FOR(ImplFactory, OneHot); + +} // namespace Cpu +} // namespace Extensions +} // namespace InferenceEngine diff --git a/inference-engine/src/extension/ext_priorbox.cpp b/inference-engine/src/extension/ext_priorbox.cpp index d1cb1955cc8aec..e5489a934a6ed7 100644 --- a/inference-engine/src/extension/ext_priorbox.cpp +++ b/inference-engine/src/extension/ext_priorbox.cpp @@ -9,20 +9,29 @@ #include #include #include +#include "ie_parallel.hpp" namespace InferenceEngine { namespace Extensions { namespace Cpu { class PriorBoxImpl: public ExtLayerBase { + static inline float clip_great(float x, float threshold) { + return x < threshold ? x : threshold; + } + + static inline float clip_less(float x, float threshold) { + return x > threshold ? x : threshold; + } + public: explicit PriorBoxImpl(const CNNLayer *layer) { try { if (layer->insData.size() != 2 || layer->outData.empty()) THROW_IE_EXCEPTION << "Incorrect number of input/output edges!"; - if (layer->insData[0].lock()->dims.size() != 4 || - layer->insData[1].lock()->dims.size() != 4) + if (layer->insData[0].lock()->getTensorDesc().getDims().size() != 4 || + layer->insData[1].lock()->getTensorDesc().getDims().size() != 4) THROW_IE_EXCEPTION << "PriorBox supports only 4D blobs!"; _offset = layer->GetParamAsFloat("offset"); @@ -33,6 +42,10 @@ class PriorBoxImpl: public ExtLayerBase { _clip = layer->GetParamAsBool("clip", false); _scale_all_sizes = layer->GetParamAsBool("scale_all_sizes", true); + _fixed_sizes = layer->GetParamAsFloats("fixed_size", {}); + _fixed_ratios = layer->GetParamAsFloats("fixed_ratio", {}); + _densitys = layer->GetParamAsFloats("density", {}); + bool exist; _aspect_ratios.push_back(1.0f); @@ -70,6 +83,20 @@ class PriorBoxImpl: public ExtLayerBase { _num_priors = static_cast(_aspect_ratios.size() + _min_sizes.size() - 1); } + if (_fixed_sizes.size() > 0) { + _num_priors = static_cast(_aspect_ratios.size() * _fixed_sizes.size()); + } + + if (_densitys.size() > 0) { + for (size_t i = 0; i < _densitys.size(); ++i) { + if (_fixed_ratios.size() > 0) { + _num_priors += (_fixed_ratios.size()) * (static_cast(pow(_densitys[i], 2)) - 1); + } else { + _num_priors += (_aspect_ratios.size()) * (static_cast(pow(_densitys[i], 2)) - 1); + } + } + } + for (auto it = _max_sizes.begin(); it != _max_sizes.end(); it++) { _num_priors += 1; } @@ -133,9 +160,11 @@ class PriorBoxImpl: public ExtLayerBase { step_y = _step; } + float IWI = 1.0f / static_cast(IW); + float IHI = 1.0f / static_cast(IH); + float* dst_data = dstMemPtr->buffer(); - int dim = H * W * _num_priors * 4; int idx = 0; float center_x = 0.0f; float center_y = 0.0f; @@ -145,30 +174,106 @@ class PriorBoxImpl: public ExtLayerBase { for (int h = 0; h < H; ++h) { for (int w = 0; w < W; ++w) { - for (size_t msIdx = 0; msIdx < _min_sizes.size(); msIdx++) { - if (_step == 0) { - center_x = (w + 0.5f) * step_x; - center_y = (h + 0.5f) * step_y; + if (_step == 0) { + center_x = (w + 0.5f) * step_x; + center_y = (h + 0.5f) * step_y; + } else { + center_x = (_offset + w) * _step; + center_y = (_offset + h) * _step; + } + + for (size_t s = 0; s < _fixed_sizes.size(); ++s) { + size_t fixed_size_ = static_cast(_fixed_sizes[s]); + box_width = box_height = fixed_size_ * 0.5f; + + if (_fixed_ratios.size() > 0) { + for (float ar : _fixed_ratios) { + size_t density_ = 
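
With axis == -1 the prefix covers the whole input and the suffix collapses to 1, so depth becomes the innermost output dimension. A reference sketch of that common case (hypothetical helper):

#include <cstddef>
#include <vector>

std::vector<float> one_hot(const std::vector<float> &idx, size_t depth,
                           float on = 1.f, float off = 0.f) {
    std::vector<float> out(idx.size() * depth, off);
    for (size_t i = 0; i < idx.size(); ++i) {
        size_t v = static_cast<size_t>(idx[i]);
        if (v < depth) out[i * depth + v] = on;  // out-of-range rows stay all-off
    }
    return out;
}
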
static_cast(_densitys[s]); + int shift = static_cast(_fixed_sizes[s] / density_); + ar = sqrt(ar); + float box_width_ratio = _fixed_sizes[s] * 0.5f * ar; + float box_height_ratio = _fixed_sizes[s] * 0.5f / ar; + for (size_t r = 0; r < density_; ++r) { + for (size_t c = 0; c < density_; ++c) { + float center_x_temp = center_x - fixed_size_ / 2 + shift / 2.f + c * shift; + float center_y_temp = center_y - fixed_size_ / 2 + shift / 2.f + r * shift; + + // xmin + dst_data[idx++] = clip_less((center_x_temp - box_width_ratio) * IWI, 0); + // ymin + dst_data[idx++] = clip_less((center_y_temp - box_height_ratio) * IHI, 0); + // xmax + dst_data[idx++] = clip_great((center_x_temp + box_width_ratio) * IWI, 1); + // ymax + dst_data[idx++] = clip_great((center_y_temp + box_height_ratio) * IHI, 1); + } + } + } } else { - center_x = (_offset + w) * _step; - center_y = (_offset + h) * _step; + if (_densitys.size() > 0) { + int density_ = static_cast(_densitys[s]); + int shift = static_cast(_fixed_sizes[s] / density_); + for (int r = 0; r < density_; ++r) { + for (int c = 0; c < density_; ++c) { + float center_x_temp = center_x - fixed_size_ / 2 + shift / 2.f + c * shift; + float center_y_temp = center_y - fixed_size_ / 2 + shift / 2.f + r * shift; + + // xmin + dst_data[idx++] = clip_less((center_x_temp - box_width) * IWI, 0); + // ymin + dst_data[idx++] = clip_less((center_y_temp - box_height) * IHI, 0); + // xmax + dst_data[idx++] = clip_great((center_x_temp + box_width) * IWI, 1); + // ymax + dst_data[idx++] = clip_great((center_y_temp + box_height) * IHI, 1); + } + } + } + // Rest of priors + for (float ar : _aspect_ratios) { + if (fabs(ar - 1.) < 1e-6) { + continue; + } + + int density_ = static_cast(_densitys[s]); + int shift = static_cast(_fixed_sizes[s] / density_); + ar = sqrt(ar); + float box_width_ratio = _fixed_sizes[s] * 0.5f * ar; + float box_height_ratio = _fixed_sizes[s] * 0.5f / ar; + for (int r = 0; r < density_; ++r) { + for (int c = 0; c < density_; ++c) { + float center_x_temp = center_x - fixed_size_ / 2 + shift / 2.f + c * shift; + float center_y_temp = center_y - fixed_size_ / 2 + shift / 2.f + r * shift; + // xmin + dst_data[idx++] = clip_less((center_x_temp - box_width_ratio) * IWI, 0); + // ymin + dst_data[idx++] = clip_less((center_y_temp - box_height_ratio) * IHI, 0); + // xmax + dst_data[idx++] = clip_great((center_x_temp + box_width_ratio) * IWI, 1); + // ymax + dst_data[idx++] = clip_great((center_y_temp + box_height_ratio) * IHI, 1); + } + } + } } + } - box_width = _min_sizes[msIdx]; - box_height = _min_sizes[msIdx]; + for (size_t msIdx = 0; msIdx < _min_sizes.size(); msIdx++) { + box_width = _min_sizes[msIdx] * 0.5f; + box_height = _min_sizes[msIdx] * 0.5f; - dst_data[idx++] = (center_x - box_width / 2.0f) / IW; - dst_data[idx++] = (center_y - box_height / 2.0f) / IH; - dst_data[idx++] = (center_x + box_width / 2.0f) / IW; - dst_data[idx++] = (center_y + box_height / 2.0f) / IH; + dst_data[idx++] = (center_x - box_width) * IWI; + dst_data[idx++] = (center_y - box_height) * IHI; + dst_data[idx++] = (center_x + box_width) * IWI; + dst_data[idx++] = (center_y + box_height) * IHI; if (_max_sizes.size() > msIdx) { - box_width = box_height = sqrt(_min_sizes[msIdx] * _max_sizes[msIdx]); + box_width = box_height = sqrt(_min_sizes[msIdx] * _max_sizes[msIdx]) * 0.5f; - dst_data[idx++] = (center_x - box_width / 2.0f) / IW; - dst_data[idx++] = (center_y - box_height / 2.0f) / IH; - dst_data[idx++] = (center_x + box_width / 2.0f) / IW; - dst_data[idx++] = (center_y + box_height / 
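
Each fixed_size cell is subdivided into a density x density grid of box centers spaced shift = fixed_size / density apart, which is where the extra (density^2 - 1) priors counted in the constructor come from. A small worked example of the center arithmetic used in the branches above:

#include <cstdio>

int main() {
    float fixed_size = 32.f, center_x = 64.f, center_y = 64.f;
    int density = 2;
    int shift = static_cast<int>(fixed_size / density);        // 16
    // prints (56, 56), (72, 56), (56, 72), (72, 72)
    for (int r = 0; r < density; ++r)
        for (int c = 0; c < density; ++c)
            std::printf("(%g, %g)\n",
                        center_x - fixed_size / 2 + shift / 2.f + c * shift,
                        center_y - fixed_size / 2 + shift / 2.f + r * shift);
}
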
2.0f) / IH; + dst_data[idx++] = (center_x - box_width) * IWI; + dst_data[idx++] = (center_y - box_height) * IHI; + dst_data[idx++] = (center_x + box_width) * IWI; + dst_data[idx++] = (center_y + box_height) * IHI; } if (_scale_all_sizes || (!_scale_all_sizes && (msIdx == _min_sizes.size() - 1))) { @@ -178,13 +283,14 @@ class PriorBoxImpl: public ExtLayerBase { continue; } - box_width = _min_sizes[sIdx] * sqrt(ar); - box_height = _min_sizes[sIdx] / sqrt(ar); + ar = sqrt(ar); + box_width = _min_sizes[sIdx] * 0.5f * ar; + box_height = _min_sizes[sIdx] * 0.5f / ar; - dst_data[idx++] = (center_x - box_width / 2.0f) / IW; - dst_data[idx++] = (center_y - box_height / 2.0f) / IH; - dst_data[idx++] = (center_x + box_width / 2.0f) / IW; - dst_data[idx++] = (center_y + box_height / 2.0f) / IH; + dst_data[idx++] = (center_x - box_width) * IWI; + dst_data[idx++] = (center_y - box_height) * IHI; + dst_data[idx++] = (center_x + box_width) * IWI; + dst_data[idx++] = (center_y + box_height) * IHI; } } } @@ -192,31 +298,23 @@ class PriorBoxImpl: public ExtLayerBase { } if (_clip) { - for (int d = 0; d < dim; ++d) { - dst_data[d] = (std::min)((std::max)(dst_data[d], 0.0f), 1.0f); - } + parallel_for((H * W * _num_priors * 4), [&](size_t i) { + dst_data[i] = (std::min)((std::max)(dst_data[i], 0.0f), 1.0f); + }); } - int channel_size = OH * OW; - + size_t channel_size = OH * OW; dst_data += channel_size; - - int count = 0; if (_variance.size() == 1) { - for (int i = 0; i < channel_size; i++) { + parallel_for(channel_size, [&](size_t i) { dst_data[i] = _variance[0]; - } + }); } else { - for (int h = 0; h < H; ++h) { - for (int w = 0; w < W; ++w) { - for (int i = 0; i < _num_priors; ++i) { - for (int j = 0; j < 4; ++j) { - dst_data[count] = _variance[j]; - ++count; - } - } + parallel_for((H * W * _num_priors), [&](size_t i) { + for (size_t j = 0; j < 4; ++j) { + dst_data[i * 4 + j] = _variance[j]; } - } + }); } return OK; } @@ -230,6 +328,10 @@ class PriorBoxImpl: public ExtLayerBase { bool _clip = false; bool _scale_all_sizes = true; + std::vector _fixed_sizes; + std::vector _fixed_ratios; + std::vector _densitys; + std::vector _aspect_ratios; std::vector _variance; diff --git a/inference-engine/src/extension/ext_priorbox_clustered.cpp b/inference-engine/src/extension/ext_priorbox_clustered.cpp index 40fd273844c5f6..a90f898fe1f28f 100644 --- a/inference-engine/src/extension/ext_priorbox_clustered.cpp +++ b/inference-engine/src/extension/ext_priorbox_clustered.cpp @@ -18,8 +18,8 @@ class PriorBoxClusteredImpl: public ExtLayerBase { if (layer->insData.size() != 2 || layer->outData.empty()) THROW_IE_EXCEPTION << "Incorrect number of input/output edges!"; - if (layer->insData[0].lock()->dims.size() != 4 || - layer->insData[1].lock()->dims.size() != 4) + if (layer->insData[0].lock()->getTensorDesc().getDims().size() != 4 || + layer->insData[1].lock()->getTensorDesc().getDims().size() != 4) THROW_IE_EXCEPTION << "PriorBoxClustered supports only 4D blobs!"; widths_ = layer->GetParamAsFloats("width", {}); diff --git a/inference-engine/src/extension/ext_priorgridgenerator_onnx.cpp b/inference-engine/src/extension/ext_priorgridgenerator_onnx.cpp index b3c856e4c279d7..fbb56531ef7675 100644 --- a/inference-engine/src/extension/ext_priorgridgenerator_onnx.cpp +++ b/inference-engine/src/extension/ext_priorgridgenerator_onnx.cpp @@ -33,11 +33,11 @@ class ExperimentalDetectronPriorGridGeneratorImpl: public ExtLayerBase { if (layer->insData.size() > 3 || layer->outData.empty()) THROW_IE_EXCEPTION << "Incorrect number of 
input/output edges!"; - if (layer->insData[INPUT_PRIORS].lock()->dims.size() != 2 || + if (layer->insData[INPUT_PRIORS].lock()->getTensorDesc().getDims().size() != 2 || (layer->insData.size() > INPUT_FEATUREMAP && - layer->insData[INPUT_FEATUREMAP].lock()->dims.size() != 4) || + layer->insData[INPUT_FEATUREMAP].lock()->getTensorDesc().getDims().size() != 4) || (layer->insData.size() > INPUT_IMAGE && - layer->insData[INPUT_IMAGE].lock()->dims.size() != 4)) + layer->insData[INPUT_IMAGE].lock()->getTensorDesc().getDims().size() != 4)) THROW_IE_EXCEPTION << "Unsupported shape of input blobs!"; grid_w_ = layer->GetParamAsInt("w", 0); diff --git a/inference-engine/src/extension/ext_proposal.cpp b/inference-engine/src/extension/ext_proposal.cpp index 4b264ab94107ac..b2de6f63eb9804 100644 --- a/inference-engine/src/extension/ext_proposal.cpp +++ b/inference-engine/src/extension/ext_proposal.cpp @@ -155,13 +155,23 @@ void enumerate_proposals_cpu(const float* bottom4d, const float* d_anchor4d, con }); } -static void unpack_boxes(const float* p_proposals, float* unpacked_boxes, int pre_nms_topn) { - parallel_for(pre_nms_topn, [&](size_t i) { - unpacked_boxes[0*pre_nms_topn + i] = p_proposals[5*i + 0]; - unpacked_boxes[1*pre_nms_topn + i] = p_proposals[5*i + 1]; - unpacked_boxes[2*pre_nms_topn + i] = p_proposals[5*i + 2]; - unpacked_boxes[3*pre_nms_topn + i] = p_proposals[5*i + 3]; - }); +static void unpack_boxes(const float* p_proposals, float* unpacked_boxes, int pre_nms_topn, bool store_prob) { + if (store_prob) { + parallel_for(pre_nms_topn, [&](size_t i) { + unpacked_boxes[0 * pre_nms_topn + i] = p_proposals[5 * i + 0]; + unpacked_boxes[1 * pre_nms_topn + i] = p_proposals[5 * i + 1]; + unpacked_boxes[2 * pre_nms_topn + i] = p_proposals[5 * i + 2]; + unpacked_boxes[3 * pre_nms_topn + i] = p_proposals[5 * i + 3]; + unpacked_boxes[4 * pre_nms_topn + i] = p_proposals[5 * i + 4]; + }); + } else { + parallel_for(pre_nms_topn, [&](size_t i) { + unpacked_boxes[0 * pre_nms_topn + i] = p_proposals[5 * i + 0]; + unpacked_boxes[1 * pre_nms_topn + i] = p_proposals[5 * i + 1]; + unpacked_boxes[2 * pre_nms_topn + i] = p_proposals[5 * i + 2]; + unpacked_boxes[3 * pre_nms_topn + i] = p_proposals[5 * i + 3]; + }); + } } static @@ -293,11 +303,12 @@ void retrieve_rois_cpu(const int num_rois, const int item_index, const int num_proposals, const float* proposals, const int roi_indices[], float* rois, int post_nms_topn_, - bool normalize, float img_h, float img_w, bool clip_after_nms) { + bool normalize, float img_h, float img_w, bool clip_after_nms, float* probs) { const float *src_x0 = proposals + 0 * num_proposals; const float *src_y0 = proposals + 1 * num_proposals; const float *src_x1 = proposals + 2 * num_proposals; const float *src_y1 = proposals + 3 * num_proposals; + const float *src_probs = proposals + 4 * num_proposals; parallel_for(num_rois, [&](size_t roi) { int index = roi_indices[roi]; @@ -326,6 +337,9 @@ void retrieve_rois_cpu(const int num_rois, const int item_index, rois[roi * 5 + 2] = y0; rois[roi * 5 + 3] = x1; rois[roi * 5 + 4] = y1; + + if (probs) + probs[roi] = src_probs[index]; }); if (num_rois < post_nms_topn_) { @@ -342,10 +356,10 @@ class ProposalImpl : public ExtLayerBase { public: explicit ProposalImpl(const CNNLayer *layer) { try { - if (layer->insData.size() != 3 || layer->outData.size() != 1) + if (layer->insData.size() != 3 || (layer->outData.size() != 1 && layer->outData.size() != 2)) THROW_IE_EXCEPTION << "Incorrect number of input/output edges!"; - if 
(layer->insData[0].lock()->dims.size() != 4) + if (layer->insData[0].lock()->getTensorDesc().getDims().size() != 4) THROW_IE_EXCEPTION << "Proposal supports only 4D blobs!"; feat_stride_ = static_cast(layer->GetParamAsInt("feat_stride")); @@ -358,9 +372,9 @@ class ProposalImpl : public ExtLayerBase { box_size_scale_ = layer->GetParamAsFloat("box_size_scale", 1.0); scales = layer->GetParamAsFloats("scale", {}); ratios = layer->GetParamAsFloats("ratio", {}); - normalize_ = layer->GetParamsAsBool("normalize", false); - clip_before_nms = layer->GetParamsAsBool("clip_before_nms", true); - clip_after_nms = layer->GetParamsAsBool("clip_after_nms", false); + normalize_ = layer->GetParamAsBool("normalize", false); + clip_before_nms = layer->GetParamAsBool("clip_before_nms", true); + clip_after_nms = layer->GetParamAsBool("clip_after_nms", false); anchors_shape_0 = ratios.size() * scales.size(); anchors_.resize(anchors_shape_0 * 4); @@ -383,8 +397,16 @@ class ProposalImpl : public ExtLayerBase { coordinates_offset, shift_anchors, round_ratios); roi_indices_.resize(post_nms_topn_); - addConfig(layer, {DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN)}, - {DataConfigurator(ConfLayout::PLN)}); + + store_prob = layer->outData.size() == 2; + + if (store_prob) { + addConfig(layer, {DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN)}, + {DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN)}); + } else { + addConfig(layer, {DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN)}, + {DataConfigurator(ConfLayout::PLN)}); + } } catch (const InferenceEngine::details::InferenceEngineException &ex) { errorMsg = ex.what(); } @@ -402,6 +424,9 @@ class ProposalImpl : public ExtLayerBase { const float *p_d_anchor_item = inputs[1]->buffer(); const float *p_img_info_cpu = inputs[2]->buffer(); float *p_roi_item = outputs[0]->buffer(); + float *p_prob_item = nullptr; + if (store_prob) + p_prob_item = outputs[1]->buffer(); size_t img_info_size = inputs[2]->getTensorDesc().getDims()[1]; @@ -445,7 +470,8 @@ class ProposalImpl : public ExtLayerBase { float score; }; std::vector proposals_(num_proposals); - std::vector unpacked_boxes(4 * pre_nms_topn); + const int unpacked_boxes_buffer_size = store_prob ? 5 * pre_nms_topn : 4 * pre_nms_topn; + std::vector unpacked_boxes(unpacked_boxes_buffer_size); std::vector is_dead(pre_nms_topn); // Execute @@ -463,12 +489,14 @@ class ProposalImpl : public ExtLayerBase { return (struct1.score > struct2.score); }); - unpack_boxes(reinterpret_cast(&proposals_[0]), &unpacked_boxes[0], pre_nms_topn); + unpack_boxes(reinterpret_cast(&proposals_[0]), &unpacked_boxes[0], pre_nms_topn, store_prob); nms_cpu(pre_nms_topn, &is_dead[0], &unpacked_boxes[0], &roi_indices_[0], &num_rois, 0, nms_thresh_, post_nms_topn_, coordinates_offset); + + float* p_probs = store_prob ? 
p_prob_item + n * post_nms_topn_ : nullptr; retrieve_rois_cpu(num_rois, n, pre_nms_topn, &unpacked_boxes[0], &roi_indices_[0], p_roi_item + n * post_nms_topn_ * 5, - post_nms_topn_, normalize_, img_H, img_W, clip_after_nms); + post_nms_topn_, normalize_, img_H, img_W, clip_after_nms, p_probs); } return OK; @@ -506,32 +534,10 @@ class ProposalImpl : public ExtLayerBase { bool clip_after_nms; // clip bounding boxes after nms step bool round_ratios; // round ratios during anchors generation stage bool shift_anchors; // shift anchors by half size of the box + bool store_prob; // store blob with proposal probabilities }; -class ProposalFactory : public ImplFactory { -public: - explicit ProposalFactory(const CNNLayer *layer): ImplFactory(layer) {} - // set output shapes by input shapes. - StatusCode getShapes(const std::vector& inShapes, std::vector& outShapes, - ResponseDesc *resp) noexcept override { - try { - if (inShapes.size() != 1) { - THROW_IE_EXCEPTION << "Incorrect input shapes!"; - } - outShapes.clear(); - outShapes.emplace_back(cnnLayer.precision, inShapes[0].getDims(), inShapes[0].getLayout()); - return OK; - } catch (const InferenceEngine::details::InferenceEngineException& e) { - if (resp) { - std::string errorMsg = e.what(); - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return GENERAL_ERROR; - } - } -}; - -REG_FACTORY_FOR(ProposalFactory, Proposal); +REG_FACTORY_FOR(ImplFactory, Proposal); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/extension/ext_proposal_onnx.cpp b/inference-engine/src/extension/ext_proposal_onnx.cpp index 18cc8b330b9a3d..39ff4a45b80925 100644 --- a/inference-engine/src/extension/ext_proposal_onnx.cpp +++ b/inference-engine/src/extension/ext_proposal_onnx.cpp @@ -416,30 +416,7 @@ class ONNXCustomProposalImpl : public ExtLayerBase { std::vector roi_indices_; }; -class ONNXCustomProposalFactory : public ImplFactory { -public: - explicit ONNXCustomProposalFactory(const CNNLayer *layer): ImplFactory(layer) {} - // set output shapes by input shapes. 
- StatusCode getShapes(const std::vector& inShapes, std::vector& outShapes, - ResponseDesc *resp) noexcept override { - try { - if (inShapes.size() != 1) { - THROW_IE_EXCEPTION << "Incorrect input shapes!"; - } - outShapes.clear(); - outShapes.emplace_back(cnnLayer.precision, inShapes[0].getDims(), inShapes[0].getLayout()); - return OK; - } catch (const InferenceEngine::details::InferenceEngineException& e) { - if (resp) { - std::string errorMsg = e.what(); - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return GENERAL_ERROR; - } - } -}; - -REG_FACTORY_FOR(ONNXCustomProposalFactory, ExperimentalDetectronGenerateProposalsSingleImage); +REG_FACTORY_FOR(ImplFactory, ExperimentalDetectronGenerateProposalsSingleImage); } // namespace Cpu } // namespace Extensions diff --git a/inference-engine/src/extension/ext_psroi.cpp b/inference-engine/src/extension/ext_psroi.cpp index 71bd3f654dfde1..fde8baf779a4c9 100644 --- a/inference-engine/src/extension/ext_psroi.cpp +++ b/inference-engine/src/extension/ext_psroi.cpp @@ -18,17 +18,18 @@ class PSROIPoolingImpl: public ExtLayerBase { public: explicit PSROIPoolingImpl(const CNNLayer* layer) { try { - if (layer->insData.size() != 2 || layer->outData.size() != 1) - THROW_IE_EXCEPTION << "Incorrect number of input/output edges!"; + mode_ = layer->GetParamAsString("mode", "average"); + if (mode_ != "bilinear_deformable") + if (layer->insData.size() != 2 || layer->outData.size() != 1) + THROW_IE_EXCEPTION << "Incorrect number of input/output edges!"; // LayerSetUp output_dim_ = static_cast(layer->GetParamAsInt("output_dim")); group_size_ = static_cast(layer->GetParamAsInt("group_size")); spatial_scale_ = layer->GetParamAsFloat("spatial_scale"); - pooled_height_ = group_size_; - pooled_width_ = group_size_; + pooled_height_ = static_cast(layer->GetParamAsInt("pooled_height", static_cast(group_size_))); + pooled_width_ = static_cast(layer->GetParamAsInt("pooled_width", static_cast(group_size_))); spatial_bins_x_ = static_cast(layer->GetParamAsInt("spatial_bins_x", 1)); spatial_bins_y_ = static_cast(layer->GetParamAsInt("spatial_bins_y", 1)); - mode_ = layer->GetParamAsString("mode", "average"); SizeVector inDims = layer->insData[0].lock()->getTensorDesc().getDims(); channels = static_cast(inDims[1]); @@ -41,7 +42,17 @@ class PSROIPoolingImpl: public ExtLayerBase { nh = static_cast(outDims[2]); nw = static_cast(outDims[3]); - addConfig(layer, {DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN)}, {DataConfigurator(ConfLayout::PLN)}); + // for Deformable PSROIPolling + no_trans_ = layer->GetParamAsBool("no_trans", true); + part_size_ = layer->GetParamAsInt("part_size", 1); + trans_std_ = layer->GetParamAsFloat("trans_std", 1); + + if (no_trans_) { + addConfig(layer, {DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN)}, {DataConfigurator(ConfLayout::PLN)}); + } else { + addConfig(layer, {DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN), + DataConfigurator(ConfLayout::PLN)}, {DataConfigurator(ConfLayout::PLN)}); + } } catch (InferenceEngine::details::InferenceEngineException &ex) { errorMsg = ex.what(); } @@ -62,6 +73,16 @@ class PSROIPoolingImpl: public ExtLayerBase { } } + // for Deformable PSROIPooling + float *bottom_trans = nullptr; + int num_classes = 1; + int channels_each_class = output_dim_; + if (!no_trans_) { + bottom_trans = inputs[2]->buffer(); + num_classes = static_cast(inputs[2]->getTensorDesc().getDims()[1]) / 2; + channels_each_class /= num_classes; + } + size_t num_bins = 
spatial_bins_x_*spatial_bins_y_; parallel_for(real_rois, [&](int n) { @@ -89,6 +110,14 @@ class PSROIPoolingImpl: public ExtLayerBase { // Force too small ROIs to be 1x1 roi_width = std::max(roi_end_w - roi_start_w, 0.1f); // avoid 0 roi_height = std::max(roi_end_h - roi_start_h, 0.1f); + } else if (mode_ == "bilinear_deformable") { + roi_start_w = static_cast(round(bottom_rois[1])) * spatial_scale_ - 0.5f; + roi_start_h = static_cast(round(bottom_rois[2])) * spatial_scale_ - 0.5f; + roi_end_w = static_cast(round(bottom_rois[3]) + 1.0f) * spatial_scale_ - 0.5f; + roi_end_h = static_cast(round(bottom_rois[4]) + 1.0f) * spatial_scale_ - 0.5f; + // Force too small ROIs to be 1x1 + roi_width = std::max(roi_end_w - roi_start_w, 0.1f); // avoid 0 + roi_height = std::max(roi_end_h - roi_start_h, 0.1f); } for (int c = 0; c < nc; c++) { @@ -172,6 +201,51 @@ class PSROIPoolingImpl: public ExtLayerBase { } } dst_data[index] /= num_bins; + } else if (mode_ == "bilinear_deformable") { + // Compute w and h at bottom + float bin_size_h = roi_height / static_cast(pooled_height_); + float bin_size_w = roi_width / static_cast(pooled_width_); + + float sub_bin_size_h = bin_size_h / static_cast(spatial_bins_x_); + float sub_bin_size_w = bin_size_w / static_cast(spatial_bins_y_); + + int part_h = h * part_size_ / pooled_height_; + int part_w = w * part_size_ / pooled_width_; + int class_id = c / channels_each_class; + float trans_x = no_trans_ ? 0 : + bottom_trans[(((n * num_classes + class_id) * 2) * part_size_ + part_h) + * part_size_ + part_w] * trans_std_; + float trans_y = no_trans_ ? 0 : + bottom_trans[(((n * num_classes + class_id) * 2 + 1) * part_size_ + part_h) + * part_size_ + part_w] * trans_std_; + + float wstart = w * bin_size_w + roi_start_w + trans_x * roi_width; + float hstart = h * bin_size_h + roi_start_h + trans_y * roi_height; + + float sum = 0; + int count = 0; + int gw = w * group_size_ / pooled_width_; + int gh = h * group_size_ / pooled_height_; + gw = std::min(std::max(gw, 0), static_cast(group_size_ - 1)); + gh = std::min(std::max(gh, 0), static_cast(group_size_ - 1)); + + const float* offset_bottom_data = bottom_data_beginning + (roi_batch_ind * channels) * height * width; + for (size_t ih = 0; ih < spatial_bins_y_; ih++) { + for (size_t iw = 0; iw < spatial_bins_x_; iw++) { + float w1 = wstart + iw * sub_bin_size_w; + float h1 = hstart + ih * sub_bin_size_h; + // bilinear interpolation + if (w1 < -0.5 || w1 > width - 0.5 || h1 < -0.5 || h1 > height - 0.5) + continue; + w1 = static_cast(std::min(std::max(static_cast(w1), 0.0), width - 1.0)); + h1 = static_cast(std::min(std::max(static_cast(h1), 0.0), height - 1.0)); + int c1 = static_cast((c * group_size_ + gh) * group_size_ + gw); + float val = bilinear_interp(offset_bottom_data + c1 * height * width, w1, h1, width); + sum += val; + count++; + } + } + dst_data[index] = count == 0 ? 
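// Each pooled bin is sampled at spatial_bins_y_ x spatial_bins_x_ sub-points;
// samples that fall outside the feature map are skipped, hence the
// `count == 0` guard in the ternary around this note. bilinear_interp() below
// evaluates
//   f(x,y) = (1-dx)(1-dy) f(x1,y1) + (1-dx)dy f(x1,y2) + dx(1-dy) f(x2,y1) + dx dy f(x2,y2)
// with dx = x - floor(x), dy = y - floor(y). A minimal numeric check (assumed
// 2x2 patch {1, 2; 3, 4}, x = y = 0.5): value = 0.25 * (1+2+3+4) = 2.5.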
0 : sum / count; } } } @@ -188,6 +262,22 @@ class PSROIPoolingImpl: public ExtLayerBase { return OK; } + inline float bilinear_interp(const float* data, const float x, const float y, const int width) { + int x1 = static_cast(std::floor(x)); + int x2 = static_cast(std::ceil(x)); + int y1 = static_cast(std::floor(y)); + int y2 = static_cast(std::ceil(y)); + float dist_x = x - x1; + float dist_y = y - y1; + float value11 = data[y1 * width + x1]; + float value12 = data[y2 * width + x1]; + float value21 = data[y1 * width + x2]; + float value22 = data[y2 * width + x2]; + float value = (1 - dist_x) * (1 - dist_y) * value11 + (1 - dist_x) * dist_y * value12 + + dist_x * (1 - dist_y) * value21 + dist_x * dist_y * value22; + return value; + } + private: size_t output_dim_ = 0; size_t group_size_ = 0; @@ -206,6 +296,11 @@ class PSROIPoolingImpl: public ExtLayerBase { int nc = 0; int nh = 0; int nw = 0; + + // for Deformable PSROIPolling + bool no_trans_; + int part_size_; + float trans_std_; }; REG_FACTORY_FOR(ImplFactory, PSROIPooling); diff --git a/inference-engine/src/extension/ext_range.cpp b/inference-engine/src/extension/ext_range.cpp index 995924f19f2a4a..4858d87c1e2afe 100644 --- a/inference-engine/src/extension/ext_range.cpp +++ b/inference-engine/src/extension/ext_range.cpp @@ -63,7 +63,7 @@ class RangeImpl: public ExtLayerBase { StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { StatusCode retcode = OK; - switch (outputs[0]->precision()) { + switch (outputs[0]->getTensorDesc().getPrecision()) { case Precision::FP32: { retcode = range((inputs[RANGE_START]->cbuffer().as() + inputs[RANGE_START]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0], diff --git a/inference-engine/src/extension/ext_reduce.cpp b/inference-engine/src/extension/ext_reduce.cpp new file mode 100644 index 00000000000000..16d6decfee7bba --- /dev/null +++ b/inference-engine/src/extension/ext_reduce.cpp @@ -0,0 +1,357 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ext_list.hpp" +#include "ext_base.hpp" + +#include +#include +#include +#include +#include +#include +#include "ie_parallel.hpp" + +namespace InferenceEngine { +namespace Extensions { +namespace Cpu { + +class ReduceImpl: public ExtLayerBase { +public: + explicit ReduceImpl(const CNNLayer* layer) { + try { + if (layer->insData.empty() || layer->outData.empty()) + THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!"; + + if (layer->insData.size() != 2) + THROW_IE_EXCEPTION << layer->name << " Incorrect number of input edges!"; + + idx_dims = layer->insData[REDUCE_INDEXES].lock()->getTensorDesc().getDims(); + if (idx_dims.size() > 1) + THROW_IE_EXCEPTION << layer->name << " Index vector should be 1 dimension"; + + if (layer->insData[REDUCE_DATA].lock()->getTensorDesc().getPrecision() != Precision::FP32) + THROW_IE_EXCEPTION << layer->name << " Incorrect input data tensor precision. Only FP32 is supported!"; + + if (layer->insData[REDUCE_INDEXES].lock()->getTensorDesc().getPrecision() != Precision::I32) + THROW_IE_EXCEPTION << layer->name << " Incorrect 'axes_to_reduction' input precision. 
Only I32 is supported!"; + + data_dims = layer->insData[REDUCE_DATA].lock()->getTensorDesc().getDims(); + SizeVector dst_dims = layer->outData[0]->getTensorDesc().getDims(); + + keep_dims = layer->GetParamAsBool("keep_dims", true); + if (keep_dims) { + if (data_dims.size() != dst_dims.size()) + THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output dimensions!"; + } else { + if (data_dims.size() <= dst_dims.size()) + THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output dimensions!"; + } + + std::string reduce_mode = layer->type; + if (reduce_mode == "ReduceAnd") reduceMode = Reduce::And; + else if (reduce_mode == "ReduceL1") reduceMode = Reduce::L1; + else if (reduce_mode == "ReduceL2") reduceMode = Reduce::L2; + else if (reduce_mode == "ReduceLogSum") reduceMode = Reduce::LogSum; + else if (reduce_mode == "ReduceLogSumExp") reduceMode = Reduce::LogSumExp; + else if (reduce_mode == "ReduceMax") reduceMode = Reduce::Max; + else if (reduce_mode == "ReduceMean") reduceMode = Reduce::Mean; + else if (reduce_mode == "ReduceMin") reduceMode = Reduce::Min; + else if (reduce_mode == "ReduceOr") reduceMode = Reduce::Or; + else if (reduce_mode == "ReduceProd") reduceMode = Reduce::Prod; + else if (reduce_mode == "ReduceSum") reduceMode = Reduce::Sum; + else if (reduce_mode == "ReduceSumSquare") reduceMode = Reduce::SumSquare; + else + THROW_IE_EXCEPTION << layer->name << " Incorrect Reduce layer type!"; + + src_dims = layer->insData[REDUCE_DATA].lock()->getTensorDesc().getDims(); + srcStrides = layer->insData[REDUCE_DATA].lock()->getTensorDesc().getBlockingDesc().getStrides(); + + addConfig(layer, { { ConfLayout::PLN, false }, { ConfLayout::PLN, false } }, { { ConfLayout::PLN, false } }); + } catch (InferenceEngine::details::InferenceEngineException &ex) { + errorMsg = ex.what(); + } + } + + StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { + int32_t *idx_data = inputs[REDUCE_INDEXES]->cbuffer().as() + + inputs[REDUCE_INDEXES]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + SizeVector axes; + for (size_t i = 0; i < idx_dims[0]; i++) { + int32_t axis = idx_data[i]; + if (axis < 0) + axis += data_dims.size(); + + if (static_cast(axis) > data_dims.size()) { + if (resp) { + std::string errorMsg = "Index to reduce exceeds data tensor dimension"; + errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); + } + return PARAMETER_MISMATCH; + } + axes.push_back(static_cast(axis)); + } + + size_t reduced_dims_work_amount = 1; + InferenceEngine::SizeVector our_dims, out_dims, axes_for_reduction; + for (size_t i = 0; i < src_dims.size(); i++) { + bool found = false; + for (size_t axis : axes) + if (i == axis) found = true; + + if (found) { + axes_for_reduction.push_back(i); + reduced_dims_work_amount *= src_dims[i]; + if (keep_dims) out_dims.push_back(1); + our_dims.push_back(1); + } else { + out_dims.push_back(src_dims[i]); + our_dims.push_back(src_dims[i]); + } + } + + InferenceEngine::SizeVector dst_dims = outputs[0]->getTensorDesc().getDims(); + for (size_t i = 0; i < (std::min)(out_dims.size(), dst_dims.size()); i++) { + if (out_dims[i] != dst_dims[i]) { + if (resp) { + std::string errorMsg = "Incorrect number of output dimensions!"; + errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); + } + return PARAMETER_MISMATCH; + } + } + + const float *src_data = inputs[REDUCE_DATA]->cbuffer().as() + + inputs[REDUCE_DATA]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + float* dst_data = outputs[0]->cbuffer().as() + + 
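// reduce() below is parameterized by two functors: func1 folds a source
// element into an accumulator, func2 merges partial accumulators across
// threads. Modes that need a post-pass (L2 -> sqrt, LogSum/LogSumExp -> logf,
// Mean -> divide by the reduced element count) run it as a parallel_for over
// the destination afterwards. How Mean decomposes, mirroring the switch below
// (paraphrase, not part of the commit):
//   reduce(..., 0.0f, plus, plus);                      // accumulate the Sum
//   for each i: dst[i] /= reduced_dims_work_amount;     // then normalize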
outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); + size_t work_amount_dst = outputs[0]->getTensorDesc().getBlockingDesc().getStrides()[0] * dst_dims[0]; + + switch (reduceMode) { + case Reduce::And: + reduce(src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, 1.0f, + [](float x, float y)->float { return x && y; }, + [](float x, float y)->float { return x && y; }); + break; + case Reduce::L1: + reduce(src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, 0.0f, + [](float old, float y)->float { return old + (std::abs)(y); }, + [](float x, float y)->float { return x + y; }); + break; + case Reduce::L2: + reduce(src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, 0.0f, + [](float old, float y)->float { return old + y * y;}, + [](float x, float y)->float { return x + y; }); + + parallel_for(work_amount_dst, [&](size_t i) { + dst_data[i] = sqrt(dst_data[i]); + }); + break; + case Reduce::LogSum: + reduce(src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, 0.0f, + [](float x, float y)->float { return x + y; }, + [](float x, float y)->float { return x + y; }); + + parallel_for(work_amount_dst, [&](size_t i) { + dst_data[i] = logf(dst_data[i]); + }); + break; + case Reduce::LogSumExp: + reduce(src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, 0.0f, + [](float old, float y)->float { return old + expf(y); }, + [](float x, float y)->float { return x + y; }); + + parallel_for(work_amount_dst, [&](size_t i) { + dst_data[i] = logf(dst_data[i]); + }); + break; + case Reduce::Max: + reduce(src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, FLT_MIN, + [](float x, float y)->float { return x > y ? x : y; }, + [](float x, float y)->float { return x > y ? x : y; }); + break; + case Reduce::Mean: + reduce(src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, 0.0f, + [](float x, float y)->float { return x + y; }, + [](float x, float y)->float { return x + y; }); + + parallel_for(work_amount_dst, [&](size_t i) { + dst_data[i] /= static_cast(reduced_dims_work_amount); + }); + break; + case Reduce::Min: + reduce(src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, FLT_MAX, + [](float x, float y)->float { return x < y ? x : y; }, + [](float x, float y)->float { return x < y ? 
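// Caveat worth flagging: Max seeds its accumulator with FLT_MIN, which is the
// smallest *positive* normalized float (~1.18e-38), not the most negative
// value, so an all-negative input would reduce to FLT_MIN. Seeding with
// -FLT_MAX (or std::numeric_limits<float>::lowest()) would be the safe choice.
// The FLT_MAX seed used by the Min case surrounding this note is fine.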
x : y; }); + break; + case Reduce::Or: + reduce(src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, 0.0f, + [](float x, float y)->float { return x || y; }, + [](float x, float y)->float { return x || y; }); + break; + case Reduce::Prod: + reduce(src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, 1.0f, + [](float x, float y)->float { return x * y; }, + [](float x, float y)->float { return x * y; }); + break; + case Reduce::Sum: + reduce(src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, 0.0f, + [](float x, float y)->float { return x + y; }, + [](float x, float y)->float { return x + y; }); + break; + case Reduce::SumSquare: + reduce(src_data, dst_data, work_amount_dst, reduced_dims_work_amount, axes_for_reduction, our_dims, 0.0f, + [](float old, float y)->float { return old + y * y; }, + [](float x, float y)->float { return x + y; }); + break; + default: + if (resp) { + std::string errorMsg = "Incorrect Reduce layer type"; + errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); + } + return GENERAL_ERROR; + } + return OK; + } + +private: + template + void reduce(const float *src_data, float* dst_data, size_t work_amount_dst, size_t reduced_dims_work_amount, + SizeVector axes_for_reduction, SizeVector dst_dims, float init_value, F1 func1, F2 func2); + + enum class Reduce { And, L1, L2, LogSum, LogSumExp, Max, Mean, Min, Or, Prod, Sum, SumSquare }; + + const size_t REDUCE_DATA = 0; + const size_t REDUCE_INDEXES = 1; + bool keep_dims = true; + Reduce reduceMode = Reduce::Sum; + SizeVector data_dims; + SizeVector idx_dims; + SizeVector src_dims; + SizeVector srcStrides; +}; + +template +void ReduceImpl::reduce( + const float *src_data, + float *dst_data, + size_t work_amount_dst, + size_t reduced_dims_work_amount, + SizeVector axes_for_reduction, + SizeVector dst_dims, + float init_value, + F1 func1, + F2 func2 +) { + unsigned int nthr = parallel_get_max_threads(); + if ((work_amount_dst + 1) >= nthr) { + parallel_nt(0, [&](const int ithr, const int nthr) { + int j; + size_t i, start = 0, end = 0; + SizeVector dst_counters(dst_dims.size(), 0); + splitter(work_amount_dst, nthr, ithr, start, end); + for (j = dst_dims.size() - 1, i = start; j >= 0; j--) { + dst_counters[j] = i % dst_dims[j]; + i /= dst_dims[j]; + } + for (size_t src_idx, dst_idx = start; dst_idx < end; ++dst_idx) { + float reduce_prod = init_value; + bool update_idx = true; + SizeVector src_counters = dst_counters; + for (i = 0; i < reduced_dims_work_amount; ++i) { + if (update_idx) { + src_idx = 0; + for (j = 0; j < static_cast(src_dims.size()); ++j) + src_idx += (src_counters[j] % src_dims[j]) * srcStrides[j]; + update_idx = false; + } + reduce_prod = func1(reduce_prod, src_data[src_idx]); + for (j = axes_for_reduction.size() - 1; j >= 0; j--) { + src_counters[axes_for_reduction[j]]++; + if (src_counters[axes_for_reduction[j]] < src_dims[axes_for_reduction[j]]) { + src_idx += srcStrides[axes_for_reduction[j]]; + break; + } else { + src_counters[axes_for_reduction[j]] = 0; + update_idx = true; + } + } + } + dst_data[dst_idx] = reduce_prod; + for (j = dst_dims.size() - 1; j >= 0; j--) { + dst_counters[j]++; + if (dst_counters[j] < dst_dims[j]) + break; + else + dst_counters[j] = 0; + } + } + }); + } else { + std::vector reduce_prod((nthr * work_amount_dst), init_value); + if (work_amount_dst == 1) { + parallel_nt(nthr, [&](const int ithr, const int nthr) { + size_t i, start = 0, end = 0; + splitter((srcStrides[0] 
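// Threading strategy of reduce(): when there are at least as many destination
// elements as threads, each thread owns a disjoint dst range and walks the
// reduced axes with an odometer-style counter (src_counters), bumping src_idx
// by the stride of whichever axis advanced. Otherwise dst is too small to
// split, so each thread folds a slice of the *source* into its own
// reduce_prod scratch row, and the rows are merged with func2 at the end --
// which is why func2 must be the associative "combine partials" operation
// (plain addition for L1/L2/LogSumExp) rather than func1.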
* src_dims[0]), nthr, ithr, start, end); + for (i = start; i < end; ++i) + reduce_prod[ithr] = func1(reduce_prod[ithr], src_data[i]); + }); + } else { + SizeVector dstStrides(dst_dims.size(), 1); + for (int j = dst_dims.size() - 1; j >= 1; --j) + dstStrides[j - 1] = dstStrides[j] * dst_dims[j]; + parallel_nt(nthr, [&](const int ithr, const int nthr) { + int j; + bool update_idx = true; + size_t i, src_idx, dst_idx = 0, start = 0, end = 0; + splitter((srcStrides[0] * src_dims[0]), nthr, ithr, start, end); + SizeVector src_counters(src_dims.size(), 0); + for (j = src_dims.size() - 1, src_idx = start; j >= 0; j--) { + src_counters[j] = src_idx % src_dims[j]; + src_idx /= src_dims[j]; + } + for (src_idx = start; src_idx < end; ++src_idx) { + if (update_idx) { + for (i = 0, dst_idx = 0; i < dst_dims.size(); ++i) + dst_idx += (src_counters[i] % dst_dims[i]) * dstStrides[i]; + update_idx = false; + } + reduce_prod[ithr * work_amount_dst + dst_idx] = func1(reduce_prod[ithr * work_amount_dst + dst_idx], src_data[src_idx]); + for (j = src_dims.size() - 1; j >= 0; j--) { + src_counters[j]++; + if (src_counters[j] < src_dims[j]) { + if (dst_dims[j] > 1) dst_idx += dstStrides[j]; + break; + } else { + src_counters[j] = 0; + update_idx = true; + } + } + } + }); + } + for (size_t dst_idx = 0; dst_idx < work_amount_dst; dst_idx++) { + for (size_t ithr = work_amount_dst; ithr < (nthr * work_amount_dst); ithr += work_amount_dst) + reduce_prod[dst_idx] = func2(reduce_prod[dst_idx], reduce_prod[dst_idx + ithr]); + dst_data[dst_idx] = reduce_prod[dst_idx]; + } + } +} + +REG_FACTORY_FOR(ImplFactory, ReduceAnd); +REG_FACTORY_FOR(ImplFactory, ReduceL1); +REG_FACTORY_FOR(ImplFactory, ReduceL2); +REG_FACTORY_FOR(ImplFactory, ReduceLogSum); +REG_FACTORY_FOR(ImplFactory, ReduceLogSumExp); +REG_FACTORY_FOR(ImplFactory, ReduceMax); +REG_FACTORY_FOR(ImplFactory, ReduceMean); +REG_FACTORY_FOR(ImplFactory, ReduceMin); +REG_FACTORY_FOR(ImplFactory, ReduceOr); +REG_FACTORY_FOR(ImplFactory, ReduceProd); +REG_FACTORY_FOR(ImplFactory, ReduceSum); +REG_FACTORY_FOR(ImplFactory, ReduceSumSquare); + +} // namespace Cpu +} // namespace Extensions +} // namespace InferenceEngine diff --git a/inference-engine/src/extension/ext_region_yolo.cpp b/inference-engine/src/extension/ext_region_yolo.cpp index a53869aa995613..c271acf227bae7 100644 --- a/inference-engine/src/extension/ext_region_yolo.cpp +++ b/inference-engine/src/extension/ext_region_yolo.cpp @@ -7,7 +7,11 @@ #include "defs.h" #include "softmax.h" #include -#include "simple_copy.h" +#include +#include +#if defined(HAVE_SSE) || defined(HAVE_AVX2) || defined(HAVE_AVX512F) +#include +#endif namespace InferenceEngine { namespace Extensions { @@ -44,7 +48,9 @@ class RegionYoloImpl: public ExtLayerBase { int IC = (inputs[0]->getTensorDesc().getDims().size() > 1) ? inputs[0]->getTensorDesc().getDims()[1] : 1; int B = (inputs[0]->getTensorDesc().getDims().size() > 0) ? 
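// The RegionYolo rewrite swaps simple_copy() for a parallel element-wise copy
// and applies the logistic activation in SIMD blocks (16 floats with AVX-512,
// 8 with AVX2, 4 with SSE) plus a scalar tail. calculate_func below activates
// `count` consecutive values starting at start_index: full blocks go through
// vlogistic_activate(), the remainder through the scalar logistic_activate().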
inputs[0]->getTensorDesc().getDims()[0] : 1; - simple_copy(dst_data, outputs[0]->byteSize(), src_data, (size_t)B * IC * IH * IW * sizeof(float)); + parallel_for(B * IC * IH * IW, [&](int i) { + dst_data[i] = src_data[i]; + }); int end_index = 0; int num_ = 0; @@ -58,23 +64,41 @@ class RegionYoloImpl: public ExtLayerBase { num_ = mask_size; } int inputs_size = IH * IW * num_ * (classes + coords + 1); + int first_index = 0; + int total_size = 2 * IH * IW; + +#if defined(HAVE_AVX512F) + const int block_size = 16; +#elif defined(HAVE_AVX2) + const int block_size = 8; +#elif defined(HAVE_SSE) + const int block_size = 4; +#endif + + auto calculate_func = [&](int start_index, int count) { +#if defined(HAVE_SSE) || defined(HAVE_AVX2) || defined(HAVE_AVX512F) + parallel_for(count / block_size, [&](int ib) { + vlogistic_activate(dst_data + start_index + ib * block_size); + }); + first_index = count / block_size * block_size; +#endif + parallel_for(count - first_index, [&](int i) { + dst_data[i + start_index + first_index] = logistic_activate(dst_data[i + start_index + first_index]); + }); + }; for (int b = 0; b < B; b++) { for (int n = 0; n < num_; n++) { - int index = entry_index(IW, IH, coords, classes, inputs_size, b, n * IW * IH, 0); - for (int i = index; i < index + 2 * IW * IH; i++) { - dst_data[i] = logistic_activate(dst_data[i]); - } - - index = entry_index(IW, IH, coords, classes, inputs_size, b, n * IW * IH, coords); - for (int i = index; i < index + end_index; i++) { - dst_data[i] = logistic_activate(dst_data[i]); - } + int index = b * inputs_size + n * IW * IH * (classes + coords + 1); + calculate_func(index, total_size); + + index = b * inputs_size + IW * IH * (n * (classes + coords + 1) + coords); + calculate_func(index, end_index); } } if (do_softmax) { - int index = entry_index(IW, IH, coords, classes, inputs_size, 0, 0, coords + 1); + int index = IW * IH * (coords + 1); int batch_offset = inputs_size / num; for (int b = 0; b < B * num; b++) softmax_generic(src_data + index + b * batch_offset, dst_data + index + b * batch_offset, 1, classes, @@ -91,16 +115,135 @@ class RegionYoloImpl: public ExtLayerBase { float do_softmax; std::vector mask; - inline int entry_index(int width, int height, int coords, int classes, int outputs, int batch, int location, - int entry) { - int n = location / (width * height); - int loc = location % (width * height); - return batch * outputs + n * width * height * (coords + classes + 1) + - entry * width * height + loc; + union U { + unsigned int as_uint_value; + float as_float_value; + int as_int_value; + }; + + const struct vals_for_logistic_activate_type { + U max_logf = {0x42b0c0a5}; // 88.3762589f + U min_logf = {0xc1766666}; // -14.5f + U log2ef = {0x3fb8aa3b}; // 1.44269502f + U ln2f = {0x3f317218}; // 0.69314718f + U p0 = {0x3f800001}; // 1.0000001f + U p1 = {0x3f800000}; // 1.0f + U p2 = {0x3efffe85}; // 0.4999887f + U p3 = {0x3e2aaa3e}; // 0.16666505f + U p4 = {0x3d2bb1b1}; // 0.041917507f + U p5 = {0x3c091ec1}; // 0.008369149f + U int_0x7f = {0x0000007f}; + U mask_sign = {0x80000000}; // mask to extract sign + U float_1 = {0x3f800000}; // 1.0f + U float_half = {0x3f000000}; // 0.5f + U shift_mantissa = {0x00000017}; // 23 + } vals_for_logistic_activate; + +#if defined(HAVE_AVX512F) + typedef __m512 vec_type_f; + typedef __m512i vec_type_i; +#elif defined(HAVE_AVX2) + typedef __m256 vec_type_f; + typedef __m256i vec_type_i; +#elif defined(HAVE_SSE) + typedef __m128 vec_type_f; + typedef __m128i vec_type_i; +#endif + +#if defined(HAVE_SSE) || 
defined(HAVE_AVX2) || defined(HAVE_AVX512F) + inline void vlogistic_activate(float *psrc) { + vec_type_f vaux1, vaux2, vaux3; + vec_type_f vsrc = _mm_uni_loadu_ps(psrc); + vaux2 = vsrc; + vaux2 = _mm_uni_and_ps(vaux2, _mm_uni_set1_ps(vals_for_logistic_activate.mask_sign.as_float_value)); + vsrc = _mm_uni_or_ps(vsrc, _mm_uni_set1_ps(vals_for_logistic_activate.mask_sign.as_float_value)); + + vsrc = vexp(vsrc); + + vaux1 = _mm_uni_add_ps(vsrc, _mm_uni_set1_ps(vals_for_logistic_activate.float_1.as_float_value)); + vsrc = _mm_uni_div_ps(vsrc, vaux1); + vaux3 = _mm_uni_sub_ps(_mm_uni_set1_ps(vals_for_logistic_activate.float_1.as_float_value), vsrc); + vsrc = _mm_uni_blendv_ps(vaux3, vsrc, vaux2); + + _mm_uni_storeu_ps(psrc, vsrc); + } + + inline vec_type_f vexp(vec_type_f vsrc) { + vec_type_f vaux0, vaux1, vaux3; + vec_type_i vaux2; + vsrc = _mm_uni_min_ps(vsrc, _mm_uni_set1_ps(vals_for_logistic_activate.max_logf.as_float_value)); + vsrc = _mm_uni_max_ps(vsrc, _mm_uni_set1_ps(vals_for_logistic_activate.min_logf.as_float_value)); + vaux0 = vsrc; + + vsrc = _mm_uni_mul_ps(vsrc, _mm_uni_set1_ps(vals_for_logistic_activate.log2ef.as_float_value)); + vsrc = _mm_uni_add_ps(vsrc, _mm_uni_set1_ps(vals_for_logistic_activate.float_half.as_float_value)); + vsrc = _mm_uni_floor_ps(vsrc); + vaux1 = _mm_uni_mul_ps(vsrc, _mm_uni_set1_ps(vals_for_logistic_activate.ln2f.as_float_value)); + vaux0 = _mm_uni_sub_ps(vaux0, vaux1); + + vaux2 = _mm_uni_cvtps_epi32(vsrc); + vaux2 = _mm_uni_add_epi32(vaux2, _mm_uni_set1_epi32(vals_for_logistic_activate.int_0x7f.as_uint_value)); + vaux2 = _mm_uni_slli_epi32(vaux2, vals_for_logistic_activate.shift_mantissa.as_uint_value); + + vsrc = _mm_uni_set1_ps(vals_for_logistic_activate.p5.as_float_value); + vsrc = _mm_uni_mul_ps(vsrc, vaux0); + vsrc = _mm_uni_add_ps(vsrc, _mm_uni_set1_ps(vals_for_logistic_activate.p4.as_float_value)); + vsrc = _mm_uni_mul_ps(vsrc, vaux0); + vsrc = _mm_uni_add_ps(vsrc, _mm_uni_set1_ps(vals_for_logistic_activate.p3.as_float_value)); + vsrc = _mm_uni_mul_ps(vsrc, vaux0); + vsrc = _mm_uni_add_ps(vsrc, _mm_uni_set1_ps(vals_for_logistic_activate.p2.as_float_value)); + vsrc = _mm_uni_mul_ps(vsrc, vaux0); + vsrc = _mm_uni_add_ps(vsrc, _mm_uni_set1_ps(vals_for_logistic_activate.p1.as_float_value)); + vsrc = _mm_uni_mul_ps(vsrc, vaux0); + vsrc = _mm_uni_add_ps(vsrc, _mm_uni_set1_ps(vals_for_logistic_activate.p0.as_float_value)); + + vaux3 = _mm_uni_castsi_ps(vaux2); + vsrc = _mm_uni_mul_ps(vsrc, vaux3); + + return vsrc; + } +#endif + + inline float logistic_activate(float src) { + U aux2; + aux2.as_float_value = src; + int sign = aux2.as_int_value >> 31; + if (sign == 0) + src *= -1; + + src = exp(src); + + src = src / (src + 1); + if (sign == 0) + src = 1 - src; + + return src; } - inline float logistic_activate(float x) { - return 1.f / (1.f + exp(-x)); + inline float exp(float src) { + float aux0; + U aux2; + src = std::min(src, vals_for_logistic_activate.max_logf.as_float_value); + src = std::max(src, vals_for_logistic_activate.min_logf.as_float_value); + aux0 = src; + + src = src * vals_for_logistic_activate.log2ef.as_float_value + vals_for_logistic_activate.float_half.as_float_value; + src = std::floor(src); + aux0 = aux0 - src * (vals_for_logistic_activate.ln2f.as_float_value); + + aux2.as_int_value = static_cast(src); + aux2.as_int_value = aux2.as_int_value + vals_for_logistic_activate.int_0x7f.as_int_value; + aux2.as_int_value = aux2.as_int_value << vals_for_logistic_activate.shift_mantissa.as_int_value; + + src = 
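// Both vexp() above and the scalar exp() this note sits inside use the classic
// range-reduction scheme: clamp x to roughly [-14.5, 88.38], split
//   x = n * ln2 + r,   n = floor(x * log2(e) + 0.5),
// approximate e^r with a degree-5 polynomial (coefficients p0..p5 in Horner
// form), then rebuild e^x = 2^n * p(r) by constructing the float 2^n directly:
// (n + 127) << 23 places the biased exponent into the IEEE-754 exponent field.
// logistic_activate() then forms 1/(1 + e^-x), using sigma(x) = 1 - sigma(-x)
// so the exponential argument stays negative and cannot overflow.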
vals_for_logistic_activate.p5.as_float_value; + src = src * aux0 + vals_for_logistic_activate.p4.as_float_value; + src = src * aux0 + vals_for_logistic_activate.p3.as_float_value; + src = src * aux0 + vals_for_logistic_activate.p2.as_float_value; + src = src * aux0 + vals_for_logistic_activate.p1.as_float_value; + src = src * aux0 + vals_for_logistic_activate.p0.as_float_value; + src *= aux2.as_float_value; + + return src; } }; diff --git a/inference-engine/src/extension/ext_resample.cpp b/inference-engine/src/extension/ext_resample.cpp index 5c3492c91e5ac0..d4d187d86d3711 100644 --- a/inference-engine/src/extension/ext_resample.cpp +++ b/inference-engine/src/extension/ext_resample.cpp @@ -13,7 +13,7 @@ #include #include #include "ie_parallel.hpp" -#include "simple_copy.h" +#include "common/simple_copy.h" namespace InferenceEngine { namespace Extensions { @@ -31,12 +31,21 @@ class ResampleImpl: public ExtLayerBase { if (layer->insData.size() != 1 || layer->outData.empty()) THROW_IE_EXCEPTION << "Incorrect number of input/output edges!"; - if (layer->insData[0].lock()->dims.size() != 4) - THROW_IE_EXCEPTION << "Resample supports only 4D blobs!"; + if (layer->insData[0].lock()->getTensorDesc().getDims().size() != 4 && + layer->insData[0].lock()->getTensorDesc().getDims().size() != 5) + THROW_IE_EXCEPTION << "Resample supports only 4D and 5D blobs!"; type = layer->GetParamAsString("type"); antialias = layer->GetParamAsBool("antialias", false); + if (type == "caffe.ResampleParameter.LINEAR" && + layer->insData[0].lock()->getTensorDesc().getDims().size() == 5) + THROW_IE_EXCEPTION << "Resample doesn't support LINEAR interpolation for 5D input!"; + + if (layer->insData[0].lock()->getTensorDesc().getPrecision() != Precision::FP32 && + layer->insData[0].lock()->getTensorDesc().getDims().size() == 5) + THROW_IE_EXCEPTION << "Resample supports 5D input only for FP32 precision!"; + #if defined(HAVE_AVX512F) auto blk_layout = ConfLayout::BLK16; #else @@ -57,16 +66,20 @@ class ResampleImpl: public ExtLayerBase { #ifdef WIN32 #undef IN #endif - Layout layout = inputs[0]->layout(); - Precision precision = inputs[0]->precision(); + const Layout &layout = inputs[0]->getTensorDesc().getLayout(); + const Precision &precision = inputs[0]->getTensorDesc().getPrecision(); + + int ndims = inputs[0]->getTensorDesc().getDims().size(); size_t IN = inputs[0]->getTensorDesc().getDims()[0]; size_t IC = inputs[0]->getTensorDesc().getDims()[1]; - size_t IH = inputs[0]->getTensorDesc().getDims()[2]; - size_t IW = inputs[0]->getTensorDesc().getDims()[3]; + size_t ID = ndims == 5 ? inputs[0]->getTensorDesc().getDims()[ndims - 3] : 1; + size_t IH = inputs[0]->getTensorDesc().getDims()[ndims - 2]; + size_t IW = inputs[0]->getTensorDesc().getDims()[ndims - 1]; - size_t OH = outputs[0]->getTensorDesc().getDims()[2]; - size_t OW = outputs[0]->getTensorDesc().getDims()[3]; + size_t OD = ndims == 5 ? 
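// Note the coordinate fix carried by this diff: the old LINEAR path mixed the
// scale factors (ox * fx + fy / 2 - 0.5). The corrected mapping
//   ix = ox * fx + fx / 2 - 0.5 = (ox + 0.5) * fx - 0.5
// is the standard pixel-center alignment between output and input grids; the
// same form (with fy, and fz for the new depth axis) is used below.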
outputs[0]->getTensorDesc().getDims()[ndims - 3] : 1; + size_t OH = outputs[0]->getTensorDesc().getDims()[ndims - 2]; + size_t OW = outputs[0]->getTensorDesc().getDims()[ndims - 1]; if (IW == OW && IH == OH && type == "caffe.ResampleParameter.LINEAR") { size_t size = IN * IC * IH * IW; @@ -79,37 +92,38 @@ class ResampleImpl: public ExtLayerBase { float fx = static_cast(IW) / static_cast(OW); float fy = static_cast(IH) / static_cast(OH); + float fz = static_cast(ID) / static_cast(OD); - bool isDownsample = (fx > 1) || (fy > 1); + bool isDownsample = (fx > 1) || (fy > 1) || (fz > 1); if (type == "caffe.ResampleParameter.NEAREST") { - if (!isDownsample && fx == 0.25f && fy == 0.25f) { - if (layout == NCHW || layout == NHWC) { + if (!isDownsample && fx == 0.25f && fy == 0.25f && fz == 0.25f) { + if (layout == NCHW || layout == NHWC || layout == NCDHW || layout == NDHWC) { if (precision == Precision::FP32) { - Upsample_Nearest_PLN(src_data, dst_data, IN, IC, IH, IW, layout); + Upsample_Nearest_PLN(src_data, dst_data, IN, IC, ID, IH, IW, layout); } else { Upsample_Nearest_PLN(reinterpret_cast(src_data), - reinterpret_cast(dst_data), IN, IC, IH, IW, layout); + reinterpret_cast(dst_data), IN, IC, ID, IH, IW, layout); } } else { - Upsample_Nearest_BLK<4>(src_data, dst_data, IN, IC, IH, IW); + Upsample_Nearest_BLK<4>(src_data, dst_data, IN, IC, ID, IH, IW, ndims); } } else if (!isDownsample && fx == 0.5f && fy == 0.5f) { - if (layout == NCHW || layout == NHWC) { + if (layout == NCHW || layout == NHWC || layout == NCDHW || layout == NDHWC) { if (precision == Precision::FP32) { - Upsample_Nearest_PLN(src_data, dst_data, IN, IC, IH, IW, layout); + Upsample_Nearest_PLN(src_data, dst_data, IN, IC, ID, IH, IW, layout); } else { Upsample_Nearest_PLN(reinterpret_cast(src_data), - reinterpret_cast(dst_data), IN, IC, IH, IW, layout); + reinterpret_cast(dst_data), IN, IC, ID, IH, IW, layout); } } else { - Upsample_Nearest_BLK<2>(src_data, dst_data, IN, IC, IH, IW); + Upsample_Nearest_BLK<2>(src_data, dst_data, IN, IC, ID, IH, IW, ndims); } } else { - if (layout == NCHW) { - NearestNeighborKernel_PLN(src_data, dst_data, IN, IC, IH, IW, fx, fy, OH, OW); + if (layout == NCHW || layout == NCDHW) { + NearestNeighborKernel_PLN(src_data, dst_data, IN, IC, ID, IH, IW, fx, fy, fz, OD, OH, OW); } else { - NearestNeighborKernel_BLK(src_data, dst_data, IN, IC, IH, IW, fx, fy, OH, OW); + NearestNeighborKernel_BLK(src_data, dst_data, IN, IC, ID, IH, IW, fx, fy, fz, OD, OH, OW); } } } else if (type == "caffe.ResampleParameter.LINEAR") { @@ -146,8 +160,8 @@ class ResampleImpl: public ExtLayerBase { for (size_t oy = 0; oy < oh; oy++) { for (size_t ox = 0; ox < ow; ox++) { - float ix = ox * fx + fy / 2.0f - 0.5f; - float iy = oy * fy + fx / 2.0f - 0.5f; + float ix = ox * fx + fx / 2.0f - 0.5f; + float iy = oy * fy + fy / 2.0f - 0.5f; int ix_r = static_cast(round(ix)); int iy_r = static_cast(round(iy)); @@ -183,48 +197,62 @@ class ResampleImpl: public ExtLayerBase { } } - static void NearestNeighborKernel_PLN(const float *in_ptr_, float *out_ptr_, int B, int C, int IH, int IW, float fx, float fy, int OH, int OW) { + static void NearestNeighborKernel_PLN(const float *in_ptr_, float *out_ptr_, int B, int C, int ID, int IH, int IW, + float fx, float fy, float fz, int OD, int OH, int OW) { for (int b = 0; b < B; b++) { for (int c = 0; c < C; c++) { - const float *in_ptr = in_ptr_ + IW * IH * C * b + IW * IH * c; - float *out_ptr = out_ptr_ + OW * OH * C * b + OW * OH * c; + const float *in_ptr = in_ptr_ + IW * IH * ID * C * b + IW * 
IH * ID * c; + float *out_ptr = out_ptr_ + OW * OH * OD * C * b + OW * OH * OD * c; - for (int oy = 0; oy < OH; oy++) { - for (int ox = 0; ox < OW; ox++) { - float ix = ox * fx + fy / 2.0f - 0.5f; - float iy = oy * fy + fx / 2.0f - 0.5f; + for (int oz = 0; oz < OD; oz++) { + for (int oy = 0; oy < OH; oy++) { + for (int ox = 0; ox < OW; ox++) { + float ix = ox * fx + fx / 2.0f - 0.5f; + float iy = oy * fy + fy / 2.0f - 0.5f; + float iz = oz * fz + fz / 2.0f - 0.5f; - size_t ix_r = static_cast(round(ix)); - size_t iy_r = static_cast(round(iy)); + size_t ix_r = static_cast(round(ix)); + size_t iy_r = static_cast(round(iy)); + size_t iz_r = static_cast(round(iz)); - out_ptr[oy * OW + ox] = in_ptr[iy_r * IW + ix_r]; + out_ptr[oz * OH * OW + oy * OW + ox] = in_ptr[iz_r * IH * IW + iy_r * IW + ix_r]; + } } } } } } - static void NearestNeighborKernel_BLK(const float *in_ptr_, float *out_ptr_, int B, int C, int IH, int IW, float fx, float fy, int OH, int OW) { - int blk_size = 8; + static void NearestNeighborKernel_BLK(const float *in_ptr_, float *out_ptr_, int B, int C, int ID, int IH, int IW, + float fx, float fy, float fz, int OD, int OH, int OW) { +#if defined(HAVE_AVX512F) + auto blk_size = 16; +#else + auto blk_size = 8; +#endif int CB = div_up(C, blk_size); for (int b = 0; b < B; b++) { for (int cb = 0; cb < CB; cb++) { - const float *in_ptr = in_ptr_ + IW * IH * CB * blk_size * b + IW * IH * cb * blk_size; - float *out_ptr = out_ptr_ + OW * OH * CB * blk_size * b + OW * OH * cb * blk_size; + const float *in_ptr = in_ptr_ + IW * IH * ID * CB * blk_size * b + IW * IH * ID * cb * blk_size; + float *out_ptr = out_ptr_ + OW * OH * OD * CB * blk_size * b + OW * OH * OD * cb * blk_size; - for (int oy = 0; oy < OH; oy++) { - for (int ox = 0; ox < OW; ox++) { - float ix = ox * fx + fy / 2.0f - 0.5f; - float iy = oy * fy + fx / 2.0f - 0.5f; + for (int oz = 0; oz < OD; oz++) { + for (int oy = 0; oy < OH; oy++) { + for (int ox = 0; ox < OW; ox++) { + float ix = ox * fx + fx / 2.0f - 0.5f; + float iy = oy * fy + fy / 2.0f - 0.5f; + float iz = oz * fz + fz / 2.0f - 0.5f; - size_t ix_r = static_cast(round(ix)); - size_t iy_r = static_cast(round(iy)); + size_t ix_r = static_cast(round(ix)); + size_t iy_r = static_cast(round(iy)); + size_t iz_r = static_cast(round(iz)); - for (int c = 0; c < blk_size; c++) { - float value = in_ptr[iy_r * IW * blk_size + ix_r * blk_size + c]; + for (int c = 0; c < blk_size; c++) { + float value = in_ptr[iz_r * IH * IW * blk_size + iy_r * IW * blk_size + ix_r * blk_size + c]; - out_ptr[oy * OW * blk_size + ox * blk_size + c] = value; + out_ptr[oz * OH * OW * blk_size + oy * OW * blk_size + ox * blk_size + c] = value; + } } } } @@ -233,25 +261,33 @@ class ResampleImpl: public ExtLayerBase { } template - static void Upsample_Nearest_PLN(const T *in_ptr_, T *out_ptr_, int B, int C, int IH, int IW, Layout layout) { + static void Upsample_Nearest_PLN(const T *in_ptr_, T *out_ptr_, int B, int C, int ID, int IH, int IW, Layout layout) { + int factor_d = layout == NCDHW || layout == NDHWC ? 
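// In the non-planar branch of Upsample_Nearest_PLN further down, the OpenMP
// pragma is replaced by parallel_for2d over (batch, source row). Each source
// row is expanded once -- one block memcpy per output pixel, duplicated
// stepX times -- and the expanded row is then cloned for the remaining
// stepY - 1 output rows with a single wide memcpy, so vertical replication
// costs one memcpy per extra row instead of one per pixel.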
factor : 1; + + int OD = factor_d * ID; int OH = factor * IH; int OW = factor * IW; - if (layout == NCHW) { + if (layout == NCHW || layout == NCDHW) { for (int b = 0; b < B; b++) { for (int c = 0; c < C; c++) { - const T *in_ptr = in_ptr_ + IW * IH * C * b + IW * IH * c; - T *out_ptr = out_ptr_ + OW * OH * C * b + OW * OH * c; - - for (int iy = 0; iy < IH; iy++) { - for (int ix = 0; ix < IW; ix++) { - int oy = factor * iy; - int ox = factor * ix; - float value = in_ptr[iy * IW + ix]; - - for (int fh = 0; fh < factor; fh++) { - for (int fw = 0; fw < factor; fw++) { - out_ptr[(oy + fh) * OW + ox + fw] = static_cast(value); + const T *in_ptr = in_ptr_ + IW * IH * ID * C * b + IW * IH * ID * c; + T *out_ptr = out_ptr_ + OW * OH * OD * C * b + OW * OH * OD * c; + + for (int iz = 0; iz < ID; iz++) { + for (int iy = 0; iy < IH; iy++) { + for (int ix = 0; ix < IW; ix++) { + int oz = factor_d * iz; + int oy = factor * iy; + int ox = factor * ix; + float value = in_ptr[iz * IH * IW + iy * IW + ix]; + + for (int fd = 0; fd < factor_d; fd++) { + for (int fh = 0; fh < factor; fh++) { + for (int fw = 0; fw < factor; fw++) { + out_ptr[(oz + fd) * OH * OW + (oy + fh) * OW + ox + fw] = static_cast(value); + } + } } } } @@ -269,35 +305,30 @@ class ResampleImpl: public ExtLayerBase { int stepX = factor; int stepY = factor; -#ifdef _OPENMP -#pragma omp parallel for collapse(2) -#endif - for (int mb = 0; mb < B; mb++) { - for (int oh = 0; oh < OH; oh += stepY) { - size_t dst_off = mb * OCOWOH + (oh * OW) * block_size; - size_t src_off = mb * ICIWIH + (oh / stepY * IW) * block_size; + parallel_for2d(B, (OH / stepY), [&](size_t mb, size_t oh) { + size_t dst_off = mb * OCOWOH + oh * stepY * OW * block_size; + size_t src_off = mb * ICIWIH + oh * IW * block_size; - for (int ow = 0; ow < OW; ow += stepX) { - size_t dst_off_curr = dst_off + ow * block_size; - size_t src_off_curr = src_off + ow / stepX * block_size; + for (int ow = 0; ow < OW; ow += stepX) { + size_t dst_off_curr = dst_off + ow * block_size; + size_t src_off_curr = src_off + ow / stepX * block_size; - memcpy(&out_ptr_[dst_off_curr], &in_ptr_[src_off_curr], block_size_bytes); + memcpy(&out_ptr_[dst_off_curr], &in_ptr_[src_off_curr], block_size_bytes); - for (int owx = 1; owx < stepX; owx++) { - memcpy(&out_ptr_[dst_off_curr + block_size * owx], &in_ptr_[src_off_curr], block_size_bytes); - } + for (int owx = 1; owx < stepX; owx++) { + memcpy(&out_ptr_[dst_off_curr + block_size * owx], &in_ptr_[src_off_curr], block_size_bytes); } + } - for (int ohy = 1; ohy < stepY; ohy++) { - memcpy(&out_ptr_[dst_off + OW * block_size * ohy], &out_ptr_[dst_off], block_size_bytes * OW); - } + for (int ohy = 1; ohy < stepY; ohy++) { + memcpy(&out_ptr_[dst_off + OW * block_size * ohy], &out_ptr_[dst_off], block_size_bytes * OW); } - } + }); } } template - static void Upsample_Nearest_BLK(const float *in_ptr_, float *out_ptr_, int B, int C, int IH, int IW) { + static void Upsample_Nearest_BLK(const float *in_ptr_, float *out_ptr_, int B, int C, int ID, int IH, int IW, int ndims) { #if defined(HAVE_AVX512F) int blk_size = 16; #else @@ -312,48 +343,61 @@ class ResampleImpl: public ExtLayerBase { int CB = div_up(C, blk_size); + int factor_d = ndims == 5 ? 
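// Upsample_Nearest_BLK replicates in the blocked layout (innermost channel
// block of 16 floats with AVX-512, 8 otherwise): one vector load per source
// pixel, then factor_d * factor * factor vector stores, so each copy is a
// single contiguous SIMD store. Depth only participates when ndims == 5,
// which is what the factor_d ternary around this note selects.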
factor : 1; + + int OD = factor_d * ID; int OH = factor * IH; int OW = factor * IW; parallel_for2d(B, CB, [&](int b, int cb) { #if defined(HAVE_AVX2) || defined(HAVE_AVX512F) - const float *in_ptr = in_ptr_ + IW * IH * CB * blk_size * b + IW * IH * cb * blk_size; - float *out_ptr = out_ptr_ + OW * OH * CB * blk_size * b + OW * OH * cb * blk_size; + const float *in_ptr = in_ptr_ + IW * IH * ID * CB * blk_size * b + IW * IH * ID * cb * blk_size; + float *out_ptr = out_ptr_ + OW * OH * OD * CB * blk_size * b + OW * OH * OD * cb * blk_size; + for (int iz = 0; iz < ID; iz++) { for (int iy = 0; iy < IH; iy++) { for (int ix = 0; ix < IW; ix++) { + int oz = factor_d * iz; int oy = factor * iy; int ox = factor * ix; - vec_type vsrc = _mm_uni_loadu_ps(in_ptr + iy * IW * blk_size + ix * blk_size); + vec_type vsrc = _mm_uni_loadu_ps(in_ptr + iz * IH * IW * blk_size + iy * IW * blk_size + ix * blk_size); - for (int fh = 0; fh < factor; fh++) { - for (int fw = 0; fw < factor; fw++) { - _mm_uni_storeu_ps(out_ptr + (oy + fh) * OW * blk_size + (ox + fw) * blk_size, vsrc); + for (int fz = 0; fz < factor_d; fz++) { + for (int fh = 0; fh < factor; fh++) { + for (int fw = 0; fw < factor; fw++) { + _mm_uni_storeu_ps(out_ptr + (oz + fz) * OH * OW * blk_size + (oy + fh) * OW * blk_size + (ox + fw) * blk_size, vsrc); + } } } } } + } #else - const float *in_ptr = in_ptr_ + IW * IH * CB * blk_size * b + IW * IH * cb * blk_size; - float *out_ptr = out_ptr_ + OW * OH * CB * blk_size * b + OW * OH * cb * blk_size; + const float *in_ptr = in_ptr_ + IW * IH * ID * CB * blk_size * b + IW * IH * ID * cb * blk_size; + float *out_ptr = out_ptr_ + OW * OH * OD * CB * blk_size * b + OW * OH * OD * cb * blk_size; + for (int iz = 0; iz < ID; iz++) { for (int iy = 0; iy < IH; iy++) { for (int ix = 0; ix < IW; ix++) { + int oz = factor_d * iz; int oy = factor * iy; int ox = factor * ix; for (int c = 0; c < blk_size; c++) { - float value = in_ptr[iy * IW * blk_size + ix * blk_size + c]; + float value = in_ptr[iz * IH * IW * blk_size + iy * IW * blk_size + ix * blk_size + c]; - for (int fh = 0; fh < factor; fh++) { - for (int fw = 0; fw < factor; fw++) { - out_ptr[(oy + fh) * OW * blk_size + (ox + fw) * blk_size + c] = value; + for (int fz = 0; fz < factor_d; fz++) { + for (int fh = 0; fh < factor; fh++) { + for (int fw = 0; fw < factor; fw++) { + out_ptr[(oz + fz) * OH * OW * blk_size + (oy + fh) * OW * blk_size + (ox + fw) * blk_size + c] = value; + } } } } } } + } #endif }); } diff --git a/inference-engine/src/extension/ext_reverse_sequence.cpp b/inference-engine/src/extension/ext_reverse_sequence.cpp index b53692343bc6f3..f50005a5c7c06d 100644 --- a/inference-engine/src/extension/ext_reverse_sequence.cpp +++ b/inference-engine/src/extension/ext_reverse_sequence.cpp @@ -73,7 +73,7 @@ class ReverseSequenceImpl: public ExtLayerBase { float* dst_data = outputs[0]->cbuffer().as() + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - switch (inputs[REVERSESEQUENCE_LENGTHS]->precision()) { + switch (inputs[REVERSESEQUENCE_LENGTHS]->getTensorDesc().getPrecision()) { case Precision::FP32: { float *seq_lengths_data = inputs[REVERSESEQUENCE_LENGTHS]->cbuffer().as() + inputs[REVERSESEQUENCE_LENGTHS]->getTensorDesc().getBlockingDesc().getOffsetPadding(); diff --git a/inference-engine/src/extension/ext_select.cpp b/inference-engine/src/extension/ext_select.cpp new file mode 100644 index 00000000000000..f77137db0a3bb4 --- /dev/null +++ b/inference-engine/src/extension/ext_select.cpp @@ -0,0 +1,78 @@ +// Copyright (C) 2018-2019 
Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ext_list.hpp" +#include "ext_base.hpp" + +#include +#include +#include +#include +#include +#include "ie_parallel.hpp" + +namespace InferenceEngine { +namespace Extensions { +namespace Cpu { + +class SelectImpl: public ExtLayerBase { + enum {condition, then_, else_, numOfInputs}; + +public: + explicit SelectImpl(const CNNLayer* layer) { + try { + if (numOfInputs != layer->insData.size() || 1 != layer->outData.size()) { + THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!"; + } + + auto conditionPrecision = layer->insData[condition].lock()->getTensorDesc().getPrecision(); + + if (Precision::I32 != conditionPrecision && Precision::FP32 != conditionPrecision) { + THROW_IE_EXCEPTION << layer->name << " Incorrect condition tensor precision: " << conditionPrecision << ". Should be I32 or FP32"; + } + + addConfig(layer, {{ConfLayout::PLN, false}, + {ConfLayout::PLN, false}, + {ConfLayout::PLN, false}}, + {{ConfLayout::PLN, false}}); + } catch (InferenceEngine::details::InferenceEngineException &ex) { + errorMsg = ex.what(); + } + } + + StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *) noexcept override { + const int32_t *conditionData = inputs[condition]->cbuffer().as(); + + const float *thenData = inputs[then_]->cbuffer().as(); + + const float *elseData = inputs[else_]->cbuffer().as(); + + float* dstData = outputs[0]->cbuffer().as(); + enum {N, C, H, W, Dims}; + int dim[Dims] = {1, 1, 1, 1}; + int cdim[Dims] = {1, 1, 1, 1}; + + SizeVector dims = inputs[then_]->getTensorDesc().getDims(); + std::copy(std::begin(dims), std::end(dims), std::begin(dim) + (Dims - dims.size())); + + SizeVector cDims = inputs[condition]->getTensorDesc().getDims(); + std::copy(std::begin(cDims), std::end(cDims), std::begin(cdim) + (Dims - cDims.size())); + + parallel_for3d(dim[N], dim[H], dim[W], [&](int b, int h, int w) { + for (int c = 0; c < dim[C]; c++) { + dstData[b*dim[C]*dim[H]*dim[W] + c*dim[H]*dim[W] + h*dim[W] + w] + = conditionData[(b % cdim[N])*cdim[C]*cdim[H]*cdim[W] + (c % cdim[C])*cdim[H]*cdim[W] + (h % cdim[H])*cdim[W] + (w % cdim[W])] + ? 
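// Select picks element-wise between 'then' and 'else' according to a condition
// tensor that may be smaller than the data: both shapes are right-aligned into
// fixed 4D dim[] / cdim[] arrays (leading dims padded with 1), and the
// condition index is taken modulo cdim per axis, which implements
// numpy-style broadcasting. E.g. (assumed shapes) data [2,3,4,4] with
// condition [1,3,1,1] reuses one condition value per channel across batch
// and space, as the modulo indexing in the statement around this note shows.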
thenData[b*dim[C]*dim[H]*dim[W] + c*dim[H]*dim[W] + h*dim[W] + w] + : elseData[b*dim[C]*dim[H]*dim[W] + c*dim[H]*dim[W] + h*dim[W] + w]; + } + }); + return OK; + } +}; + + +REG_FACTORY_FOR(ImplFactory, Select); +} // namespace Cpu +} // namespace Extensions +} // namespace InferenceEngine diff --git a/inference-engine/src/extension/ext_simplernms.cpp b/inference-engine/src/extension/ext_simplernms.cpp index cb0e717e4b7839..c00e76fd91758d 100644 --- a/inference-engine/src/extension/ext_simplernms.cpp +++ b/inference-engine/src/extension/ext_simplernms.cpp @@ -202,7 +202,7 @@ class SimplerNMSImpl : public ExtLayerBase { if (layer->insData.size() != 3 || layer->outData.size() != 1) THROW_IE_EXCEPTION << "Incorrect number of input/output edges!"; - if (layer->insData[0].lock()->dims.size() != 4) + if (layer->insData[0].lock()->getTensorDesc().getDims().size() != 4) THROW_IE_EXCEPTION << "SimplerNMS supports only 4D blobs!"; min_box_size_ = layer->GetParamAsInt("min_bbox_size"); diff --git a/inference-engine/src/extension/ext_squeeze.cpp b/inference-engine/src/extension/ext_squeeze.cpp index 88d65533d49480..4a90e035c8c12b 100644 --- a/inference-engine/src/extension/ext_squeeze.cpp +++ b/inference-engine/src/extension/ext_squeeze.cpp @@ -10,6 +10,7 @@ #include #include #include "ie_parallel.hpp" +#include "common/simple_copy.h" namespace InferenceEngine { namespace Extensions { @@ -22,98 +23,38 @@ class SqueezeImpl: public ExtLayerBase { if (layer->insData.empty() || layer->outData.empty()) THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!"; - if (layer->insData.size() != 2) + if (layer->insData.size() != 1 && layer->insData.size() != 2) THROW_IE_EXCEPTION << layer->name << " Incorrect number of input edges!"; - idx_dims = layer->insData[SQUEEZE_INDEXES].lock()->getTensorDesc().getDims(); - if (idx_dims.size() > 1) - THROW_IE_EXCEPTION << layer->name << " Index vector should be 1 dimension"; - - if (layer->insData[SQUEEZE_INDEXES].lock()->getTensorDesc().getPrecision() != Precision::I32 && - layer->insData[SQUEEZE_INDEXES].lock()->getTensorDesc().getPrecision() != Precision::FP32) - THROW_IE_EXCEPTION << layer->name << " Incorrect 'indices_to_squeeze' input precision. 
Only FP32 and I32 are supported!"; - - data_dims = layer->insData[SQUEEZE_DATA].lock()->getTensorDesc().getDims(); + SizeVector data_dims = layer->insData[0].lock()->getTensorDesc().getDims(); SizeVector dst_dims = layer->outData[0]->getTensorDesc().getDims(); if (data_dims.size() < dst_dims.size()) THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output dimensions!"; - if (data_dims.size() <= idx_dims[0] && !(data_dims.size() == 1 && idx_dims[0] == 1)) - THROW_IE_EXCEPTION << layer->name << " Incompatible number of data dimensions and indexes vector length!"; + if (layer->insData.size() == 1) + addConfig(layer, { { ConfLayout::PLN, false, 0 } }, { { ConfLayout::PLN, false, 0 } }); + else + addConfig(layer, { { ConfLayout::PLN, false, 0 }, { ConfLayout::PLN, false, 0 } }, { { ConfLayout::PLN, false, 0 } }); - addConfig(layer, { { ConfLayout::PLN, false, 0 }, { ConfLayout::ANY, true } }, { { ConfLayout::PLN, false, 0 } }); + // WA to enable the implementation only for equal input and output precisions + confs[0].inConfs[0].desc.setPrecision(confs[0].outConfs[0].desc.getPrecision()); } catch (InferenceEngine::details::InferenceEngineException &ex) { errorMsg = ex.what(); } } StatusCode execute(std::vector& inputs, std::vector& outputs, ResponseDesc *resp) noexcept override { - switch (inputs[SQUEEZE_INDEXES]->precision()) { - case Precision::FP32: { - float *idx_data = inputs[SQUEEZE_INDEXES]->cbuffer().as() + - inputs[SQUEEZE_INDEXES]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - for (size_t i = 0; i < idx_dims[0]; i++) { - float axis = idx_data[i]; - if (axis < 0) - axis += data_dims.size(); + const uint8_t *src = inputs[0]->cbuffer().as() + inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding()*inputs[0]->element_size(); + uint8_t* dst = outputs[0]->cbuffer().as() + outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding()*outputs[0]->element_size(); - if (axis > static_cast(data_dims.size())) { - if (resp) { - std::string errorMsg = "Index to squeeze exceeds data tensor dimension"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return PARAMETER_MISMATCH; - } else if (data_dims[static_cast(axis)] != 1) { - if (resp) { - std::string errorMsg = "Index to squeeze of data tensor dimension is not 1"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return PARAMETER_MISMATCH; - } - } - } - break; - case Precision::I32: { - int32_t *idx_data = inputs[SQUEEZE_INDEXES]->cbuffer().as() + - inputs[SQUEEZE_INDEXES]->getTensorDesc().getBlockingDesc().getOffsetPadding(); - for (size_t i = 0; i < idx_dims[0]; i++) { - int32_t axis = idx_data[i]; - if (axis < 0) - axis += data_dims.size(); - - if (axis > static_cast(data_dims.size())) { - if (resp) { - std::string errorMsg = "Index to squeeze exceeds data tensor dimension"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return PARAMETER_MISMATCH; - } else if (data_dims[axis] != 1) { - if (resp) { - std::string errorMsg = "Index to squeeze of data tensor dimension is not 1"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return PARAMETER_MISMATCH; - } - } - } - break; - default: - if (resp) { - std::string errorMsg = "Incorrect 'indices_to_squeeze' input precision. 
Only FP32 and I32 are supported!"; - errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); - } - return GENERAL_ERROR; + if (src != dst) { + size_t srcSize = inputs[0]->byteSize(); + size_t dstSize = outputs[0]->byteSize(); + simple_copy(dst, dstSize, src, srcSize); } return OK; } - -private: - const size_t SQUEEZE_DATA = 0; - const size_t SQUEEZE_INDEXES = 1; - - SizeVector data_dims; - SizeVector idx_dims; }; REG_FACTORY_FOR(ImplFactory, Squeeze); diff --git a/inference-engine/src/extension/ext_topk.cpp b/inference-engine/src/extension/ext_topk.cpp new file mode 100644 index 00000000000000..a3dd7a32e67fc3 --- /dev/null +++ b/inference-engine/src/extension/ext_topk.cpp @@ -0,0 +1,580 @@ +// Copyright (C) 2018-2019 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ext_list.hpp" +#include "ext_base.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include "ie_parallel.hpp" +#if defined(HAVE_SSE) || defined(HAVE_AVX2) || defined(HAVE_AVX512F) +#include +#endif + +namespace InferenceEngine { +namespace Extensions { +namespace Cpu { + +class TopKImpl: public ExtLayerBase { +public: + explicit TopKImpl(const CNNLayer* layer) { + try { + if (layer->insData.size() != 2) + THROW_IE_EXCEPTION << layer->name << " Incorrect number of input edges!"; + + if (layer->outData.size() != 1 && layer->outData.size() != 2) + THROW_IE_EXCEPTION << layer->name << " Incorrect number of output edges!"; + + if (layer->insData[TOPK_DATA].lock()->getTensorDesc().getPrecision() != Precision::FP32 || + layer->insData[TOPK_K].lock()->getTensorDesc().getPrecision() != Precision::I32) + THROW_IE_EXCEPTION << layer->name << " Incorrect input data/index values precision."; + + if (layer->insData[TOPK_K].lock()->getTensorDesc().getDims().size() > 1) + THROW_IE_EXCEPTION << layer->name << " Index vector should be 1 dimension"; + + SizeVector dst_dims = layer->outData[0]->getTensorDesc().getDims(); + SizeVector src_data_dims = layer->insData[TOPK_DATA].lock()->getTensorDesc().getDims(); + if (src_data_dims.size() != dst_dims.size()) + THROW_IE_EXCEPTION << layer->name << " Incorrect input/output tensor dimension sizes"; + + if (layer->outData.size() == 2) { + if (layer->outData[TOPK_VALUE]->getTensorDesc().getPrecision() != Precision::FP32) + THROW_IE_EXCEPTION << layer->name << " Incorrect output data tensor precision. 
Only FP32 is supported!"; + + SizeVector dst_idx_dims = layer->outData[TOPK_INDEX]->getTensorDesc().getDims(); + if (dst_dims.size() != dst_idx_dims.size()) + THROW_IE_EXCEPTION << layer->name << " Incorrect output tensor dimension sizes"; + + for (size_t i = 0; i < dst_dims.size(); i++) { + if (dst_dims[i] != dst_idx_dims[i]) + THROW_IE_EXCEPTION << layer->name << " Input/output tensor dimension mismatch"; + } + } + + src_dims = layer->insData[TOPK_DATA].lock()->getTensorDesc().getDims(); + int axis_ = layer->GetParamAsInt("axis", -1); + if (axis_ < 0) + axis_ += src_dims.size(); + + axis = static_cast(axis_); + + if (src_dims.size() < (1 + axis)) + THROW_IE_EXCEPTION << layer->name << " Incorrect input parameters dimensions and axis number!"; + + if (layer->GetParamAsString("mode", "max") == "max") + mode_max = true; + else + mode_max = false; + + if (layer->GetParamAsString("sort", "index") == "value") + sort_value = true; + else + sort_value = false; + + int j; + for (j = src_dims.size() - 1; j >= 0; j--) { + if (src_dims[j] != 1) break; + } + if (static_cast(j) == axis) is_last_dim = true; + + for (size_t i = 0; i < axis; i++) { + axis_step *= src_dims[i]; + if (src_data_dims[i] != dst_dims[i]) + THROW_IE_EXCEPTION << layer->name << " Input/output tensor dimension mismatch"; + } + axis_dim = src_dims[axis]; + for (size_t i = (axis + 1); i < src_dims.size(); i++) { + axis_stride *= src_dims[i]; + if (src_data_dims[i] != dst_dims[i]) + THROW_IE_EXCEPTION << layer->name << " Input/output tensor dimension mismatch"; + } + dim = static_cast(src_dims[axis]); + before_num = count(src_dims, 0, axis); + + if (layer->outData.size() == 1) { + addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) }, + { DataConfigurator(ConfLayout::PLN) }); + } else { + addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) }, + { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) }); + + // TODO: WA... While ICNNNetwork has no clear rule to fill tensor precision + // it use precision of parent layer. So each output tensor Data object has + // precision of producing layer. For TopK that is not true. Second output is + // integer tensor. Will change it for corresponding output desc. + confs.back().outConfs[1].desc.setPrecision(Precision::I32); + } + } catch (InferenceEngine::details::InferenceEngineException &ex) { + errorMsg = ex.what(); + } + } + +#if defined(HAVE_AVX512F) + const int block_size = 16; + typedef __m512 vec_type_f; + typedef __m512i vec_type_i; + typedef __mmask16 vmask_type; +#elif defined(HAVE_AVX2) + const int block_size = 8; + typedef __m256 vec_type_f; + typedef __m256i vec_type_i; + typedef __m256 vmask_type; +#elif defined(HAVE_SSE) + const int block_size = 4; + typedef __m128 vec_type_f; + typedef __m128i vec_type_i; + typedef __m128 vmask_type; +#else + typedef float vec_type_f; + typedef int vmask_type; +#endif + + struct cmpgt_ps { + static inline vmask_type cmp_ps(const vec_type_f _Left, const vec_type_f _Right) { +#if defined(HAVE_SSE) || defined(HAVE_AVX2) || defined(HAVE_AVX512F) + return _mm_uni_cmpgt_ps(_Left, _Right); +#else + return _Left > _Right ? _Left : _Right; +#endif + } + }; + + struct cmplt_ps { + static inline vmask_type cmp_ps(const vec_type_f _Left, const vec_type_f _Right) { +#if defined(HAVE_SSE) || defined(HAVE_AVX2) || defined(HAVE_AVX512F) + return _mm_uni_cmpgt_ps(_Right, _Left); +#else + return _Right > _Left ? 
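// One oddity kept as-is: in the non-SIMD fallback these cmp_ps helpers return
// the larger/smaller *value* rather than a boolean mask (vmask_type is a plain
// scalar int in that build). The scalar tails of top1_axis below compare via
// the Compare2 functor (presumably std::greater / std::less) instead, so the
// fallback branch of this ternary appears effectively unused -- worth
// double-checking whether it is ever exercised.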
_Right : _Left; +#endif + } + }; + + template class Compare2> + void top1_axis(const float* src_data, float* dst_data, int* dst_idx, SizeVector in_dims) { + int after_num = count(in_dims, axis + 1, in_dims.size()); + int first_index = 0; + +#if defined(HAVE_SSE) || defined(HAVE_AVX2) || defined(HAVE_AVX512F) + parallel_for2d(before_num, after_num / block_size, [&](int i0, int ib1) { + int s_index = i0 * dim * after_num + ib1 * block_size; + vec_type_f vmax_val = _mm_uni_loadu_ps(src_data + s_index); + vec_type_i vindex_max_val = _mm_uni_setzero_si(); + for (int i2 = 1; i2 < dim; i2++) { + s_index += after_num; + vec_type_f vsrc = _mm_uni_loadu_ps(src_data + s_index); + vmask_type vmask = Compare1::cmp_ps(vsrc, vmax_val); + vmax_val = _mm_uni_blendv_ps(vmax_val, vsrc, vmask); + + vec_type_i vindex_cur_val = _mm_uni_set1_epi32(i2); +#if defined(HAVE_AVX512F) + vindex_max_val = _mm512_mask_blend_epi32(vmask, vindex_max_val, vindex_cur_val); +#else + vindex_max_val = _mm_uni_blendv_epi8(vindex_max_val, vindex_cur_val, _mm_uni_castps_si(vmask)); +#endif + } + if (dst_data) + _mm_uni_storeu_ps(dst_data + i0 * after_num + ib1 * block_size, vmax_val); + if (dst_idx) + _mm_uni_storeu_si(reinterpret_cast(dst_idx + i0 * after_num + ib1 * block_size), vindex_max_val); + }); + first_index = after_num / block_size * block_size; +#endif + int rest = after_num - first_index; + parallel_for2d(before_num, rest, [&](int i0, int i1) { + int index_max_val = 0; + int s_index = i0 * dim * after_num + first_index + i1; + float max_val = src_data[s_index]; + for (int i2 = 1; i2 < dim; i2++) { + s_index += after_num; + if (Compare2()(src_data[s_index], max_val)) { + max_val = src_data[s_index]; + index_max_val = i2; + } + } + if (dst_data) + dst_data[i0 * after_num + first_index + i1] = max_val; + if (dst_idx) + dst_idx[i0 * after_num + first_index + i1] = index_max_val; + }); + } + + template
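// The SIMD argmax in top1_axis above follows a standard blend pattern,
// sketched here per lane in scalar form (a paraphrase of the code, not part
// of the commit):
//
//   float max_val = src[base];            // row 0 of the reduced axis
//   int   max_idx = 0;
//   for (int i2 = 1; i2 < dim; ++i2) {
//       float v   = src[base + i2 * after_num];
//       bool mask = cmp(v, max_val);      // cmpgt for max mode, cmplt for min
//       max_val   = mask ? v  : max_val;  // _mm_uni_blendv_ps
//       max_idx   = mask ? i2 : max_idx;  // blend of the index register
//   }
//
// Vectorized, each of the block_size lanes tracks its own running value and
// index; AVX-512 blends indices with _mm512_mask_blend_epi32 because its
// compares produce a __mmask16 rather than a full-width vector mask.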